In [87]:
%load_ext autoreload
%autoreload 2
import numpy as np
from xgboost import XGBClassifier
from sklearn import metrics
import seaborn as sns
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from source.read_open_dataset import DatasetOpenDataset, openDatasetParticipants
from source.tserie import TSerie
from source.models.manifold.umap_fl import UMAP_FL

FEATURE_SIZE = 32
UMAP_NEIGHTBORS = 20
N_EPOCHS = 2000
CLASS_LABELS = ['Sedentary', 'Light']

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Reading the dataset

In [4]:
dataset = DatasetOpenDataset(mode='leave-one-subject')
dataset.loadData()

True

In [14]:
print('Unique Labels (intensities): {}'.format(np.unique(dataset.Int_train)))

print('Sedentary windows: {}  Light windows: {}'.format(len(dataset.Int_train[dataset.Int_train == 0]), len(dataset.Int_train[dataset.Int_train == 1])))

Unique Labels (intensities): [0. 1.]
Sedentary windows: 24866  Light windows: 1036


# Data preparation

In [16]:
# Preparing data
ts_train = TSerie(dataset.X_train, dataset.Int_train)
ts_test = TSerie(dataset.X_test, dataset.Int_test)

ts_train.center()
ts_test.center()

ts_train.folding_features_v2()
ts_test.folding_features_v2()

# Engenharia de features

In [58]:

# Getting features
model = UMAP_FL(n_components=FEATURE_SIZE,n_neighbors=UMAP_NEIGHTBORS, n_epochs=N_EPOCHS)
ts_train.features = model.fit_transform(ts_train.features, y=ts_train.I)
ts_test.features = model.transform(ts_test.features)

In [72]:

# Getting features
model_vis = UMAP_FL(n_components=2,n_neighbors=UMAP_NEIGHTBORS, n_epochs=N_EPOCHS)
train_proj = model_vis.fit_transform(ts_train.features, y=ts_train.I)
test_proj = model_vis.transform(ts_test.features)

# Straight-forward Classifier


In [88]:


# clf = XGBClassifier(tree_method='gpu_hist', predictor='gpu_predictor')
clf = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)

clf.fit(ts_train.features, ts_train.y)


train_pr = clf.predict(ts_train.features)
test_pr = clf.predict(ts_test.features)

In [89]:
print(metrics.classification_report(ts_test.y, test_pr, target_names=CLASS_LABELS))

              precision    recall  f1-score   support

   Sedentary       0.97      1.00      0.98       888
       Light       1.00      0.19      0.32        37

    accuracy                           0.97       925
   macro avg       0.98      0.59      0.65       925
weighted avg       0.97      0.97      0.96       925



# Bayesian Risk


In [90]:
c10 = 1
c01 = 10000
p1 = np.mean(ts_train.y)
p0 = 1 - np.mean(ts_test.y)

eta = (c10*p0)/(c01*p1)


In [103]:
Lambda = clf.predict_proba (ts_test.features)

In [106]:
Lambda[0]

array([1.000e+00, 5.951e-12], dtype=float32)

In [92]:
y_pred = (Lambda>eta)*1

In [94]:
y_pred.shape



(925, 2)

In [57]:
np.unique(y_pred)

array([0, 1])

In [84]:
sns.scatterplot(test_proj[y_pred==0,0], test_proj[y_pred==0,1])
sns.scatterplot(test_proj[y_pred==1,0], test_proj[y_pred==1,1])

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

In [76]:
test_proj.shape
y_pred.shape

(925, 2)

In [77]:
test_proj[y_pred==0,0].shape

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

In [85]:
test_proj[y_pred==0]

array([ -5.189,  12.598,  -6.949,  -6.772,  -6.594, -17.918,  -7.663,
         7.432,   2.182,  11.751,  14.051,  14.71 ,  -8.42 , -20.777,
        12.409,  -1.128,   5.56 ,  -8.62 ,  12.728,  -2.338,  -3.815,
        -6.017,   8.511,   0.241,   5.562,  -5.274,   6.771,  11.184,
         1.821,  -7.872,  24.715,  11.929,  14.155,  -9.803, -23.734,
        18.944,   3.777,  -9.767,   6.032,  16.761,  -1.158, -16.902,
        12.37 ,   2.936,  -3.586, -16.988, -18.349,  -3.596,   7.391,
         7.159,  -7.403,  -8.566,  -0.138,   6.476,  -7.732,  -7.059,
        -7.331,  -6.986,  12.097,  -3.524,   8.043,   7.217,  -5.796,
         7.715,   1.293,  -7.194,   6.953,  -0.287,   7.523,  -2.466,
        12.839,   1.572,  13.218,  -9.679,  -0.429,   2.071,   6.838,
        -7.137,  -9.444,  -7.048,  -7.145,  11.583,  -6.204, -18.984,
        -9.594,  10.022, -19.556, -18.15 ,  14.769, -18.353,  -9.498,
        24.065,  -7.038, -18.421,   9.164,  -5.923,  11.709,  13.87 ,
        -8.578,  -2.

In [82]:
test_proj.shape

(925, 2)

In [83]:
y_pred==0

array([[False, False],
       [False,  True],
       [False,  True],
       ...,
       [False,  True],
       [False, False],
       [False,  True]])

In [86]:
Lambda.shape

(925, 2)