In [1]:
import pandas as pd
import numpy as np
import os
from utils import k_eros

In [2]:
# 22 feature names: the same 11 channel suffixes repeated for the 'sx' and 'dx'
# prefixes (presumably left/right hand -- confirm against the dataset description).
_hand_channels = (
    'x', 'y', 'z',
    'roll', 'pitch', 'yaw',
    'thumb', 'forefinger', 'middle_finger', 'ring_finger', 'little_finger',
)
features_1 = [f'{side}_{channel}' for side in ('sx', 'dx') for channel in _hand_channels]



In [3]:
# Root folder of the dataset: one sub-directory per recording session, each
# containing one tab-separated file per recorded sample.
path = "./tctodd/"

# Keep only real sub-directories, so stray files such as .DS_Store are never
# treated as a session folder.
dirs = os.listdir(path=path)
weeks = sorted(d for d in dirs if os.path.isdir(os.path.join(path, d)))

data = []          # one {'label': int, 'time_series': 2-D array} record per file
labels = dict()    # label name -> integer id, assigned in order of first appearance
label_cnt = 0

for w in weeks:
    temp_path = path+w+"/"
    # Skip hidden files so OS metadata is not parsed as a sample.
    filenames = sorted(f for f in os.listdir(temp_path) if not f.startswith('.'))
    for fn in filenames:
        # Strip the extension and the last two characters of the stem
        # (assumes names of the form '<label>-<digit>.<ext>' -- TODO confirm).
        label = fn.split('.')[0][:-2]

        if label not in labels:
            labels[label] = label_cnt
            label_cnt += 1

        data.append({
            'label': labels[label],
            'time_series': pd.read_csv(temp_path+fn, header=None, sep='\t').values,
        })
        

In [4]:
# Collect the per-file records into a frame with one row per loaded file.
df = pd.DataFrame(data, columns=['label', 'time_series'])

# Print how many rows each integer label has, then preview the first rows.
label_counts = df['label'].value_counts()
print(label_counts)
df.head()

0     27
60    27
69    27
68    27
67    27
      ..
29    27
28    27
27    27
26    27
94    27
Name: label, Length: 95, dtype: int64


Unnamed: 0,label,time_series
0,0,"[[-0.064909, 0.034318, -0.043964, 0.626383, 0...."
1,0,"[[-0.107059, -0.126109, -0.053742, 0.612516, 0..."
2,0,"[[-0.061427, -0.082576, -0.102991, 0.735469, 0..."
3,1,"[[-0.128178, 0.02695, -0.050126, 0.455028, 0.4..."
4,1,"[[-0.143672, -0.144416, -0.047447, 0.660979, 0..."


In [None]:
from sklearn.model_selection import train_test_split
X = df['time_series']
y = df['label']
seed = 0

# Stratified hold-out split (test fraction is approximately 2/9). Each of the
# four resulting Series gets a fresh 0..n-1 index so later positional and
# label-based lookups agree.
X_train, X_test, y_train, y_test = (
    part.reset_index(drop=True)
    for part in train_test_split(
        X, y,
        stratify=y,
        test_size=0.222222222222222,
        random_state=seed,
    )
)

In [None]:
# Display the number of training samples per label (sanity check on the split).
y_train.value_counts()

In [None]:
import sklearn as sk
# Import the class explicitly: `sk.model_selection` is only reachable through the
# bare package import if some earlier cell already imported that submodule.
from sklearn.model_selection import StratifiedKFold

# Shuffled, seeded stratified folds shared by the grid searches below.
n_folds = 5
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)

## SVC

In [8]:
import sklearn as sk
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import itertools
from tqdm import tqdm

seed = 0
n_pcs = 54

# One entry per kernel family; each entry lists the value axes whose Cartesian
# product forms that family's candidates. Tuple layouts differ by family:
#   linear -> (kernel, C)
#   rbf    -> (kernel, C, gamma)
#   poly   -> (kernel, C, gamma, degree, coef0)
kernel_grids = [
    [['linear'], [0.0001, 0.0005, 0.001, 0.005, 0.01, 1]],
    [['rbf'], [0.0001, 0.0005, 0.001, 0.005, 0.01, 1], [0.001, 0.01, 1, 2, 5, 10]],
    [['poly'], [1e-5, 0.0001, 0.0005, 0.001, 0.005, 0.01, 1], [0.001, 0.01, 1, 2, 5, 10], [3, 6, 10, 15, 20, 23, 25, 30], [0, 0.1, 0.5, 1, 5, 10]],
]

# Flatten all families into one candidate list, in linear, rbf, poly order.
params_comb = []
for params in kernel_grids:
    params_comb.extend(itertools.product(*params))

In [9]:
# gamma, degree and coef0 used for grid tuples that do not specify them
# (2-element tuples carry only kernel and C; 3-element tuples add gamma).
_svc_tail_defaults = (1, 3, 0)


def _complete_svc_params(params):
    """Pad a grid tuple to the full (kernel, C, gamma, degree, coef0) form.

    2-tuples get gamma=1, degree=3, coef0=0 appended; 3-tuples get degree=3,
    coef0=0 appended; 5-tuples are returned unchanged.
    """
    params = tuple(params)
    return params + _svc_tail_defaults[len(params) - 2:]


# Accuracy of every candidate, summed over folds and averaged afterwards.
acc_scores = np.zeros(len(params_comb))
for train_index, val_index in skf.split(X_train, y_train):
    X_train_cv, y_train_cv = X_train.iloc[train_index], y_train.iloc[train_index]
    X_val_cv, y_val_cv = X_train.iloc[val_index], y_train.iloc[val_index]

    # Column statistics are computed on the fold's training part only and are
    # passed to compute_S_matrix for both the training and the validation part.
    X_train_matrix = np.vstack(X_train_cv)
    means_train = np.mean(X_train_matrix, axis=0)
    vars_train = np.var(X_train_matrix, axis=0)

    # Kernel PCA through the project's k_eros helpers (semantics live in
    # utils.k_eros -- not visible from this notebook).
    S, v_list_train = k_eros.compute_S_matrix(X_train_cv, means_train, vars_train)
    _, v_list_test = k_eros.compute_S_matrix(X_val_cv, means_train, vars_train)
    w = k_eros.compute_weight_vector(S, algorithm=2)
    K_eros_train_mc, V, _ = k_eros.perform_PCA(len(X_train_cv), weight_vector=w, v_list=v_list_train)
    Y, _ = k_eros.project_test_data(len(X_train_cv), len(X_val_cv), w, v_list_train, v_list_test, K_eros_train_mc, V)

    # Keep the first n_pcs columns for training and validation features.
    princ_components = V[:, :n_pcs]
    test_princ_components = Y[:, :n_pcs]

    for i, params in enumerate(tqdm(params_comb, desc='doing parameters search...')):
        combination = _complete_svc_params(params)
        svc = SVC(kernel=combination[0], C=combination[1], gamma=combination[2], degree=combination[3], coef0=combination[4])

        svc.fit(princ_components, y_train_cv.values)

        predictions = svc.predict(test_princ_components)
        res = accuracy_score(y_val_cv.values, predictions)
        acc_scores[i] += res

acc_scores = acc_scores / n_folds
best_idx = np.argmax(acc_scores)
print(f'Found best combination! {params_comb[best_idx]} w. accuracy of {acc_scores[best_idx]}.')
# Store the padded 5-tuple: the evaluation cell reads best_comb[0] .. best_comb[4],
# which would raise IndexError for a winning linear (2-tuple) or rbf (3-tuple) entry.
best_comb = _complete_svc_params(params_comb[best_idx])

doing parameters search...:   0%|          | 1/2016 [00:00<16:04,  2.09it/s]

Found best combination! ('poly', 1e-05, 0.001, 3, 0) w. accuracy of 0.20622568093385213.


doing parameters search...:   0%|          | 7/2016 [00:03<15:54,  2.10it/s]

Found best combination! ('poly', 1e-05, 0.001, 6, 0) w. accuracy of 0.2140077821011673.


doing parameters search...:   1%|          | 17/2016 [00:08<15:45,  2.11it/s]

Found best combination! ('poly', 1e-05, 0.001, 10, 5) w. accuracy of 0.3424124513618677.


doing parameters search...:   1%|          | 18/2016 [00:08<13:35,  2.45it/s]

Found best combination! ('poly', 1e-05, 0.001, 10, 10) w. accuracy of 0.7431906614785992.


doing parameters search...:   5%|▌         | 108/2016 [00:49<13:33,  2.34it/s]

Found best combination! ('poly', 1e-05, 1, 6, 10) w. accuracy of 0.7587548638132295.


doing parameters search...:   6%|▌         | 113/2016 [00:51<12:16,  2.58it/s]

Found best combination! ('poly', 1e-05, 1, 10, 5) w. accuracy of 0.7898832684824902.


doing parameters search...:   6%|▌         | 125/2016 [00:56<11:55,  2.64it/s]

Found best combination! ('poly', 1e-05, 1, 20, 5) w. accuracy of 0.8054474708171206.


doing parameters search...:   6%|▋         | 131/2016 [00:58<11:50,  2.65it/s]

Found best combination! ('poly', 1e-05, 1, 23, 5) w. accuracy of 0.8171206225680934.


doing parameters search...:   7%|▋         | 136/2016 [01:00<13:44,  2.28it/s]

Found best combination! ('poly', 1e-05, 1, 25, 1) w. accuracy of 0.8365758754863813.


doing parameters search...:   7%|▋         | 142/2016 [01:02<13:45,  2.27it/s]

Found best combination! ('poly', 1e-05, 1, 30, 1) w. accuracy of 0.8754863813229572.


doing parameters search...:   9%|▊         | 172/2016 [01:15<13:17,  2.31it/s]

Found best combination! ('poly', 1e-05, 2, 20, 1) w. accuracy of 0.8793774319066148.


doing parameters search...:   9%|▉         | 178/2016 [01:17<13:14,  2.31it/s]

Found best combination! ('poly', 1e-05, 2, 23, 1) w. accuracy of 0.8871595330739299.


doing parameters search...:   9%|▉         | 184/2016 [01:19<13:23,  2.28it/s]

Found best combination! ('poly', 1e-05, 2, 25, 1) w. accuracy of 0.8988326848249028.


doing parameters search...:   9%|▉         | 190/2016 [01:22<13:33,  2.25it/s]

Found best combination! ('poly', 1e-05, 2, 30, 1) w. accuracy of 0.9066147859922179.


doing parameters search...:  11%|█         | 223/2016 [01:35<11:21,  2.63it/s]

Found best combination! ('poly', 1e-05, 5, 23, 0) w. accuracy of 0.9105058365758755.


doing parameters search...:  11%|█▏        | 229/2016 [01:37<11:20,  2.63it/s]

Found best combination! ('poly', 1e-05, 5, 25, 0) w. accuracy of 0.914396887159533.


doing parameters search...:  12%|█▏        | 235/2016 [01:40<11:16,  2.63it/s]

Found best combination! ('poly', 1e-05, 5, 30, 0) w. accuracy of 0.9299610894941635.


doing parameters search...: 100%|██████████| 2016/2016 [13:18<00:00,  2.53it/s]


In [14]:
# Numbers of leading principal components to evaluate on the held-out test set.
n_princ_cs = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 54]
res_per_pc = np.zeros(len(n_princ_cs))

# Column statistics come from the full training split and are passed to
# compute_S_matrix for both the training and the test split.
X_train_matrix = np.vstack(X_train)
means_train = np.mean(X_train_matrix, axis=0)
vars_train = np.var(X_train_matrix, axis=0)

# Kernel PCA through the project's k_eros helpers (semantics live in
# utils.k_eros -- not visible from this notebook).
S, v_list_train = k_eros.compute_S_matrix(X_train, means_train, vars_train)
_, v_list_test = k_eros.compute_S_matrix(X_test, means_train, vars_train)
w = k_eros.compute_weight_vector(S, algorithm=2)
K_eros_train_mc, V, eig_vals = k_eros.perform_PCA(len(X_train), weight_vector=w, v_list=v_list_train)
Y, K_eros_test_mc = k_eros.project_test_data(len(X_train), len(X_test), w, v_list_train, v_list_test, K_eros_train_mc, V)

# best_comb may be a short grid tuple -- (kernel, C) or (kernel, C, gamma) --
# when a linear or rbf candidate won the search. Pad it with the same defaults
# the search used (gamma=1, degree=3, coef0=0) so indexing never fails.
padded_comb = tuple(best_comb) + (1, 3, 0)[len(best_comb) - 2:]
svc = SVC(kernel=padded_comb[0], C=padded_comb[1], gamma=padded_comb[2], degree=padded_comb[3], coef0=padded_comb[4])

for i, n_pc in enumerate(n_princ_cs):
    # Refit on the first n_pc columns and score on the matching test columns.
    princ_components = V[:, :n_pc]
    svc.fit(princ_components, y_train.values)
    test_princ_components = Y[:, :n_pc]
    predictions = svc.predict(test_princ_components)
    res = accuracy_score(y_test.values, predictions)
    res_per_pc[i] = res

# The last entry corresponds to the largest number of components tried.
print(f'Reached an accuracy of {res_per_pc[-1]}.')
svc_res_per_pc = res_per_pc


-27.699317411486145
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.00
result of using 5 components: 0.20
result of using 10 components: 0.54
result of using 15 components: 0.56
result of using 20 components: 0.70
result of using 25 components: 0.75
result of using 30 components: 0.81
result of using 35 components: 0.82
result of using 40 components: 0.85
result of using 45 components: 0.86
result of using 50 components: 0.88
result of using 55 components: 0.89
-27.965133013384865
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.01
result of using 5 components: 0.19
result of using 10 components: 0.50
result of using 15 components: 0.54
result of using 20 components: 0.66
result of using 25 components: 0.71
result of using 30 components: 0.82
result of using 35 components: 0.82
result of using 40 components: 0.88
result of using 45 components: 0.88
result of using 50 components: 0.91
result o

## Random Forest Classifier

In [25]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import ParameterGrid
import itertools
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import KernelPCA
from tqdm import tqdm
# Random-forest hyper-parameter axes; ParameterGrid expands them into dicts whose
# keys are RandomForestClassifier keyword arguments.
criterion = ['gini', 'entropy']
n_estimators = [10, 25, 50, 100, 200]
max_depth = [10, 20, 30, 50, 100]
min_samples_split = [2, 6, 10] # minimum sample number to split a node
min_samples_leaf = [1, 3, 4] # minimum sample number that can be stored in a leaf node
params_grid = {
    'criterion': criterion,
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
}
n_pcs = 54
n_coords = 60
params_list = list(ParameterGrid(params_grid))

# Accuracy of every candidate, summed over folds and averaged afterwards.
acc_scores = np.zeros(len(params_list))
for train_index, val_index in skf.split(X_train, y_train):
    X_train_cv = X_train.iloc[train_index]
    y_train_cv = y_train.iloc[train_index]
    X_val_cv = X_train.iloc[val_index]
    y_val_cv = y_train.iloc[val_index]

    # Column statistics are computed on the fold's training part only and are
    # passed to compute_S_matrix for both the training and the validation part.
    X_train_matrix = np.vstack(X_train_cv)
    means_train = np.mean(X_train_matrix, axis=0)
    vars_train = np.var(X_train_matrix, axis=0)

    # Kernel PCA through the project's k_eros helpers (semantics live in
    # utils.k_eros -- not visible from this notebook).
    S, v_list_train = k_eros.compute_S_matrix(X_train_cv, means_train, vars_train)
    _, v_list_test = k_eros.compute_S_matrix(X_val_cv, means_train, vars_train)
    w = k_eros.compute_weight_vector(S, algorithm=2)
    K_eros_train_mc, V, eig_vals = k_eros.perform_PCA(len(X_train_cv), weight_vector=w, v_list=v_list_train)
    Y, K_eros_test_mc = k_eros.project_test_data(len(X_train_cv), len(X_val_cv), w, v_list_train, v_list_test, K_eros_train_mc, V)
    princ_components = V[:, :n_pcs]
    test_princ_components = Y[:, :n_pcs]

    for i, params in enumerate(tqdm(params_list)):
        # Seed every forest so the search is reproducible, like the split and folds.
        rf = RandomForestClassifier(random_state=seed, **params)
        rf.fit(princ_components, y_train_cv.values)
        predictions = rf.predict(test_princ_components)
        res = accuracy_score(y_val_cv.values, predictions)
        acc_scores[i] += res

acc_scores = acc_scores / n_folds
best_idx = np.argmax(acc_scores)
# Look the winner up in params_list (the RF grid). Indexing params_comb here
# would return an unrelated SVC tuple and break RandomForestClassifier(**best_comb).
print(f'Found best combination! {params_list[best_idx]} w. accuracy of {acc_scores[best_idx]}.')
best_comb = params_list[best_idx]

In [30]:
# Test-set accuracy of the best random-forest configuration for an increasing
# number of leading principal components.
n_princ_cs = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55]
res_per_pc = np.zeros(len(n_princ_cs))

# Column statistics come from the full training split and are passed to
# compute_S_matrix for both the training and the test split.
X_train_matrix = np.vstack(X_train)
means_train = np.mean(X_train_matrix, axis=0)
vars_train = np.var(X_train_matrix, axis=0)

# Kernel PCA through the project's k_eros helpers (semantics live in
# utils.k_eros -- not visible from this notebook).
S, v_list_train = k_eros.compute_S_matrix(X_train, means_train, vars_train)
_, v_list_test = k_eros.compute_S_matrix(X_test, means_train, vars_train)
w = k_eros.compute_weight_vector(S, algorithm=2)
K_eros_train_mc, V, eig_vals = k_eros.perform_PCA(len(X_train), weight_vector=w, v_list=v_list_train)
Y, K_eros_test_mc = k_eros.project_test_data(len(X_train), len(X_test), w, v_list_train, v_list_test, K_eros_train_mc, V)

# best_comb is expected to be a dict of RandomForestClassifier keyword arguments.
# The forest is seeded so that repeated runs report the same accuracies.
rf = RandomForestClassifier(random_state=seed, **best_comb)
for i, n_pc in enumerate(n_princ_cs):
    # Refit on the first n_pc columns and score on the matching test columns.
    princ_components = V[:, :n_pc]
    rf.fit(princ_components, y_train.values)
    test_princ_components = Y[:, :n_pc]
    predictions = rf.predict(test_princ_components)
    res = accuracy_score(y_test.values, predictions)
    res_per_pc[i] = res

# The last entry corresponds to the largest number of components tried.
print(f'Reached an accuracy of {res_per_pc[-1]}.')
rf_res_per_pc = res_per_pc

Performing stratified 10-fold: 0it [00:00, ?it/s]

-27.699317411486145
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.09
result of using 10 components: 0.33
result of using 15 components: 0.34
result of using 20 components: 0.46
result of using 25 components: 0.53
result of using 30 components: 0.54
result of using 35 components: 0.59
result of using 40 components: 0.58
result of using 45 components: 0.59
result of using 50 components: 0.63


Performing stratified 10-fold: 1it [04:39, 279.50s/it]

result of using 55 components: 0.60
-27.965133013384865
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.09
result of using 10 components: 0.30
result of using 15 components: 0.37
result of using 20 components: 0.42
result of using 25 components: 0.49
result of using 30 components: 0.56
result of using 35 components: 0.56
result of using 40 components: 0.62
result of using 45 components: 0.58
result of using 50 components: 0.63


Performing stratified 10-fold: 2it [09:30, 286.35s/it]

result of using 55 components: 0.64
-27.80066850713274
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.12
result of using 10 components: 0.33
result of using 15 components: 0.43
result of using 20 components: 0.45
result of using 25 components: 0.46
result of using 30 components: 0.58
result of using 35 components: 0.59
result of using 40 components: 0.59
result of using 45 components: 0.61
result of using 50 components: 0.65


Performing stratified 10-fold: 3it [14:25, 290.42s/it]

result of using 55 components: 0.64
-27.49469146225962
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.15
result of using 10 components: 0.34
result of using 15 components: 0.40
result of using 20 components: 0.43
result of using 25 components: 0.42
result of using 30 components: 0.51
result of using 35 components: 0.53
result of using 40 components: 0.56
result of using 45 components: 0.59
result of using 50 components: 0.63


Performing stratified 10-fold: 4it [19:18, 291.41s/it]

result of using 55 components: 0.61
-27.54659577896942
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.11
result of using 10 components: 0.33
result of using 15 components: 0.46
result of using 20 components: 0.46
result of using 25 components: 0.48
result of using 30 components: 0.55
result of using 35 components: 0.57
result of using 40 components: 0.62
result of using 45 components: 0.57
result of using 50 components: 0.63


Performing stratified 10-fold: 5it [24:17, 293.89s/it]

result of using 55 components: 0.60
-27.61820750056072
now PSD
K eros mean centered is not PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.11
result of using 10 components: 0.33
result of using 15 components: 0.36
result of using 20 components: 0.42
result of using 25 components: 0.46
result of using 30 components: 0.52
result of using 35 components: 0.56
result of using 40 components: 0.60
result of using 45 components: 0.62
result of using 50 components: 0.57


Performing stratified 10-fold: 6it [29:16, 295.62s/it]

result of using 55 components: 0.59
-27.739814166907166
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.09
result of using 10 components: 0.29
result of using 15 components: 0.37
result of using 20 components: 0.40
result of using 25 components: 0.45
result of using 30 components: 0.59
result of using 35 components: 0.59
result of using 40 components: 0.60
result of using 45 components: 0.61
result of using 50 components: 0.63


Performing stratified 10-fold: 7it [34:14, 296.57s/it]

result of using 55 components: 0.66
-27.40713070793741
now PSD
K eros mean centered is PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.11
result of using 10 components: 0.33
result of using 15 components: 0.39
result of using 20 components: 0.43
result of using 25 components: 0.45
result of using 30 components: 0.50
result of using 35 components: 0.55
result of using 40 components: 0.59
result of using 45 components: 0.62
result of using 50 components: 0.58


Performing stratified 10-fold: 8it [39:11, 296.81s/it]

result of using 55 components: 0.64
-27.730006664333168
now PSD
K eros mean centered is not PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.11
result of using 10 components: 0.37
result of using 15 components: 0.36
result of using 20 components: 0.41
result of using 25 components: 0.45
result of using 30 components: 0.57
result of using 35 components: 0.58
result of using 40 components: 0.58
result of using 45 components: 0.64
result of using 50 components: 0.70


Performing stratified 10-fold: 9it [44:21, 300.87s/it]

result of using 55 components: 0.65
-27.69289686113509
now PSD
K eros mean centered is not PSD
K eros mean centered is PSD
result of using 1 components: 0.02
result of using 5 components: 0.09
result of using 10 components: 0.36
result of using 15 components: 0.33
result of using 20 components: 0.40
result of using 25 components: 0.50
result of using 30 components: 0.55
result of using 35 components: 0.57
result of using 40 components: 0.58
result of using 45 components: 0.66
result of using 50 components: 0.67


Performing stratified 10-fold: 10it [49:25, 296.52s/it]

result of using 55 components: 0.62
Reached an accuracy of 0.6269136065175097.





In [32]:
# Side-by-side summary: test accuracy per number of principal components.
svc_header = 'SVC accuracy x n principal components: '
rf_header = 'RF accuracy x n principal components: '
print(svc_header, svc_res_per_pc)
print(rf_header, rf_res_per_pc)

SVC accuracy x n principal components:  [0.00389409 0.21480575 0.55088765 0.59844662 0.7009834  0.7376292
 0.8202669  0.84639622 0.87173517 0.87603204 0.89552073 0.89823839]
RF accuracy x n principal components:  [0.02105119 0.10642479 0.33139135 0.38007812 0.42842899 0.46938382
 0.54814719 0.56881384 0.59258876 0.61056815 0.6327441  0.62691361]
