In [2]:
import pandas as pd
import numpy as np
import os
import utils

In [11]:
features_1 = [
    'sx_x',
    'sx_y',
    'sx_z',
    'sx_roll',
    'sx_pitch',
    'sx_yaw',
    'sx_thumb',
    'sx_forefinger',
    'sx_middle_finger',
    'sx_ring_finger',
    'sx_little_finger',
    'dx_x',
    'dx_y',
    'dx_z',
    'dx_roll',
    'dx_pitch',
    'dx_yaw',
    'dx_thumb',
    'dx_forefinger',
    'dx_middle_finger',
    'dx_ring_finger',
    'dx_little_finger'
    ]



In [3]:
path = "./tctodd/"
#path = "../../Desktop/MML Project/tctodd/"
dirs = os.listdir(path=path)
weeks = sorted([i for i in dirs if i != ".DS_Store"])
filenames = sorted(os.listdir(path+weeks[1]))

data = []
labels = dict()
label_cnt = 0

for w in weeks:
    temp_path = path+w+"/"
    filenames = sorted(os.listdir(temp_path))
    for fn in filenames:
        label = fn.split('.')[0][:-2]
        
        if label not in labels:
            labels[label] = label_cnt
            label_cnt += 1
            
        data.append({'label':labels[label], 'time_series':pd.read_csv(temp_path+fn, header=None, sep='\t',).values})
        

In [4]:
df = pd.DataFrame(data, columns=['label', 'time_series'])
seed = 0

In [5]:
X = df['time_series']
y = df['label']

n_coords = 60
X_interp = utils.interpolate_data(X, n_coords)
X_interp_concatenated = utils.concatenate_examples(X_interp)

print(f"Shape of new interpolated and stacked X is {X_interp.shape}")

Shape of new interpolated and stacked X is (2565,)


# TimeSeriesForest

In [56]:
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X_interp_concatenated, y,
                                                    stratify=y, 
                                                    test_size=0.1, random_state=seed)

ts_forest = TimeSeriesForestClassifier(random_state=seed)

ts_forest.fit(X_train, y_train)
preds = ts_forest.predict(X_test)
score = accuracy_score(y_test, preds)
print(f"Score of the plain TimeSeriesForestClassifier without cross-validation is {score}")

Score of the plain TimeSeriesForestClassifier without cross-validation is 0.6264591439688716


-------
# 1-Nearest Neighbor + DTW

In [20]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
import time
import os

d = dict()


for id in range(len(X)):
    l = []
    for i in range(X.iloc[id].shape[-1]):
        l.append(pd.Series(X.iloc[id][:,i]))
    d[id] = l

X_df = pd.DataFrame.from_dict(d, orient='index', columns=features_1)
X_df.head()

Unnamed: 0,sx_x,sx_y,sx_z,sx_roll,sx_pitch,sx_yaw,sx_thumb,sx_forefinger,sx_middle_finger,sx_ring_finger,...,dx_y,dx_z,dx_roll,dx_pitch,dx_yaw,dx_thumb,dx_forefinger,dx_middle_finger,dx_ring_finger,dx_little_finger
0,0 -0.064909 1 -0.033878 2 0.015014 3...,0 0.034318 1 0.034764 2 0.030924 3...,0 -0.043964 1 -0.032445 2 -0.012665 3...,0 0.626383 1 0.647282 2 0.646525 3...,0 0.499976 1 0.486303 2 0.478735 3...,0 0.506653 1 0.480203 2 0.466018 3...,0 0.648993 1 0.915143 2 0.993164 3...,0 1.000000 1 1.000000 2 1.000000 3...,0 0.949510 1 0.915993 2 0.911142 3...,0 0.965124 1 1.000000 2 1.000000 3...,...,0 0.073279 1 0.065376 2 0.040059 3...,0 -0.024444 1 -0.009397 2 -0.006763 3...,0 0.233232 1 0.194742 2 0.183840 3...,0 0.560134 1 0.585891 2 0.576174 3...,0 0.606738 1 0.632960 2 0.662819 3...,0 0.685484 1 1.000000 2 1.000000 3...,0 0.758431 1 0.916470 2 0.999608 3...,0 1.0 1 1.0 2 1.0 3 1.0 4 ...,0 0.754117 1 0.974510 2 1.000000 3...,0 1.000000 1 1.000000 2 1.000000 3...
1,0 -0.107059 1 -0.110184 2 -0.093173 3...,0 -0.126109 1 -0.085076 2 -0.052705 3...,0 -0.053742 1 -0.065351 2 -0.101607 3...,0 0.612516 1 0.639799 2 0.677154 3...,0 0.231632 1 0.268084 2 0.312909 3...,0 0.456520 1 0.446730 2 0.419971 3...,0 0.561185 1 0.674464 2 0.555823 3...,0 0.000000 1 0.016346 2 0.561060 3...,0 0.000000 1 0.109372 2 0.571559 3...,0 0.006947 1 0.211904 2 0.552427 3...,...,0 -0.097820 1 -0.025531 2 0.040773 3...,0 -0.010380 1 -0.039983 2 -0.063022 3...,0 0.339790 1 0.291986 2 0.262554 3...,0 0.422849 1 0.510767 2 0.590310 3...,0 0.540611 1 0.559166 2 0.578857 3...,0 0.574041 1 0.683829 2 0.921681 3...,0 0.011373 1 0.552941 2 0.873725 3...,0 0.431530 1 0.984636 2 1.000000 3...,0 0.000000 1 0.298431 2 0.683921 3...,0 0.131444 1 0.773682 2 1.000000 3...
2,0 -0.061427 1 -0.029725 2 0.019032 3...,0 -0.082576 1 -0.068734 2 -0.036810 3...,0 -0.102991 1 -0.110537 2 -0.103750 3...,0 0.735469 1 0.758138 2 0.768636 3...,0 0.365694 1 0.397067 2 0.405271 3...,0 0.355016 1 0.325694 2 0.297458 3...,0 0.482091 1 0.732780 2 0.964297 3...,0 1.000000 1 1.000000 2 1.000000 3...,0 1.000000 1 1.000000 2 1.000000 3...,0 0.898279 1 0.986921 2 0.998547 3...,...,0 -0.008029 1 0.019476 2 0.012064 3...,0 -0.061147 1 -0.059316 2 -0.066683 3...,0 0.222611 1 0.212198 2 0.195804 3...,0 0.545070 1 0.596219 2 0.619584 3...,0 0.623341 1 0.653468 2 0.674245 3...,0 0.521629 1 0.945082 2 1.000000 3...,0 0.758431 1 0.877255 2 0.991765 3...,0 1.000000 1 1.000000 2 1.000000 3...,0 0.521176 1 0.696863 2 0.754117 3...,0 1.000000 1 1.000000 2 1.000000 3...
3,0 -0.128178 1 -0.130143 2 -0.130857 3...,0 0.026950 1 0.027263 2 0.028022 3...,0 -0.050126 1 -0.049769 2 -0.049635 3...,0 0.455028 1 0.454942 2 0.454381 3...,0 0.435252 1 0.434129 2 0.433274 3...,0 0.547972 1 0.548277 2 0.548753 3...,0 0.423775 1 0.423105 2 0.423105 3...,0 0.085542 1 0.085542 2 0.085542 3...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 0.002165 1 0.001083 2 0.001949 3...,...,0 0.077431 1 0.081315 2 0.083861 3...,0 0.112139 1 0.104013 2 0.102406 3...,0 0.217508 1 0.219364 2 0.221622 3...,0 0.492651 1 0.486523 2 0.482006 3...,0 0.811237 1 0.811640 2 0.811872 3...,0 1.000000 1 1.000000 2 1.000000 3...,0 0.250588 1 0.276471 2 0.283529 3...,0 1.000000 1 1.000000 2 1.000000 3...,0 0.633333 1 0.630588 2 0.617255 3...,0 0.870675 1 0.858072 2 0.842180 3...
4,0 -0.143672 1 -0.146038 2 -0.145502 3...,0 -0.144416 1 -0.141290 2 -0.139236 3...,0 -0.047447 1 -0.046911 2 -0.045929 3...,0 0.660979 1 0.660906 2 0.660967 3...,0 0.214054 1 0.213370 2 0.211173 3...,0 0.423035 1 0.425782 2 0.424378 3...,0 0.202579 1 0.205260 2 0.208612 3...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,...,0 -0.163053 1 -0.150819 2 -0.068619 3...,0 -0.061772 1 -0.072845 2 -0.074497 3...,0 0.315265 1 0.217765 2 0.182644 3...,0 0.376241 1 0.446507 2 0.525977 3...,0 0.553734 1 0.607617 2 0.648195 3...,0 0.498458 1 0.620935 2 0.761067 3...,0 0.000000 1 0.000000 2 0.000000 3...,0 0.169464 1 0.672526 2 0.996927 3...,0 0.000000 1 0.012941 2 0.272941 3...,0 0.080481 1 0.292551 2 0.860812 3...


In [22]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_df, y,
                                                    stratify=y, 
                                                    test_size=0.1, random_state=seed)

nn_dtw = KNeighborsTimeSeriesClassifier()


t0_fit = time.time()
nn_dtw.fit(X_train, y_train)
t1_fit = time.time() - t0_fit

os.system(f'say "Ci ha impiegato {t1_fit} secondi per fittare 1-NeighborTimeSeriesClassifier"')

print("Starting prediction")
t0_pred = time.time()
preds = nn_dtw.predict(X_test)
t1_pred = time.time() - t0_pred
score = accuracy_score(y_test, preds)
os.system(f'say "Ci ha impiegato {t1_pred} secondi per predire"')

print(f"Score of the plain 1-NeighborTimeSeriesClassifier without cross-validation is {score}")

os.system(f'say "Accuracy calcolata pari a {score}. Ci sono voluti {t1_fit + t1_pred} secondi per fittare e predire."')

ValueError: Data seen by KNeighborsTimeSeriesClassifier instance has unequal length series, but this KNeighborsTimeSeriesClassifier instance cannot handle unequal length series. Calls with unequal length series may result in error or unreliable results.

# NN-DTW with tslearn

In [None]:
l = [i for i in X]
l1 = []
for i in range(len(l)):
    a = l[i]
    temp_list = [list(a[j, :]) for j in range(a.shape[0])]
    l1.append(temp_list)
l1

In [None]:
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
neigh = KNeighborsTimeSeriesClassifier(n_neighbors=1)
neigh.fit(l1, y)


# NN-DTW with interpolated data

In [55]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
import time
import os

# to match KNeighborsTimeSeriesClassifier requirements, we need predictors x time instants
X_interp_new = X_interp.apply(lambda x: x.T)
print("The new shape is", X_interp_new[0].shape)

X_train, X_test, y_train, y_test = train_test_split(X_interp_new, y,
                                                    stratify=y, 
                                                    test_size=0.1, random_state=seed)
X_train_3d = np.stack([i for i in X_train])
X_test_3d = np.stack([i for i in X_test])

nn_dtw = KNeighborsTimeSeriesClassifier()


t0_fit = time.time()
nn_dtw.fit(X_train_3d, y_train)
t1_fit = time.time() - t0_fit

os.system(f'say "Ci ha impiegato {t1_fit} secondi per fittare 1-NeighborTimeSeriesClassifier"')

print("Starting prediction")
t0_pred = time.time()
preds = nn_dtw.predict(X_test_3d)
t1_pred = time.time() - t0_pred
score = accuracy_score(y_test, preds)
os.system(f'say "Ci ha impiegato {t1_pred} secondi per predire"')

print(f"Score of the plain 1-NeighborTimeSeriesClassifier without cross-validation is {score}")

os.system(f'say "Accuracy calcolata pari a {score}. Ci sono voluti {t1_fit + t1_pred} secondi per fittare e predire."')

The new shape is (22, 60)
Starting prediction
Score of the plain 1-NeighborTimeSeriesClassifier without cross-validation is 0.8132295719844358


0

--------
# Time Series SVC (tslearn tutorial)

In [1]:
from tslearn.utils import to_time_series_dataset
X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3], [2, 5, 6, 7, 8, 9]])
y = [0, 0, 1]

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [2]:
X

array([[[ 1.],
        [ 2.],
        [ 3.],
        [ 4.],
        [nan],
        [nan]],

       [[ 1.],
        [ 2.],
        [ 3.],
        [nan],
        [nan],
        [nan]],

       [[ 2.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.]]])

In [3]:
from tslearn.svm import TimeSeriesSVC
clf = TimeSeriesSVC(C=1.0, kernel="gak")
clf.fit(X, y)