In [2]:
import pandas as pd
import numpy as np
import os
import utils

# Names for 22 channels: the same 11 measurements repeated for two prefixes.
# Presumably 'sx' = left hand and 'dx' = right hand (Italian sinistra/destra),
# and presumably these label the columns of each time series -- confirm; the
# list is not referenced by any cell visible here.
_hand_prefixes = ('sx', 'dx')
_per_hand_channels = (
    'x', 'y', 'z',
    'roll', 'pitch', 'yaw',
    'thumb', 'forefinger', 'middle_finger', 'ring_finger', 'little_finger',
)

# All 'sx_*' names first, then all 'dx_*' names, each in the order given above.
features_1 = [
    f'{hand}_{channel}'
    for hand in _hand_prefixes
    for channel in _per_hand_channels
]



In [3]:
# Root folder of the dataset: one sub-folder per "week", each holding one
# tab-separated file per recorded example.
path = "./tctodd/"
# Alternative location used during development:
#path = "../../Desktop/MML Project/tctodd/"

dirs = os.listdir(path=path)
# Keep only real sub-folders; hidden entries such as macOS ".DS_Store" are skipped.
weeks = sorted(
    d for d in dirs
    if not d.startswith(".") and os.path.isdir(os.path.join(path, d))
)

data = []        # one {'label': int, 'time_series': 2-D ndarray} record per file
labels = dict()  # class name -> integer id, assigned in order of first appearance
label_cnt = 0

for w in weeks:
    # Trailing separator is kept so `temp_path + fn` remains a valid path.
    temp_path = os.path.join(path, w, "")
    # Hidden files can also appear inside the week folders; without this filter
    # a ".DS_Store" would be parsed as data and given the empty label "".
    filenames = sorted(fn for fn in os.listdir(temp_path) if not fn.startswith("."))
    for fn in filenames:
        # Class name = text before the first '.', minus its last two characters.
        # Assumes names shaped like "<class>-<digit>.<ext>" -- TODO confirm.
        label = fn.split('.')[0][:-2]

        if label not in labels:
            labels[label] = label_cnt
            label_cnt += 1

        series = pd.read_csv(os.path.join(temp_path, fn), header=None, sep='\t').values
        data.append({'label': labels[label], 'time_series': series})
        

In [4]:
# Random state shared by the train/test splits in the cells below.
seed = 0

# One row per loaded file: integer class id plus the array read from that file.
df = pd.DataFrame(data=data, columns=['label', 'time_series'])
X, y = df['time_series'], df['label']

## sktime k-nearest-neighbours time series classifier on interpolated data

In [6]:
import os
import shutil
import subprocess
import time

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier


def _announce(message):
    """Speak ``message`` aloud through the macOS ``say`` command, if present.

    The text is passed as a single argv entry (no shell involved), and the
    call is skipped when ``say`` is not on PATH, so the cell also runs
    cleanly on systems without it.
    """
    say_cmd = shutil.which("say")
    if say_cmd is not None:
        # Best-effort notification: a failing `say` must not abort the cell.
        subprocess.run([say_cmd, message], check=False)


# Optional step: interpolate every series with `n_coords` as the target size
# (the printed per-example shape below is (22, 60) for n_coords = 60).
n_coords = 60
X_interp = utils.interpolate_data(X, n_coords)
# NOTE(review): not used by any cell visible here; kept in case later cells rely on it.
X_interp_concatenated = utils.concatenate_examples(X_interp)

print(f"Shape of new interpolated and stacked X is {X_interp.shape}")

# KNeighborsTimeSeriesClassifier needs predictors x time instants, so every
# example is transposed.
X_interp_new = X_interp.apply(lambda x: x.T)
print("The new shape is", X_interp_new[0].shape)

# Stratified 90/10 hold-out split, reproducible through `seed`.
X_train, X_test, y_train, y_test = train_test_split(X_interp_new, y,
                                                    stratify=y,
                                                    test_size=0.1, random_state=seed)
# Stack the per-example 2-D arrays into one 3-D array per split.
X_train_3d = np.stack(list(X_train))
X_test_3d = np.stack(list(X_test))

# n_neighbors is spelled out to match the "1-Neighbor" wording of the messages;
# the distance is left at the library default (presumably DTW, given the
# variable name -- confirm against the installed sktime version).
nn_dtw = KNeighborsTimeSeriesClassifier(n_neighbors=1)

# perf_counter is a monotonic clock, so the measured durations cannot be
# distorted by system clock adjustments.
t0_fit = time.perf_counter()
nn_dtw.fit(X_train_3d, y_train)
t1_fit = time.perf_counter() - t0_fit  # fit duration in seconds

_announce(f"Ci ha impiegato {t1_fit} secondi per fittare 1-NeighborTimeSeriesClassifier")

print("Starting prediction")
t0_pred = time.perf_counter()
preds = nn_dtw.predict(X_test_3d)
t1_pred = time.perf_counter() - t0_pred  # prediction duration in seconds
score = accuracy_score(y_test, preds)
_announce(f"Ci ha impiegato {t1_pred} secondi per predire")

print(f"Score of the plain 1-NeighborTimeSeriesClassifier without cross-validation is {score}")

_announce(f"Accuracy calcolata pari a {score}. Ci sono voluti {t1_fit + t1_pred} secondi per fittare e predire.")

Shape of new interpolated and stacked X is (2565,)
The new shape is (22, 60)
Starting prediction
Score of the plain 1-NeighborTimeSeriesClassifier without cross-validation is 0.8132295719844358


0

In [7]:
# Display the accuracy currently stored in `score` as this cell's output.
score

0.8132295719844358

# NO DATA INTERPOLATION

## Implementation from scratch

In [4]:
# Shape of the series with the most rows; on ties the first one in X wins
# (a stable sort with reverse=True keeps the original order of equal keys).
longest_series_shape = sorted(
    (series.shape for series in X),
    key=lambda shape: shape[0],
    reverse=True,
)[0]

# Value passed to the padding helper as `flag_value`.
# Presumably a sentinel that utils.DTW recognises as padding -- TODO confirm.
flag_value = 10000


def _pad_to_longest(series):
    """Pad one series up to `longest_series_shape` using the project helper."""
    return utils.fill_return_array(longest_series_shape, series, flag_value=flag_value)


new_X = X.apply(_pad_to_longest)

# Sanity check: every padded example should report the same shape.
new_X.apply(lambda padded: padded.shape).value_counts()

(2992,)    2565
Name: time_series, dtype: int64

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Stratified 90/10 hold-out split of the padded series, reproducible through `seed`.
X_train, X_test, y_train, y_test = train_test_split(
    new_X, y, test_size=0.1, stratify=y, random_state=seed
)

# Turn each pandas Series of per-example arrays into a single numpy array.
train_list = np.array(list(X_train))
test_list = np.array(list(X_test))

In [8]:
# 1-nearest-neighbour classifier that uses the project's own utils.DTW callable
# as the distance between two padded examples.
knn = KNeighborsClassifier(metric=utils.DTW, n_neighbors=1).fit(train_list, y_train)

# Hold-out accuracy on the 10% test split.
preds = knn.predict(test_list)
score = accuracy_score(y_true=y_test, y_pred=preds)
print(score)

0.8


## tslearn implementation (the fastest)

In [5]:
from tslearn.utils import to_time_series_dataset

# Convert the Series of per-example arrays into one tslearn dataset array.
# NOTE(review): tslearn presumably pads shorter series (with NaN) to a common
# length here -- confirm against the tslearn documentation.
X_new = to_time_series_dataset(X)

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Stratified 90/10 hold-out split of the tslearn dataset, reproducible through `seed`.
X_train, X_test, y_train, y_test = train_test_split(
    X_new, y, test_size=0.1, stratify=y, random_state=seed
)

In [None]:
# NOTE: this import rebinds the name `KNeighborsTimeSeriesClassifier`, which an
# earlier cell imported from sktime; after this cell runs the name refers to
# the tslearn class.
from tslearn.neighbors import KNeighborsTimeSeriesClassifier

# 1-nearest-neighbour classifier with tslearn's 'dtw' metric.
knn = KNeighborsTimeSeriesClassifier(metric='dtw', n_neighbors=1)
knn.fit(X_train, y_train)

print("predicting")
preds = knn.predict(X_test)
score = accuracy_score(y_true=y_test, y_pred=preds)
print(f"KNN (tslearn, dtw) accuracy score is {score}")