In [10]:
import pandas as pd
import numpy as np
import os
from utils import base

In [11]:
# Names of the 22 per-recording channels: the same eleven measurements
# (x/y/z, roll/pitch/yaw and five finger channels) under two hand prefixes.
# NOTE(review): 'sx'/'dx' presumably stand for left/right hand -- confirm
# against the dataset description.
features = [
    # 'sx' channels
    'sx_x', 'sx_y', 'sx_z',
    'sx_roll', 'sx_pitch', 'sx_yaw',
    'sx_thumb', 'sx_forefinger', 'sx_middle_finger',
    'sx_ring_finger', 'sx_little_finger',
    # 'dx' channels, same order as above
    'dx_x', 'dx_y', 'dx_z',
    'dx_roll', 'dx_pitch', 'dx_yaw',
    'dx_thumb', 'dx_forefinger', 'dx_middle_finger',
    'dx_ring_finger', 'dx_little_finger',
]

In [12]:
# Load every recording found under `path`. The layout read here is one
# sub-directory per "week", each holding tab-separated, header-less files.
path = "./tctodd/"

# Only real, non-hidden sub-directories count as weeks. This skips
# ".DS_Store" as well as any other stray file or dot-directory.
dirs = os.listdir(path=path)
weeks = sorted(
    d for d in dirs
    if not d.startswith('.') and os.path.isdir(os.path.join(path, d))
)
if not weeks:
    raise FileNotFoundError(f"no week directories found under {path!r}")

data = []        # one {'label': int, 'time_series': 2-D array} dict per file
labels = dict()  # class name -> integer id, in order of first appearance
label_cnt = 0    # next free integer id

for w in weeks:
    # Trailing separator kept so `temp_path` has the same form as before.
    temp_path = os.path.join(path, w, "")
    # Hidden files inside a week folder (e.g. ".DS_Store") are not recordings.
    filenames = sorted(
        fn for fn in os.listdir(temp_path) if not fn.startswith('.')
    )
    for fn in filenames:
        # Class name = text before the first '.', minus its last two chars.
        # NOTE(review): assumes a one-character separator followed by a
        # one-digit repetition index (e.g. "<sign>-1.<ext>") -- confirm.
        label = fn.split('.')[0][:-2]

        if label not in labels:
            labels[label] = label_cnt
            label_cnt += 1

        data.append({
            'label': labels[label],
            'time_series': pd.read_csv(
                os.path.join(temp_path, fn), header=None, sep='\t'
            ).values,
        })
        

In [13]:
# Single seed reused by the split, the CV folds and the forests below.
seed = 0

# One row per loaded recording: integer class id plus its value array.
df = pd.DataFrame(data=data, columns=['label', 'time_series'])

In [14]:
# NOTE(review): to_time_series_dataset is not used in any cell shown here.
from tslearn.utils import to_time_series_dataset

# Inputs (one array per recording) and their integer class ids.
X, y = df['time_series'], df['label']

# Length argument handed to the interpolation helper.
# NOTE(review): presumably every series is resampled to `n_coords` time
# steps -- confirm against utils.base.interpolate_data.
n_coords = 60

X_interp = base.interpolate_data(X, n_coords)

# NOTE(review): the layout produced by by_row=False is defined in
# utils.base.concatenate_examples -- confirm before relying on it.
X_interp_concatenated = base.concatenate_examples(
    X_interp,
    by_row=False,
)

In [15]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Stratified hold-out split, so class proportions are preserved in both
# parts. The test fraction literal (roughly 2/9) is kept exactly as written
# so the resulting split size does not change.
X_train, X_test, y_train, y_test = train_test_split(
    X_interp_concatenated,
    y,
    test_size=0.222222222222222,
    stratify=y,
    random_state=seed,
)

In [16]:
import sklearn as sk
from sklearn.metrics import accuracy_score
# Imported explicitly: reaching it through `sk.model_selection` only works
# when that submodule has already been loaded somewhere else.
from sklearn.model_selection import StratifiedKFold
import itertools
from sktime.classification.interval_based import TimeSeriesForestClassifier
from tqdm import tqdm

# Manual grid search: stratified k-fold cross-validation on the training
# split, mean validation accuracy per hyper-parameter combination.
n_folds = 5
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)

# Grid axes: [n_estimators candidates, min_interval candidates].
params = [[10, 25, 50, 100, 200], [3, 4, 5, 6, 10]]
params_comb = list(itertools.product(*params))

# acc_scores[i] accumulates the validation accuracy of params_comb[i]
# over the folds and is turned into a mean after the loop.
acc_scores = np.zeros(len(params_comb))

for train_index, val_index in skf.split(X_train, y_train):
    X_train_cv = X_train[train_index]
    y_train_cv = y_train.iloc[train_index]
    X_val_cv = X_train[val_index]
    y_val_cv = y_train.iloc[val_index]

    # The loop variable is deliberately NOT called `params`, so the grid
    # defined above is still intact once the search has finished.
    for i, (n_estimators, min_interval) in enumerate(
            tqdm(params_comb, desc='Searching for the best parameters')):
        ts_forest = TimeSeriesForestClassifier(n_estimators=n_estimators,
                                               min_interval=min_interval,
                                               random_state=seed)

        ts_forest.fit(X_train_cv, y_train_cv)
        preds = ts_forest.predict(X_val_cv)
        res = accuracy_score(y_val_cv, preds)
        acc_scores[i] += res

# Sum over folds -> mean accuracy per combination.
acc_scores = acc_scores / n_folds
best_idx = np.argmax(acc_scores)
print(f'Found best combination! {params_comb[best_idx]} w. accuracy of {acc_scores[best_idx]}.')
best_comb = params_comb[best_idx]

Searching for the best parameters: 100%|██████████| 25/25 [33:30<00:00, 80.41s/it] 
Searching for the best parameters: 100%|██████████| 25/25 [32:04<00:00, 76.96s/it] 
Searching for the best parameters: 100%|██████████| 25/25 [29:01<00:00, 69.64s/it] 
Searching for the best parameters: 100%|██████████| 25/25 [29:00<00:00, 69.63s/it] 
Searching for the best parameters: 100%|██████████| 25/25 [28:58<00:00, 69.53s/it] 

Found best combination! (200, 4) w. accuracy of 0.9067669172932332.





In [17]:
# Refit on the whole training split with the hyper-parameters selected by
# cross-validation (best_comb = (n_estimators, min_interval)), then score
# once on the held-out test split.
ts_forest = TimeSeriesForestClassifier(
    n_estimators=best_comb[0],
    min_interval=best_comb[1],
    random_state=seed,
)
ts_forest.fit(X_train, y_train)

preds = ts_forest.predict(X_test)
res = accuracy_score(y_true=y_test, y_pred=preds)
print(f'Reached an accuracy of {res}.')

Reached an accuracy of 0.9315789473684211.
