# FingerMovements

In [7]:
###libraries

#Data manipulation
import pandas as pd
import numpy as np
from collections import Counter
from sklearn import preprocessing
import sys
import os

#Time series transformers
from pyts.multivariate.classification import MultivariateClassifier
from pyts.multivariate.transformation import WEASELMUSE
from pyts.classification import KNeighborsClassifier
from pywt import wavedec
import pyeeg
import scipy.stats

#Classifiers
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from lightgbm import LGBMClassifier

#Deep Learning
import mcfly

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from keras import backend
from numba import cuda 

#Random
import multiprocessing
from multiprocessing import Pool
import gc
from tqdm.notebook import tqdm

np.random.seed(42)

In [8]:
###Read the data
dataset = 'FingerMovements'

X_train = np.load('Datasets_clean/{}/X_train.npy'.format(dataset))
y_train = np.load('Datasets_clean/{}/y_train.npy'.format(dataset))
X_test = np.load('Datasets_clean/{}/X_test.npy'.format(dataset))
y_test = np.load('Datasets_clean/{}/y_test.npy'.format(dataset))

X_full = np.vstack([X_train,X_test])
y_full = np.hstack([y_train,y_test])

#Create scores dict
scores = {}

### Functions

In [9]:
def ResampleLinear1D(original, targetLen = 40):
    original = np.array(original, dtype=np.float)
    index_arr = np.linspace(0, len(original)-1, num=targetLen, dtype=np.float)
    index_floor = np.array(index_arr, dtype=np.int) #Round down
    index_ceil = index_floor + 1
    index_rem = index_arr - index_floor #Remain

    val1 = original[index_floor]
    val2 = original[index_ceil % len(original)]
    interp = val1 * (1.0-index_rem) + val2 * index_rem
    assert(len(interp) == targetLen)
    return interp

def calculate_entropy(list_values):
    counter_values = Counter(list_values).most_common()
    probabilities = [elem[1]/len(list_values) for elem in counter_values]
    entropy=scipy.stats.entropy(probabilities)
    return entropy
 
def calculate_statistics(list_values):
    n5 = np.nanpercentile(list_values, 5)
    n25 = np.nanpercentile(list_values, 25)
    n75 = np.nanpercentile(list_values, 75)
    n95 = np.nanpercentile(list_values, 95)
    median = np.nanpercentile(list_values, 50)
    mean = np.nanmean(list_values)
    std = np.nanstd(list_values)
    var = np.nanvar(list_values)
    rms = np.nanmean(np.sqrt(list_values**2))
    return [n5, n25, n75, n95, median, mean, std, var, rms]
 
def calculate_crossings(list_values):
    zero_crossing_indices = np.nonzero(np.diff(np.array(list_values) > 0))[0]
    no_zero_crossings = len(zero_crossing_indices)
    mean_crossing_indices = np.nonzero(np.diff(np.array(list_values) > np.nanmean(list_values)))[0]
    no_mean_crossings = len(mean_crossing_indices)
    return [no_zero_crossings, no_mean_crossings]
 
def get_features(list_values):
    entropy = calculate_entropy(list_values)
    crossings = calculate_crossings(list_values)
    statistics = calculate_statistics(list_values)
    return [entropy] + crossings + statistics

### DTW + 1-Knn

In [10]:
dtw_acc = []

skf = StratifiedKFold(n_splits=10)
for train_index, test_index in tqdm(skf.split(X_full, y_full)):
    
    X_train = X_full[train_index]
    y_train = y_full[train_index]
    X_test = X_full[test_index]
    y_test = y_full[test_index]
    
#     X_train = np.apply_along_axis(ResampleLinear1D, axis = 2, arr = X_train)
#     X_test = np.apply_along_axis(ResampleLinear1D, axis = 2, arr = X_test)
    
    dtw_knn = MultivariateClassifier(KNeighborsClassifier(metric = 'dtw_itakura',
                                                      n_jobs = -1,
                                                      metric_params = {'max_slope' : 2}))
    
    dtw_knn.fit(X_train,y_train)
    acc = dtw_knn.score(X_test,y_test)
    dtw_acc.append(acc)
    
scores['dtw_knn'] = [np.mean(dtw_acc), np.std(dtw_acc)]

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




### WEASELMUSE

In [38]:
accuracies_lr = []
accuracies_rf = []
accuracies_svc = []
accuracies_lgbm = []

skf = StratifiedKFold(n_splits=10)
for train_index, test_index in tqdm(skf.split(X_full, y_full)):
    
    X_train = X_full[train_index]
    y_train = y_full[train_index]
    X_test = X_full[test_index]
    y_test = y_full[test_index]

    #Optimal hyperparameters
    hyperparameters = {}

    #wm + lr    
    hyperparameters['wm_lr'] = {}
    hyperparameters['wm_lr']['word_size'] = 2
    hyperparameters['wm_lr']['n_bins'] = 3
    hyperparameters['wm_lr']['C'] = 0.5

    wm = WEASELMUSE(word_size = hyperparameters['wm_lr']['word_size'], 
                    n_bins = hyperparameters['wm_lr']['n_bins'])

    logistic = LogisticRegression(solver = 'lbfgs',
                                  multi_class = 'auto',
                                  max_iter=3000,
                                  C = hyperparameters['wm_lr']['C'])

    clf = make_pipeline(wm, logistic)
    clf.fit(X_train,y_train)
    acc = clf.score(X_test,y_test)
    accuracies_lr.append(np.round(acc,2))

    #wm + rf    
    hyperparameters['wm_rf'] = {}
    hyperparameters['wm_rf']['word_size'] = 2
    hyperparameters['wm_rf']['n_bins'] = 3
    hyperparameters['wm_rf']['n_estimators'] = 300
    hyperparameters['wm_rf']['max_depth'] = 9

    wm = WEASELMUSE(word_size = hyperparameters['wm_rf']['word_size'], 
                    n_bins = hyperparameters['wm_rf']['n_bins'])

    rf = RandomForestClassifier(n_estimators = hyperparameters['wm_rf']['n_estimators'],
                                max_depth = hyperparameters['wm_rf']['max_depth'])

    clf = make_pipeline(wm, rf)
    clf.fit(X_train,y_train)
    acc = clf.score(X_test,y_test)
    accuracies_rf.append(np.round(acc,2))
    
    #wm + svc
    hyperparameters['wm_svc'] = {}
    hyperparameters['wm_svc']['word_size'] = 2
    hyperparameters['wm_svc']['n_bins'] = 3
    hyperparameters['wm_svc']['C'] = 1
    hyperparameters['wm_svc']['kernel'] = 'rbf'
    hyperparameters['wm_svc']['degree'] = 3
    hyperparameters['wm_svc']['gamma'] = 'scale'

    wm = WEASELMUSE(word_size = hyperparameters['wm_svc']['word_size'], 
                    n_bins = hyperparameters['wm_svc']['n_bins'])

    svc = SVC(C = hyperparameters['wm_svc']['C'],
              kernel = hyperparameters['wm_svc']['kernel'],
              degree = hyperparameters['wm_svc']['degree'],
              gamma = hyperparameters['wm_svc']['gamma'])

    clf = make_pipeline(wm, svc)
    clf.fit(X_train,y_train)
    acc = clf.score(X_test,y_test)
    accuracies_svc.append(np.round(acc,2))

    #wm + lgbm
    hyperparameters['wm_lgbm'] = {}
    hyperparameters['wm_lgbm']['word_size'] = 4
    hyperparameters['wm_lgbm']['n_bins'] = 5
    hyperparameters['wm_lgbm']['num_leaves'] = 92
    hyperparameters['wm_lgbm']['max_depth'] = 3
    hyperparameters['wm_lgbm']['learning_rate'] = 0.3
    hyperparameters['wm_lgbm']['n_estimators'] = 250
    hyperparameters['wm_lgbm']['min_split_gain'] = 0.2
    hyperparameters['wm_lgbm']['min_child_samples'] = 20
    hyperparameters['wm_lgbm']['colsample_bytree'] = 1
    hyperparameters['wm_lgbm']['reg_alpha'] = 0.3
    hyperparameters['wm_lgbm']['reg_lambda'] = 0.2

    wm = WEASELMUSE(word_size = hyperparameters['wm_svc']['word_size'], 
                    n_bins = hyperparameters['wm_svc']['n_bins'])

    def sparse_float(mat):
        return mat.astype('float')

    trans_sparse_float = FunctionTransformer(sparse_float, validate = False)

    lgbm = LGBMClassifier(n_jobs = -1,
                          num_leaves = hyperparameters['wm_lgbm']['num_leaves'],
                          max_depth = hyperparameters['wm_lgbm']['max_depth'],
                          learning_rate = hyperparameters['wm_lgbm']['learning_rate'],
                          n_estimators = hyperparameters['wm_lgbm']['n_estimators'],
                          min_split_gain = hyperparameters['wm_lgbm']['min_split_gain'],
                          min_child_samples = hyperparameters['wm_lgbm']['min_child_samples'],
                          colsample_by_tree = hyperparameters['wm_lgbm']['colsample_bytree'],
                          reg_alpha = hyperparameters['wm_lgbm']['reg_alpha'],
                          reg_lambda = hyperparameters['wm_lgbm']['reg_lambda'])

    clf = make_pipeline(wm, trans_sparse_float, lgbm)
    clf.fit(X_train,y_train)
    acc = clf.score(X_test,y_test)
    accuracies_lgbm.append(np.round(acc,2))
    
scores['wm_lr'] = [np.mean(accuracies_lr), np.std(accuracies_lr)]
scores['wm_rf'] = [np.mean(accuracies_rf), np.std(accuracies_rf)]
scores['wm_svc'] = [np.mean(accuracies_svc), np.std(accuracies_svc)]
scores['wm_lgbm'] = [np.mean(accuracies_lgbm), np.std(accuracies_lgbm)]

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




### Deep Learning

In [12]:
def dl_func(dl_type,
            X_train_dl, 
            y_train_dl,
            X_val_dl,
            y_val_dl,
            X_test_dl,
            y_test_dl):
    
    #Validate diferent architectures
    num_of_candidate_models = 8
    random_search_epoches = 100
    random_search_es = 30
    best_model_epoches = 200
    best_model_es = 30

    for mod_type in [dl_type]:

        #Create architectures
        num_classes = y_train_dl.shape[1]
        metric = 'accuracy'
        models = mcfly.modelgen.generate_models(X_train_dl.shape,
                                                number_of_classes=num_classes,
                                                number_of_models = num_of_candidate_models,
                                                model_types = [mod_type],
                                                metrics=[metric])

        #Save intermediate results
        resultpath = 'temp'
        outputfile = os.path.join(resultpath, 'modelcomparison_{}_{}.json'.format(mod_type,dataset))

        #Find best architecture
        histories, val_accuracies, val_losses = mcfly.find_architecture.train_models_on_samples(X_train_dl, y_train_dl,
                                                                                                X_val_dl, y_val_dl,
                                                                                                models,
                                                                                                nr_epochs=random_search_epoches,
                                                                                                subset_size=None,
                                                                                                verbose=False,
                                                                                                outputfile=outputfile,
                                                                                                metric=metric,
                                                                                                early_stopping_patience=random_search_es
                                                                                                )

        #Select and train best architecture
        best_model_index = np.argmax(val_accuracies)
        best_model, best_params, best_model_types = models[best_model_index]

        es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=best_model_es)
        mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

        history = best_model.fit(X_train_dl, y_train_dl,
                                 epochs=best_model_epoches, validation_data=(X_val_dl, y_val_dl),
                                 callbacks = [es,mc])

        #Accuracy in test dataset
        saved_model = load_model('best_model.h5')
        return saved_model.evaluate(X_test_dl, y_test_dl, verbose = False)[1]

In [None]:
accuracies_cnn = []
accuracies_it = []

skf = StratifiedKFold(n_splits=10)
for train_index, test_index in tqdm(skf.split(X_full, y_full)):
    
    X_train = X_full[train_index]
    y_train = y_full[train_index]
    X_test = X_full[test_index]
    y_test = y_full[test_index]

    #Create train, validation and test sets for DL
    LE = LabelEncoder()
    skf_train_val = StratifiedKFold(n_splits=5)

    for train_index, val_index in skf_train_val.split(X_train, y_train):
        train_index = train_index
        val_index = val_index

    X_train_val = X_train.copy()

    X_train_dl = X_train_val[train_index].copy()
    X_train_dl = np.transpose(X_train_dl, (0,2,1))
    y_train_dl = y_train[train_index].copy()
    y_train_dl = to_categorical(LE.fit_transform(y_train_dl))

    X_val_dl = X_train_val[val_index].copy()
    X_val_dl = np.transpose(X_val_dl, (0,2,1))
    y_val_dl = y_train[val_index].copy()
    y_val_dl = to_categorical(LE.transform(y_val_dl))

    X_test_dl = np.transpose(X_test, (0,2,1))
    y_test_dl = to_categorical(LE.transform(y_test))
    
    acc_cnn = dl_func('CNN',
                     X_train_dl, 
                     y_train_dl,
                     X_val_dl,
                     y_val_dl,
                     X_test_dl,
                     y_test_dl)
    
    acc_it = dl_func('InceptionTime',
                     X_train_dl, 
                     y_train_dl,
                     X_val_dl,
                     y_val_dl,
                     X_test_dl,
                     y_test_dl)

    accuracies_cnn.append(acc_cnn.copy())
    accuracies_it.append(acc_it.copy())

scores['dl_cnn'] = [np.mean(accuracies_cnn), np.std(accuracies_cnn)]
scores['dl_it'] = [np.mean(accuracies_it), np.std(accuracies_it)]

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

The value of model_types is set from ['CNN', 'DeepConvLSTM', 'ResNet', 'InceptionTime'] (default) to ['CNN']
The value of metrics is set from ['accuracy'] (default) to ['accuracy']
Set maximum kernel size for InceptionTime models to number of timesteps.
Train on 300 samples, validate on 74 samples
Epoch 1/200
Epoch 00001: val_accuracy improved from -inf to 0.56757, saving model to best_model.h5
Epoch 2/200
Epoch 00002: val_accuracy improved from 0.56757 to 0.59459, saving model to best_model.h5
Epoch 3/200
Epoch 00003: val_accuracy did not improve from 0.59459
Epoch 4/200
Epoch 00004: val_accuracy improved from 0.59459 to 0.60811, saving model to best_model.h5
Epoch 5/200
Epoch 00005: val_accuracy did not improve from 0.60811
Epoch 6/200
Epoch 00006: val_accuracy did not improve from 0.60811
Epoch 7/200
Epoch 00007: val_accuracy did not improve from 0.60811
Epoch 8/200
Epoch 00008: val_accuracy did not improve from 0.60811
Epoch 9/200
Epoch 00009: val_accuracy did not improve from 0.60

Epoch 28/200
Epoch 00028: val_accuracy did not improve from 0.62162
Epoch 29/200
Epoch 00029: val_accuracy did not improve from 0.62162
Epoch 30/200
Epoch 00030: val_accuracy did not improve from 0.62162
Epoch 31/200
Epoch 00031: val_accuracy did not improve from 0.62162
Epoch 32/200
Epoch 00032: val_accuracy did not improve from 0.62162
Epoch 33/200
Epoch 00033: val_accuracy improved from 0.62162 to 0.64865, saving model to best_model.h5
Epoch 34/200
Epoch 00034: val_accuracy did not improve from 0.64865
Epoch 35/200
Epoch 00035: val_accuracy did not improve from 0.64865
Epoch 36/200
Epoch 00036: val_accuracy did not improve from 0.64865
Epoch 37/200
Epoch 00037: val_accuracy did not improve from 0.64865
Epoch 38/200
Epoch 00038: val_accuracy did not improve from 0.64865
Epoch 39/200
Epoch 00039: val_accuracy did not improve from 0.64865
Epoch 40/200
Epoch 00040: val_accuracy did not improve from 0.64865
Epoch 41/200
Epoch 00041: val_accuracy did not improve from 0.64865
Epoch 42/200


Epoch 00008: val_accuracy did not improve from 0.62162
Epoch 9/200
Epoch 00009: val_accuracy did not improve from 0.62162
Epoch 10/200
Epoch 00010: val_accuracy did not improve from 0.62162
Epoch 11/200
Epoch 00011: val_accuracy did not improve from 0.62162
Epoch 12/200
Epoch 00012: val_accuracy did not improve from 0.62162
Epoch 13/200
Epoch 00013: val_accuracy did not improve from 0.62162
Epoch 14/200
Epoch 00014: val_accuracy did not improve from 0.62162
Epoch 15/200
Epoch 00015: val_accuracy did not improve from 0.62162
Epoch 16/200
Epoch 00016: val_accuracy did not improve from 0.62162
Epoch 17/200
Epoch 00017: val_accuracy did not improve from 0.62162
Epoch 18/200
Epoch 00018: val_accuracy did not improve from 0.62162
Epoch 19/200
Epoch 00019: val_accuracy did not improve from 0.62162
Epoch 20/200
Epoch 00020: val_accuracy did not improve from 0.62162
Epoch 21/200
Epoch 00021: val_accuracy did not improve from 0.62162
Epoch 22/200
Epoch 00022: val_accuracy did not improve from 0.

Epoch 00032: val_accuracy did not improve from 0.63514
Epoch 33/200
Epoch 00033: val_accuracy did not improve from 0.63514
Epoch 34/200
Epoch 00034: val_accuracy did not improve from 0.63514
Epoch 35/200
Epoch 00035: val_accuracy did not improve from 0.63514
Epoch 36/200
Epoch 00036: val_accuracy did not improve from 0.63514
Epoch 37/200
Epoch 00037: val_accuracy did not improve from 0.63514
Epoch 38/200
Epoch 00038: val_accuracy did not improve from 0.63514
Epoch 39/200
Epoch 00039: val_accuracy did not improve from 0.63514
Epoch 40/200
Epoch 00040: val_accuracy did not improve from 0.63514
Epoch 41/200
Epoch 00041: val_accuracy did not improve from 0.63514
Epoch 42/200
Epoch 00042: val_accuracy did not improve from 0.63514
Epoch 43/200
Epoch 00043: val_accuracy did not improve from 0.63514
Epoch 44/200
Epoch 00044: val_accuracy did not improve from 0.63514
Epoch 45/200
Epoch 00045: val_accuracy did not improve from 0.63514
Epoch 46/200
Epoch 00046: val_accuracy did not improve from 0

Epoch 2/200
Epoch 00002: val_accuracy improved from 0.55405 to 0.62162, saving model to best_model.h5
Epoch 3/200
Epoch 00003: val_accuracy did not improve from 0.62162
Epoch 4/200
Epoch 00004: val_accuracy did not improve from 0.62162
Epoch 5/200
Epoch 00005: val_accuracy did not improve from 0.62162
Epoch 6/200
Epoch 00006: val_accuracy did not improve from 0.62162
Epoch 7/200
Epoch 00007: val_accuracy did not improve from 0.62162
Epoch 8/200
Epoch 00008: val_accuracy did not improve from 0.62162
Epoch 9/200
Epoch 00009: val_accuracy did not improve from 0.62162
Epoch 10/200
Epoch 00010: val_accuracy did not improve from 0.62162
Epoch 11/200
Epoch 00011: val_accuracy did not improve from 0.62162
Epoch 12/200
Epoch 00012: val_accuracy did not improve from 0.62162
Epoch 13/200
Epoch 00013: val_accuracy did not improve from 0.62162
Epoch 14/200
Epoch 00014: val_accuracy did not improve from 0.62162
Epoch 15/200
Epoch 00015: val_accuracy improved from 0.62162 to 0.63514, saving model to 

Epoch 30/200
Epoch 00030: val_accuracy did not improve from 0.63514
Epoch 31/200
Epoch 00031: val_accuracy did not improve from 0.63514
Epoch 32/200
Epoch 00032: val_accuracy did not improve from 0.63514
Epoch 33/200
Epoch 00033: val_accuracy did not improve from 0.63514
Epoch 34/200
Epoch 00034: val_accuracy did not improve from 0.63514
Epoch 35/200
Epoch 00035: val_accuracy did not improve from 0.63514
Epoch 36/200
Epoch 00036: val_accuracy did not improve from 0.63514
Epoch 37/200
Epoch 00037: val_accuracy did not improve from 0.63514
Epoch 38/200
Epoch 00038: val_accuracy did not improve from 0.63514
Epoch 39/200
Epoch 00039: val_accuracy did not improve from 0.63514
Epoch 40/200
Epoch 00040: val_accuracy did not improve from 0.63514
Epoch 41/200
Epoch 00041: val_accuracy did not improve from 0.63514
Epoch 42/200
Epoch 00042: val_accuracy did not improve from 0.63514
Epoch 43/200
Epoch 00043: val_accuracy did not improve from 0.63514
Epoch 44/200
Epoch 00044: val_accuracy improved 

Epoch 00011: val_accuracy did not improve from 0.67568
Epoch 12/200
Epoch 00012: val_accuracy did not improve from 0.67568
Epoch 13/200
Epoch 00013: val_accuracy did not improve from 0.67568
Epoch 14/200
Epoch 00014: val_accuracy did not improve from 0.67568
Epoch 15/200
Epoch 00015: val_accuracy did not improve from 0.67568
Epoch 16/200
Epoch 00016: val_accuracy did not improve from 0.67568
Epoch 17/200
Epoch 00017: val_accuracy did not improve from 0.67568
Epoch 18/200
Epoch 00018: val_accuracy did not improve from 0.67568
Epoch 19/200
Epoch 00019: val_accuracy did not improve from 0.67568
Epoch 20/200
Epoch 00020: val_accuracy did not improve from 0.67568
Epoch 21/200
Epoch 00021: val_accuracy did not improve from 0.67568
Epoch 22/200
Epoch 00022: val_accuracy did not improve from 0.67568
Epoch 23/200
Epoch 00023: val_accuracy did not improve from 0.67568
Epoch 24/200
Epoch 00024: val_accuracy did not improve from 0.67568
Epoch 25/200
Epoch 00025: val_accuracy did not improve from 0

Epoch 40/200
Epoch 00040: val_accuracy did not improve from 0.68919
Epoch 41/200
Epoch 00041: val_accuracy did not improve from 0.68919
Epoch 42/200
Epoch 00042: val_accuracy did not improve from 0.68919
Epoch 43/200
Epoch 00043: val_accuracy did not improve from 0.68919
Epoch 44/200
Epoch 00044: val_accuracy did not improve from 0.68919
Epoch 45/200
Epoch 00045: val_accuracy did not improve from 0.68919
Epoch 46/200
Epoch 00046: val_accuracy did not improve from 0.68919
Epoch 47/200
Epoch 00047: val_accuracy did not improve from 0.68919
Epoch 48/200
Epoch 00048: val_accuracy did not improve from 0.68919
Epoch 49/200
Epoch 00049: val_accuracy did not improve from 0.68919
Epoch 50/200
Epoch 00050: val_accuracy did not improve from 0.68919
Epoch 51/200
Epoch 00051: val_accuracy did not improve from 0.68919
Epoch 52/200
Epoch 00052: val_accuracy did not improve from 0.68919
Epoch 53/200
Epoch 00053: val_accuracy did not improve from 0.68919
Epoch 54/200
Epoch 00054: val_accuracy did not i

Epoch 69/200
Epoch 00069: val_accuracy did not improve from 0.70270
Epoch 70/200
Epoch 00070: val_accuracy did not improve from 0.70270
Epoch 71/200
Epoch 00071: val_accuracy did not improve from 0.70270
Epoch 72/200
Epoch 00072: val_accuracy did not improve from 0.70270
Epoch 73/200
Epoch 00073: val_accuracy did not improve from 0.70270
Epoch 74/200
Epoch 00074: val_accuracy did not improve from 0.70270
Epoch 75/200
Epoch 00075: val_accuracy did not improve from 0.70270
Epoch 76/200
Epoch 00076: val_accuracy did not improve from 0.70270
Epoch 77/200
Epoch 00077: val_accuracy did not improve from 0.70270
Epoch 78/200
Epoch 00078: val_accuracy did not improve from 0.70270
Epoch 79/200
Epoch 00079: val_accuracy did not improve from 0.70270
Epoch 80/200
Epoch 00080: val_accuracy did not improve from 0.70270
Epoch 81/200
Epoch 00081: val_accuracy did not improve from 0.70270
Epoch 82/200
Epoch 00082: val_accuracy did not improve from 0.70270
Epoch 83/200
Epoch 00083: val_accuracy did not i

Epoch 10/200
Epoch 00010: val_accuracy did not improve from 0.60811
Epoch 11/200
Epoch 00011: val_accuracy did not improve from 0.60811
Epoch 12/200
Epoch 00012: val_accuracy did not improve from 0.60811
Epoch 13/200
Epoch 00013: val_accuracy did not improve from 0.60811
Epoch 14/200
Epoch 00014: val_accuracy did not improve from 0.60811
Epoch 15/200
Epoch 00015: val_accuracy did not improve from 0.60811
Epoch 16/200
Epoch 00016: val_accuracy improved from 0.60811 to 0.63514, saving model to best_model.h5
Epoch 17/200
Epoch 00017: val_accuracy did not improve from 0.63514
Epoch 18/200
Epoch 00018: val_accuracy did not improve from 0.63514
Epoch 19/200
Epoch 00019: val_accuracy did not improve from 0.63514
Epoch 20/200
Epoch 00020: val_accuracy did not improve from 0.63514
Epoch 21/200
Epoch 00021: val_accuracy did not improve from 0.63514
Epoch 22/200
Epoch 00022: val_accuracy did not improve from 0.63514
Epoch 23/200
Epoch 00023: val_accuracy did not improve from 0.63514
Epoch 24/200


Epoch 39/200
Epoch 00039: val_accuracy did not improve from 0.63514
Epoch 40/200
Epoch 00040: val_accuracy did not improve from 0.63514
Epoch 41/200
Epoch 00041: val_accuracy did not improve from 0.63514
Epoch 42/200
Epoch 00042: val_accuracy did not improve from 0.63514
Epoch 43/200
Epoch 00043: val_accuracy did not improve from 0.63514
Epoch 44/200
Epoch 00044: val_accuracy did not improve from 0.63514
Epoch 45/200
Epoch 00045: val_accuracy did not improve from 0.63514
Epoch 46/200
Epoch 00046: val_accuracy did not improve from 0.63514
Epoch 00046: early stopping
The value of model_types is set from ['CNN', 'DeepConvLSTM', 'ResNet', 'InceptionTime'] (default) to ['InceptionTime']
The value of metrics is set from ['accuracy'] (default) to ['accuracy']
Set maximum kernel size for InceptionTime models to number of timesteps.
Train on 300 samples, validate on 74 samples
Epoch 1/200
Epoch 00001: val_accuracy improved from -inf to 0.62162, saving model to best_model.h5
Epoch 2/200
Epoch 00

Epoch 00006: val_accuracy did not improve from 0.64865
Epoch 7/200
Epoch 00007: val_accuracy did not improve from 0.64865
Epoch 8/200
Epoch 00008: val_accuracy did not improve from 0.64865
Epoch 9/200
Epoch 00009: val_accuracy did not improve from 0.64865
Epoch 10/200
Epoch 00010: val_accuracy did not improve from 0.64865
Epoch 11/200
Epoch 00011: val_accuracy did not improve from 0.64865
Epoch 12/200
Epoch 00012: val_accuracy did not improve from 0.64865
Epoch 13/200
Epoch 00013: val_accuracy did not improve from 0.64865
Epoch 14/200
Epoch 00014: val_accuracy did not improve from 0.64865
Epoch 15/200
Epoch 00015: val_accuracy improved from 0.64865 to 0.66216, saving model to best_model.h5
Epoch 16/200
Epoch 00016: val_accuracy improved from 0.66216 to 0.67568, saving model to best_model.h5
Epoch 17/200
Epoch 00017: val_accuracy did not improve from 0.67568
Epoch 18/200
Epoch 00018: val_accuracy did not improve from 0.67568
Epoch 19/200
Epoch 00019: val_accuracy improved from 0.67568 t

Epoch 00012: val_accuracy did not improve from 0.56757
Epoch 13/200
Epoch 00013: val_accuracy did not improve from 0.56757
Epoch 14/200
Epoch 00014: val_accuracy did not improve from 0.56757
Epoch 15/200
Epoch 00015: val_accuracy improved from 0.56757 to 0.59459, saving model to best_model.h5
Epoch 16/200
Epoch 00016: val_accuracy did not improve from 0.59459
Epoch 17/200
Epoch 00017: val_accuracy did not improve from 0.59459
Epoch 18/200
Epoch 00018: val_accuracy did not improve from 0.59459
Epoch 19/200
Epoch 00019: val_accuracy did not improve from 0.59459
Epoch 20/200
Epoch 00020: val_accuracy did not improve from 0.59459
Epoch 21/200
Epoch 00021: val_accuracy did not improve from 0.59459
Epoch 22/200
Epoch 00022: val_accuracy did not improve from 0.59459
Epoch 23/200
Epoch 00023: val_accuracy did not improve from 0.59459
Epoch 24/200
Epoch 00024: val_accuracy improved from 0.59459 to 0.60811, saving model to best_model.h5
Epoch 25/200
Epoch 00025: val_accuracy improved from 0.6081

### PyEEG

In [40]:
pyeeg_lr_acc = []
pyeeg_rf_acc = []
pyeeg_svc_acc = []
pyeeg_lgbm_acc = []

skf = StratifiedKFold(n_splits=10)
for train_index, test_index in tqdm(skf.split(X_full, y_full)):
    
    X_train = X_full[train_index]
    y_train = y_full[train_index]
    X_test = X_full[test_index]
    y_test = y_full[test_index]
    
    feat_dict = {}

    for df,name in zip([X_train,X_test],['X_train_pyeeg','X_test_pyeeg']):
        feat_dict[name] = np.array([]).reshape(-1,2100)

        for ind in range(0,df.shape[0]):
            features = np.array([])

            for channel in range(0,df[ind].shape[0]):

                eeg_series = df[ind][channel]

                coefs = wavedec(eeg_series, 'db4', level=4)

                for band in coefs:
                    features = np.hstack([features,get_features(band)])

                fft_power = pyeeg.bin_power(eeg_series, Band = [0.5,4,7,12,30,100], Fs = 256)
                power_spectral_intensity = fft_power[0]
                relative_intensity_ratio = fft_power[1]
#                 fisher_info = pyeeg.fisher_info(eeg_series, Tau = 1, DE = 90)
                pfd = pyeeg.pfd(eeg_series)
                spectral_entropy = pyeeg.spectral_entropy(eeg_series, Band = [0.5,4,7,12,30,100], Fs = 256,
                                                          Power_Ratio = relative_intensity_ratio)

#                 eeg_series = ResampleLinear1D(eeg_series,200)

#                 ap_entropy = pyeeg.ap_entropy(eeg_series, M = 3, R = 0.25*np.std(eeg_series))
                hfd = pyeeg.hfd(eeg_series, Kmax = 10)
                hjorth = pyeeg.hjorth(eeg_series)
                hjorth_mob = hjorth[0]
                hjorth_comp = hjorth[1]
    #             hurst = pyeeg.hurst(eeg_series)


                features = np.hstack([features, power_spectral_intensity, relative_intensity_ratio,
                                      hfd, hjorth_mob, hjorth_comp, pfd, spectral_entropy])


            feat_dict[name] = np.vstack([feat_dict[name],features])

    X_train_pyeeg = feat_dict['X_train_pyeeg'].copy()
    X_test_pyeeg = feat_dict['X_test_pyeeg'].copy()
    
    X_train_pyeeg = np.nan_to_num(X_train_pyeeg)
    X_test_pyeeg = np.nan_to_num(X_test_pyeeg)
    scaler = preprocessing.StandardScaler().fit(X_train_pyeeg)
    X_train_pyeeg = scaler.transform(X_train_pyeeg)
    X_test_pyeeg = scaler.transform(X_test_pyeeg)
    
    #pyeeg + lr    
    hyperparameters = {}
    hyperparameters['pyeeg_lr'] = {}
    hyperparameters['pyeeg_lr']['C'] = 1

    logistic = LogisticRegression(solver = 'lbfgs',
                                  multi_class = 'auto',
                                  max_iter=3000,
                                  C = hyperparameters['pyeeg_lr']['C'])

    logistic.fit(X_train_pyeeg,y_train)
    acc = logistic.score(X_test_pyeeg,y_test)
    pyeeg_lr_acc.append(np.round(acc,2).copy())
    
    #pyeeg + rf    
    hyperparameters = {}
    hyperparameters['pyeeg_rf'] = {}
    hyperparameters['pyeeg_rf']['n_estimators'] = 500
    hyperparameters['pyeeg_rf']['max_depth'] = 3

    rf = RandomForestClassifier(n_estimators = hyperparameters['pyeeg_rf']['n_estimators'],
                                max_depth = hyperparameters['pyeeg_rf']['max_depth'])

    rf.fit(X_train_pyeeg,y_train)
    acc = rf.score(X_test_pyeeg,y_test)
    pyeeg_rf_acc.append(np.round(acc,2).copy())
    
    #pyeeg + svc
    hyperparameters = {}
    hyperparameters['pyeeg_svc'] = {}
    hyperparameters['pyeeg_svc']['C'] = 2
    hyperparameters['pyeeg_svc']['kernel'] = 'rbf'
    hyperparameters['pyeeg_svc']['degree'] = 3
    hyperparameters['pyeeg_svc']['gamma'] = 'scale'

    svc = SVC(C = hyperparameters['pyeeg_svc']['C'],
              kernel = hyperparameters['pyeeg_svc']['kernel'],
              degree = hyperparameters['pyeeg_svc']['degree'],
              gamma = hyperparameters['pyeeg_svc']['gamma'])

    clf = make_pipeline(svc)
    clf.fit(X_train_pyeeg,y_train)
    acc = clf.score(X_test_pyeeg,y_test)
    pyeeg_svc_acc.append(np.round(acc,2).copy())

    
    #pyeeg + lgbm
    hyperparameters = {}
    hyperparameters['pyeeg_lgbm'] = {}
    hyperparameters['pyeeg_lgbm']['num_leaves'] = 254
    hyperparameters['pyeeg_lgbm']['max_depth'] = 2
    hyperparameters['pyeeg_lgbm']['learning_rate'] = 0.1
    hyperparameters['pyeeg_lgbm']['n_estimators'] = 500
    hyperparameters['pyeeg_lgbm']['min_split_gain'] = 0.2
    hyperparameters['pyeeg_lgbm']['min_child_samples'] = 20
    hyperparameters['pyeeg_lgbm']['colsample_bytree'] = 1
    hyperparameters['pyeeg_lgbm']['reg_alpha'] = 0.1
    hyperparameters['pyeeg_lgbm']['reg_lambda'] = 0

    def sparse_float(mat):
        return mat.astype('float')

    trans_sparse_float = FunctionTransformer(sparse_float, validate = False)

    lgbm = LGBMClassifier(n_jobs = -1,
                          num_leaves = hyperparameters['pyeeg_lgbm']['num_leaves'],
                          max_depth = hyperparameters['pyeeg_lgbm']['max_depth'],
                          learning_rate = hyperparameters['pyeeg_lgbm']['learning_rate'],
                          n_estimators = hyperparameters['pyeeg_lgbm']['n_estimators'],
                          min_split_gain = hyperparameters['pyeeg_lgbm']['min_split_gain'],
                          min_child_samples = hyperparameters['pyeeg_lgbm']['min_child_samples'],
                          colsample_by_tree = hyperparameters['pyeeg_lgbm']['colsample_bytree'],
                          reg_alpha = hyperparameters['pyeeg_lgbm']['reg_alpha'],
                          reg_lambda = hyperparameters['pyeeg_lgbm']['reg_lambda'])

    clf = make_pipeline(trans_sparse_float, lgbm)
    clf.fit(X_train_pyeeg,y_train)
    acc = clf.score(X_test_pyeeg,y_test)
    pyeeg_lgbm_acc.append(np.round(acc,2).copy())

scores['pyeeg_lr'] = [np.mean(pyeeg_lr_acc), np.std(pyeeg_lr_acc)]
scores['pyeeg_rf'] = [np.mean(pyeeg_rf_acc), np.std(pyeeg_rf_acc)]
scores['pyeeg_svc'] = [np.mean(pyeeg_svc_acc), np.std(pyeeg_svc_acc)]
scores['pyeeg_lgbm'] = [np.mean(pyeeg_lgbm_acc), np.std(pyeeg_lgbm_acc)]


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))
  "boundary effects.").format(level))





In [41]:
#Salva scores
individual_results = pd.DataFrame({'dtw_acc' : dtw_acc,
                                   'wm_lr_acc' : accuracies_lr,
                                   'wm_rf_acc' : accuracies_rf,
                                   'wm_svc_acc' : accuracies_svc,
                                   'wm_lgbm_acc' : accuracies_lgbm,
                                   'cnn_acc' : accuracies_cnn,
                                   'it_acc' : accuracies_it,
                                   'pyeeg_lr_acc' : pyeeg_lr_acc,
                                   'pyeeg_rf_acc' : pyeeg_rf_acc,
                                   'pyeeg_svc_acc' : pyeeg_svc_acc,
                                   'pyeeg_lgbm_acc' : pyeeg_lgbm_acc})

individual_results.to_csv('results/Individual_Scores_{}.csv'.format(dataset), index = False)

results = pd.DataFrame(scores).T.reset_index().rename(columns={0 : 'mean',
                                                               1 : 'std',
                                                               'index' : 'method'}).sort_values('mean', ascending = False)

results.to_csv('results/Scores_{}.csv'.format(dataset), index = False)

In [42]:
results

Unnamed: 0,method,mean,std
9,pyeeg_svc,0.588,0.058275
8,pyeeg_rf,0.581,0.063632
10,pyeeg_lgbm,0.556,0.089688
6,dl_it,0.555459,0.046151
7,pyeeg_lr,0.548,0.058103
0,dtw_knn,0.540883,0.061287
2,wm_rf,0.535,0.088459
1,wm_lr,0.524,0.051029
5,dl_cnn,0.521777,0.061977
4,wm_lgbm,0.521,0.07341
