# Notebook to test Classififers

In [5]:
# Imports
import os, sys
import numpy as np

# Import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
f1_scorer = make_scorer(f1_score) 

# to enable local imports
module_path = os.path.abspath('../code')
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)

from machine_learning_load_data import loadOnlineEEGdata

/home/nvidia/masterthesis/thesis_eeg/code


In [6]:
from utils import loadTargetLabelsTxt

# Load some online EEG Data
eegData, freqData, entropyData = loadOnlineEEGdata(dirPath='../../EEG_Data/eeg_data_online', splitData=True)
eegData_exp, freqData_exp, entropyData_exp = loadOnlineEEGdata(dirPath='../../EEG_Data/muse_data', splitData=True)

##################
# online eeg data
##################
X_train_eeg, y_train_eeg, X_test_eeg, y_test_eeg = eegData
X_train_freq, y_train_freq, X_test_freq, y_test_freq = freqData
X_train_entropy, y_train_entropy, X_test_entropy, y_test_entropy = entropyData

# reshape
X_train_freq = X_train_freq.reshape(X_train_freq.shape[0], X_train_freq.shape[2])
X_test_freq = X_test_freq.reshape(X_test_freq.shape[0], X_test_freq.shape[2])

X_train_entropy = X_train_entropy.reshape(X_train_entropy.shape[0], X_train_entropy.shape[2])
X_test_entropy = X_test_entropy.reshape(X_test_entropy.shape[0], X_test_entropy.shape[2])

targetLabelsDict = loadTargetLabelsTxt(filePath='../../EEG_Data/eeg_data_online/target_labels.txt')
targetNames = ['Fatigue ({})'.format(targetLabelsDict['FATIGUE']),
               'Normal ({})'.format(targetLabelsDict['NORMAL'])]


##################
# experiment data
##################
X_train_eeg_exp, y_train_eeg_exp, X_test_eeg_exp, y_test_eeg_exp = eegData_exp
X_train_freq_exp, y_train_freq_exp, X_test_freq_exp, y_test_freq_exp = freqData_exp
X_train_entropy_exp, y_train_entropy_exp, X_test_entropy_exp, y_test_entropy_exp = entropyData_exp


# reshape
X_train_freq_exp = X_train_freq_exp.reshape(X_train_freq_exp.shape[0], X_train_freq_exp.shape[2])
X_test_freq_exp = X_test_freq_exp.reshape(X_test_freq_exp.shape[0], X_test_freq_exp.shape[2])

X_train_entropy_exp = X_train_entropy_exp.reshape(X_train_entropy_exp.shape[0], X_train_entropy_exp.shape[2])
X_test_entropy_exp = X_test_entropy_exp.reshape(X_test_entropy_exp.shape[0], X_test_entropy_exp.shape[2])

targetLabelsDict_exp = loadTargetLabelsTxt(filePath='../../EEG_Data/muse_data/target_labels.txt')
targetNames_exp = ['AWAKE ({})'.format(targetLabelsDict_exp['AWAKE']),
               'FATIGUE ({})'.format(targetLabelsDict_exp['FATIGUE'])]

Loading Online EEG Data from ../../EEG_Data/eeg_data_online ...
EEG Data Shape:
(5024, 512, 40) (5024,) (2154, 512, 40) (2154,)
Freq Data Shape:
(1008, 1, 1200) (1008,) (432, 1, 1200) (432,)
Entropy Data Shape:
(5024, 1, 200) (5024,) (2154, 1, 200) (2154,)
Loading Online EEG Data from ../../EEG_Data/muse_data ...
EEG Data Shape:
(5393, 512, 4) (5393,) (2312, 512, 4) (2312,)
Freq Data Shape:
(1084, 1, 120) (1084,) (465, 1, 120) (465,)
Entropy Data Shape:
(5393, 1, 20) (5393,) (2312, 1, 20) (2312,)


In [12]:
import time

def testModel(model, paramGrid, X, y, n_jobs=-1, scoring=f1_scorer, kFoldTimes=8):
    ''' Test the Model with the '''
    start_time = time.time()
    print("Testing Classifier: {}".format(model.__class__.__name__))
    print("Scoring: {}".format(scoring))
    print("K-fold times: {} --- n-jobs: {}".format(kFoldTimes, n_jobs))
    print("\n")
    
    # create a grid search
    grid_search = GridSearchCV(model, param_grid, cv=kFoldTimes, scoring=scoring, return_train_score=True, n_jobs=n_jobs)

    # fit it with the data
    result = grid_search.fit(X_train_entropy, y_train_entropy)

    print("Best Params: {}".format(grid_search.best_params_))
    print("Best Estimator: {}".format(grid_search.best_estimator_))

    cvres = grid_search.cv_results_
    for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
        print(mean_score, params)
    
    print('Minutes taken: ',(time.time() - start_time)/60)
    return grid_search.best_estimator_

def evaluateWithAllDatasets(model, paramGrid, title):
    
    print("\n############# {} ############".format(title.upper()))
    
    print("##################################################")
    print("#### Model for Entropy Data - Online EEG Data ####")
    bestEstimator_entropy_eeg_online= testModel(model=model, paramGrid=param_grid, X=X_train_entropy, y=y_train_entropy)

    print("\n\n##################################################")
    print("#### Model for Frequency Data - Online EEG Data ####")
    bestEstimator_freq_eeg_online = testModel(model=model, paramGrid=param_grid, X=X_train_freq, y=y_train_freq)

    print("\n\n##################################################")
    print("#### Model for Entropy Data - Experiment EEG Data ####")
    bestEstimator_entropy_exp = testModel(model=model, paramGrid=param_grid, X=X_train_entropy_exp, y=y_train_entropy_exp)

    print("\n\n##################################################")
    print("#### Model for Frequency Data - Experiment EEG Data ####")
    bestEstimator_freq_exp = testModel(model=model, paramGrid=param_grid, X=X_train_freq_exp, y=y_train_freq_exp)
    
    return (bestEstimator_entropy_eeg_online, bestEstimator_freq_eeg_online, bestEstimator_entropy_exp, bestEstimator_freq_exp)

## Decision Tree

In [16]:
best_estimators_dt[0]

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='random')

In [13]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()
param_grid = [
        {'criterion' : ['gini', 'entropy'],
         'splitter' : ['best', 'random'],
    }
]

best_estimators_dt = evaluateWithAllDatasets(model, param_grid, title="Decision Tree")



############# DECISION TREE ############
##################################################
#### Model for Entropy Data - Online EEG Data ####
Testing Classifier: DecisionTreeClassifier
Scoring: make_scorer(f1_score)
K-fold times: 8 --- n-jobs: -1


Best Params: {'splitter': 'random', 'criterion': 'gini'}
Best Estimator: DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='random')
0.5621669628299104 {'splitter': 'best', 'criterion': 'gini'}
0.6384263465646531 {'splitter': 'random', 'criterion': 'gini'}
0.5923385666400515 {'splitter': 'best', 'criterion': 'entropy'}
0.5684670257110602 {'splitter': 'random', 'criterion': 'ent

# Suport Vector Machine (SVM)

In [17]:
from sklearn import svm

# Create a model to test
model = svm.SVC()

# Create a parameter grid - here you specifiy which combinations you want to test
param_grid = [
    {'kernel': ['poly', 'rbf', 'sigmoid'],
     'degree': [2, 3, 5, 10], # only for poly kernel
     'gamma': ['scale', 'auto']
    }
]
    
best_estimators_smv = evaluateWithAllDatasets(model, param_grid, title="Support Vector Machine")


############# SUPPORT VECTOR MACHINE ############
##################################################
#### Model for Entropy Data - Online EEG Data ####
Testing Classifier: SVC
Scoring: make_scorer(f1_score)
K-fold times: 8 --- n-jobs: -1


Best Params: {'kernel': 'poly', 'gamma': 'scale', 'degree': 5}
Best Estimator: SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=5, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
0.6806950784521192 {'kernel': 'poly', 'gamma': 'scale', 'degree': 2}
0.6355423926744519 {'kernel': 'rbf', 'gamma': 'scale', 'degree': 2}
0.37033703183201777 {'kernel': 'sigmoid', 'gamma': 'scale', 'degree': 2}
0.5378505845740114 {'kernel': 'poly', 'gamma': 'auto', 'degree': 2}
0.5387078800790277 {'kernel': 'rbf', 'gamma': 'auto', 'degree': 2}
0.5569480021449209 {'kernel': 'sigmoid', 'gamma': 'auto', 'degree': 2}
0.73078674023

# Random Forest

In [18]:
from sklearn.ensemble import RandomForestClassifier

# Create a model to test
model = RandomForestClassifier()

# Create a parameter grid - here you specifiy which combinations you want to test
param_grid = [
    {'n_estimators': [500, 1000, 2000], # 1000 seems good
     #'min_samples_split' : [2, 4, 8],
     'criterion' : ['gini', 'entropy'],
     'max_features' : ['auto', 'log2'],
    }
]

best_estimators_rf = evaluateWithAllDatasets(model, param_grid, title="Random Forest")


############# RANDOM FOREST ############
##################################################
#### Model for Entropy Data - Online EEG Data ####
Testing Classifier: RandomForestClassifier
Scoring: make_scorer(f1_score)
K-fold times: 8 --- n-jobs: -1


Best Params: {'max_features': 'log2', 'n_estimators': 500, 'criterion': 'entropy'}
Best Estimator: RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
0.6224073803407129 {'max_features': 'auto', 'n_estimators': 500, 'criterion': 'gini'}
0.620521692476

# Long-short term memory (LSTM)

# K-nearest neighbour (KNN)

In [19]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()

# Create a parameter grid - here you specifiy which combinations you want to test
param_grid = [
        {'n_neighbors' : [3, 5, 10, 25],
         'weights' : ['uniform', 'distance'],
    }
]

best_estimators_knn = evaluateWithAllDatasets(model, param_grid, title="k-nearest neighbour")


############# K-NEAREST NEIGHBOUR ############
##################################################
#### Model for Entropy Data - Online EEG Data ####
Testing Classifier: KNeighborsClassifier
Scoring: make_scorer(f1_score)
K-fold times: 8 --- n-jobs: -1


Best Params: {'n_neighbors': 5, 'weights': 'distance'}
Best Estimator: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='distance')
0.6537853779922721 {'n_neighbors': 3, 'weights': 'uniform'}
0.654005283767055 {'n_neighbors': 3, 'weights': 'distance'}
0.6548285821862858 {'n_neighbors': 5, 'weights': 'uniform'}
0.6549411385207313 {'n_neighbors': 5, 'weights': 'distance'}
0.618348262959203 {'n_neighbors': 10, 'weights': 'uniform'}
0.6399826071668968 {'n_neighbors': 10, 'weights': 'distance'}
0.602868384543845 {'n_neighbors': 25, 'weights': 'uniform'}
0.6061941398707827 {'n_neighbors': 25, 'weights': 'distance'}
M

# Gradient Boost

In [20]:
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier()

param_grid = [
        {'learning_rate' : [0.001, 0.01, 0.1],
         'loss' : ['deviance', 'exponential'],
         'n_estimators' : [1000], # 1000 seems good 
    }
]

best_estimators_gBoost = evaluateWithAllDatasets(model, param_grid, title="Gradient Boost")


############# GRADIENT BOOST ############
##################################################
#### Model for Entropy Data - Online EEG Data ####
Testing Classifier: GradientBoostingClassifier
Scoring: make_scorer(f1_score)
K-fold times: 8 --- n-jobs: -1


Best Params: {'learning_rate': 0.1, 'n_estimators': 1000, 'loss': 'deviance'}
Best Estimator: GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=1000,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=

# Ada Boost

In [21]:
from sklearn.ensemble import AdaBoostClassifier

# Create a model to test
model = AdaBoostClassifier()

# Create a parameter grid - here you specifiy which combinations you want to test
param_grid = [
    {'n_estimators': [500, 1000, 2000],
     'learning_rate' : [1, 1.2]
    }
]

best_estimators_adaBoost = evaluateWithAllDatasets(model, param_grid, title="Ada Boost")


############# ADA BOOST ############
##################################################
#### Model for Entropy Data - Online EEG Data ####
Testing Classifier: AdaBoostClassifier
Scoring: make_scorer(f1_score)
K-fold times: 8 --- n-jobs: -1


Best Params: {'learning_rate': 1.2, 'n_estimators': 2000}
Best Estimator: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.2,
                   n_estimators=2000, random_state=None)
0.6814987904093085 {'learning_rate': 1, 'n_estimators': 500}
0.679406349296515 {'learning_rate': 1, 'n_estimators': 1000}
0.6877386966934965 {'learning_rate': 1, 'n_estimators': 2000}
0.6891893546976744 {'learning_rate': 1.2, 'n_estimators': 500}
0.6910805880717243 {'learning_rate': 1.2, 'n_estimators': 1000}
0.7022789704709536 {'learning_rate': 1.2, 'n_estimators': 2000}
Minutes taken:  66.31994532744089


##################################################
#### Model for Frequency Data - Online EEG Data ####
Testing Classifier: AdaBoostCla

## Multilayer Perceptron

In [22]:
from sklearn.neural_network import MLPClassifier

# Create a model to test
model = MLPClassifier()

# Create a parameter grid - here you specifiy which combinations you want to test
param_grid = [
    {'hidden_layer_sizes': [(2, 100), (2, 50), (2, 200), (4, 50), (4, 100)],
     'activation' : ['relu', 'tanh', 'logistic'],
     'solver' : ['adam'],
     'alpha' : [0.0001],
     'learning_rate' : ['constant'],
     'learning_rate_init' : [0.001],
     'shuffle' : [True]
    }
]

best_estimators_mlp = evaluateWithAllDatasets(model, param_grid, title="Multi Layer Perceptron")


############# MULTI LAYER PERCEPTRON ############
##################################################
#### Model for Entropy Data - Online EEG Data ####
Testing Classifier: MLPClassifier
Scoring: make_scorer(f1_score)
K-fold times: 8 --- n-jobs: -1


Best Params: {'shuffle': True, 'hidden_layer_sizes': (4, 100), 'solver': 'adam', 'activation': 'tanh', 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'alpha': 0.0001}
Best Estimator: MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(4, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)
0.6638642426195617 {'shuffl



Best Params: {'shuffle': True, 'hidden_layer_sizes': (2, 100), 'solver': 'adam', 'activation': 'relu', 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'alpha': 0.0001}
Best Estimator: MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(2, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)
0.7134435639504876 {'shuffle': True, 'hidden_layer_sizes': (2, 100), 'solver': 'adam', 'activation': 'relu', 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'alpha': 0.0001}
0.6792583792032255 {'shuffle': True, 'hidden_layer_sizes': (2, 50), 'solver': 'adam', 'activat

## Nerual Network

## CNN