# Notebook to test Classifiers

In [7]:
# Imports
import os, sys
import numpy as np

# Import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
# Scorer object built from f1_score; used as the default `scoring` of testModel
f1_scorer = make_scorer(f1_score) 

# Make the project's code directory importable from this notebook
module_path = os.path.abspath('../code')
print(module_path)
# Only register the directory once, so re-running the cell does not duplicate it
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from machine_learning_load_data import loadOnlineEEGdata

/home/nvidia/masterthesis/thesis_eeg/code


In [2]:
# Load the online EEG data, already split into train and test partitions
eegData, freqData, entropyData = loadOnlineEEGdata(dirPath='../../EEG_Data/eeg_data_online', splitData=True)
(eegX_train, eegy_train, eegX_test, eegy_test) = eegData
(freqX_train, freqy_train, freqX_test, freqy_test) = freqData
(X_train_entropy, y_train_entropy, X_test_entropy, y_test_entropy) = entropyData

# Flatten the frequency features to 2D: keep axis 0 and axis 2 as the new shape
freqX_train, freqX_test = (
    arr.reshape(arr.shape[0], arr.shape[2]) for arr in (freqX_train, freqX_test)
)

# Same flattening for the entropy features
X_train_entropy, X_test_entropy = (
    arr.reshape(arr.shape[0], arr.shape[2]) for arr in (X_train_entropy, X_test_entropy)
)

Loading Online EEG Data from ../../EEG_Data/eeg_data_online ...
EEG Data Shape:
(5024, 512, 40) (5024,) (2154, 512, 40) (2154,)
Freq Data Shape:
(1008, 1, 1200) (1008,) (432, 1, 1200) (432,)
Entropy Data Shape:
(5024, 1, 200) (5024,) (2154, 1, 200) (2154,)


In [16]:
def testModel(model, paramGrid, X, y, n_jobs=-1, scoring=f1_scorer, kFoldTimes=8):
    ''' Run a cross-validated grid search for the model and print the results.

    Args:
        model: estimator instance whose hyper-parameters are searched.
        paramGrid: parameter grid (dict or list of dicts) passed to GridSearchCV.
        X: samples the grid search is fitted on.
        y: target labels belonging to X.
        n_jobs: number of parallel jobs passed to GridSearchCV.
        scoring: scorer used to rank the parameter combinations.
        kFoldTimes: value passed as `cv` to GridSearchCV.

    Returns:
        The `best_estimator_` of the fitted grid search.
    '''
    print("Testing Classifier: {}".format(model.__class__.__name__))
    print("Scoring: {}".format(scoring))
    print("K-fold times: {} --- n-jobs: {}".format(kFoldTimes, n_jobs))
    print("\n")

    # create a grid search from the grid that was passed in, not a notebook global
    grid_search = GridSearchCV(model, paramGrid, cv=kFoldTimes, scoring=scoring, return_train_score=True, n_jobs=n_jobs)

    # fit it with the data given by the caller
    grid_search.fit(X, y)

    print("Best Params: {}".format(grid_search.best_params_))
    print("Best Estimator: {}".format(grid_search.best_estimator_))

    # report the mean validation score of every tested parameter combination
    cvres = grid_search.cv_results_
    for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
        print(mean_score, params)

    return grid_search.best_estimator_

## Decision Tree

In [17]:
from sklearn.tree import DecisionTreeClassifier

# Model under test
model = DecisionTreeClassifier()

# Every criterion/splitter combination is evaluated
param_grid = [{
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
}]

bestEstimator = testModel(model, param_grid, X=X_train_entropy, y=y_train_entropy)

Testing Classifier: DecisionTreeClassifier
Scoring: make_scorer(f1_score)
K-fold times: 8 - n-jobs: -1


Best Params: {'splitter': 'random', 'criterion': 'gini'}
Best Estimator: DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='random')
0.5705124777656854 {'splitter': 'best', 'criterion': 'gini'}
0.6246980931136836 {'splitter': 'random', 'criterion': 'gini'}
0.5923053403801248 {'splitter': 'best', 'criterion': 'entropy'}
0.5886940216322119 {'splitter': 'random', 'criterion': 'entropy'}


# Support Vector Machine (SVM)

In [None]:
from sklearn import svm

# Create a model to test
model = svm.SVC()

# Create a parameter grid - here you specify which combinations you want to test.
# 'degree' is only used by the poly kernel, so it gets its own sub-grid; crossing
# it with rbf/sigmoid would only refit identical models for every degree value.
param_grid = [
    {'kernel': ['poly'],
     'degree': [2, 3, 5, 10],
     'gamma': ['scale', 'auto']
    },
    {'kernel': ['rbf', 'sigmoid'],
     'gamma': ['scale', 'auto']
    }
]

bestEstimator = testModel(model=model, paramGrid=param_grid, X=X_train_entropy, y=y_train_entropy)

Testing Classifier: SVC
Scoring: make_scorer(f1_score)
K-fold times: 8 - n-jobs: -1




# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Create a model to test
model = RandomForestClassifier()

# Create a parameter grid - here you specify which combinations you want to test.
# 'sqrt' is what 'auto' meant for classifiers; 'auto' itself is deprecated and
# rejected by newer scikit-learn releases, while 'sqrt' works on old and new ones.
param_grid = [
    {'n_estimators': [500, 1000, 2000],
     'min_samples_split' : [2, 4, 8],
     'criterion' : ['gini', 'entropy'],
     'max_features' : ['sqrt', 'log2'],
    }
]

bestEstimator = testModel(model=model, paramGrid=param_grid, X=X_train_entropy, y=y_train_entropy)

# Long short-term memory (LSTM)

# K-nearest neighbour (KNN)

In [None]:
# KNeighborsClassifier was never imported, so the cell failed on a fresh kernel
from sklearn.neighbors import KNeighborsClassifier

# Create a model to test
model = KNeighborsClassifier()

# Create a parameter grid - here you specify which combinations you want to test
param_grid = [
        {'n_neighbors' : [3, 5, 10, 25],
         'weights' : ['uniform', 'distance'],
    }
]

bestEstimator = testModel(model=model, paramGrid=param_grid, X=X_train_entropy, y=y_train_entropy)

# Gradient Boost

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Model under test
model = GradientBoostingClassifier()

# Learning rate and loss are varied; the number of estimators is fixed.
# NOTE(review): newer scikit-learn releases renamed 'deviance' to 'log_loss' -
# confirm against the installed version before upgrading.
param_grid = [{
    'learning_rate': [0.001, 0.01, 0.1],
    'loss': ['deviance', 'exponential'],
    'n_estimators': [1000],  # 1000 seems good
}]

bestEstimator = testModel(model, param_grid, X=X_train_entropy, y=y_train_entropy)

# Ada Boost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Model under test
model = AdaBoostClassifier()

# Parameter combinations to try: ensemble size crossed with learning rate
param_grid = [{
    'n_estimators': [500, 1000],
    'learning_rate': [1, 1.2],
}]

bestEstimator = testModel(model, param_grid, X=X_train_entropy, y=y_train_entropy)