## KNN - K-nearest neighbour

In [5]:
# Imports
import os, sys
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from numpy import mean

# Put the ../code directory on the import path so that the project's
# local modules can be imported further down in this notebook.
module_path = os.path.abspath('../code')
print(module_path)
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

from machine_learning_load_data import loadOnlineEEGdata

/home/nvidia/masterthesis/thesis_eeg/code


In [6]:
# Fetch the online EEG data set. Three bundles come back (raw EEG,
# frequency features, entropy features); each one unpacks into
# (X_train, y_train, X_test, y_test).
eegData, freqData, entropyData = loadOnlineEEGdata(dirPath='../../EEG_Data/eeg_data_online', splitData=True)
(eegX_train, eegy_train, eegX_test, eegy_test) = eegData
(freqX_train, freqy_train, freqX_test, freqy_test) = freqData
(X_train_entropy, y_train_entropy, X_test_entropy, y_test_entropy) = entropyData

# Collapse the frequency and entropy feature arrays to 2-D by keeping only
# the lengths of their first and last axes (the printed shapes show the
# middle axis has length 1).
freqX_train, freqX_test, X_train_entropy, X_test_entropy = (
    features.reshape(features.shape[0], features.shape[2])
    for features in (freqX_train, freqX_test, X_train_entropy, X_test_entropy)
)

Loading Online EEG Data from ../../EEG_Data/eeg_data_online ...
EEG Data Shape:
(5024, 512, 40) (5024,) (2154, 512, 40) (2154,)
Freq Data Shape:
(1008, 1, 1200) (1008,) (432, 1, 1200) (432,)
Entropy Data Shape:
(5024, 1, 200) (5024,) (2154, 1, 200) (2154,)


In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import f1_score, make_scorer

# Candidates are ranked by their F1 score
f1_scorer = make_scorer(f1_score)

# Base estimator whose hyper-parameters are tuned below
model = KNeighborsClassifier()

# Hyper-parameter grid: every n_neighbors value is combined with every
# weighting scheme, i.e. 4 x 2 = 8 candidates in total
param_grid = {
    'n_neighbors': [3, 5, 10, 25],
    'weights': ['uniform', 'distance'],
}

# Exhaustive grid search (not a randomized search): each of the candidates
# above is evaluated with 5-fold cross validation. n_jobs=-1 lets
# scikit-learn use all available processors.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=f1_scorer,
    cv=5,
    n_jobs=-1,
    return_train_score=True,
)

# Run the search on the entropy training features
result = grid_search.fit(X_train_entropy, y_train_entropy)

In [8]:
# Report the winning hyper-parameters and the corresponding estimator
summary_lines = (
    "Best Params: {}".format(grid_search.best_params_),
    "Best Estimator: {}".format(grid_search.best_estimator_),
)
for summary_line in summary_lines:
    print(summary_line)

# List the mean cross-validated test score next to each candidate's parameters
cvres = grid_search.cv_results_
for candidate_idx, candidate_params in enumerate(cvres["params"]):
    print(cvres["mean_test_score"][candidate_idx], candidate_params)

Best Params: {'weights': 'distance', 'n_neighbors': 3}
Best Estimator: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='distance')
0.6250248516556443 {'weights': 'uniform', 'n_neighbors': 3}
0.6252677412109849 {'weights': 'distance', 'n_neighbors': 3}
0.6221004533872995 {'weights': 'uniform', 'n_neighbors': 5}
0.6223457191384825 {'weights': 'distance', 'n_neighbors': 5}
0.593377217917785 {'weights': 'uniform', 'n_neighbors': 10}
0.6116101710879578 {'weights': 'distance', 'n_neighbors': 10}
0.5785519773291795 {'weights': 'uniform', 'n_neighbors': 25}
0.5829484079211216 {'weights': 'distance', 'n_neighbors': 25}


In [9]:
# Show the best estimator found by the grid search as this cell's output
grid_search.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='distance')