# Support Vector Machine

In [1]:
# Imports
import os, sys
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

# to enable local imports
module_path = os.path.abspath('../code')
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)

from machine_learning_data_generation import loadOnlineEEGdata

D:\Masterthesis\thesis_eeg\code


Using TensorFlow backend.


### K Fold Validierung - Owm implementation
You don't have to use this because sklearn offers the same as a function already `sklearn.model_selection.cross_val_score`

In [51]:
# load the data
# load the dataset
#eegData, freqData = loadOnlineEEGdata(splitData=False)
#eegX, eegy = eegData
#freqX, freqy = freqData

def testSvmFold(X, y, fold = 10, shuffle=True):
    
    scores = []
    kfold = KFold(fold, shuffle=shuffle)
    for train_index, test_index in kfold.split(X):
        #print("TRAIN:", train_index, "TEST:", test_index)
        # define train/test X/y
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        # define model
        C = 1.0  # SVM regularization parameter
        #odels = svm.SVC(kernel='linear', C=C)
        #odel = svm.LinearSVC(C=C, max_iter=10000)
        model = svm.SVC(kernel='rbf', gamma=0.7, C=C)
        #model = svm.SVC(kernel='poly', degree=3, gamma='auto', C=C)

        # fit model on train set
        model.fit(X_train, y_train)

        # forecast test set
        yhat = model.predict(X_test)

        # evaluate predictions
        score = accuracy_score(y_test, yhat)
        # store

        scores.append(score)
        print('>%.3f' % score)
        
    # calculate mean score across each run
    print('Final Score: %.3f' % (np.mean(scores)))
    
# Test with Frequency Data
#testSvmFold(X=freqX.reshape(1440,1200), y=freqy, fold=10, shuffle=True)

# Test with EEG Data
#testSvmFold(X=eegX[:, :, 6], y=eegy, fold=2, shuffle=True)

In [4]:
# Load some online EEG Data
eegData, freqData, entropyData = loadOnlineEEGdata(splitData=True)
eegX_train, eegy_train, eegX_test, eegy_test = eegData
freqX_train, freqy_train, freqX_test, freqy_test = freqData
X_train_entropy, y_train_enoptry, X_test_entropy, y_test_entropy = entropyData

# reshape
freqX_train = freqX_train.reshape(freqX_train.shape[0], freqX_train.shape[2])
freqX_test = freqX_test.reshape(freqX_test.shape[0], freqX_test.shape[2])

X_train_entropy = X_train_entropy.reshape(X_train_entropy.shape[0], X_train_entropy.shape[2])
X_test_entropy = X_test_entropy.reshape(X_test_entropy.shape[0], X_test_entropy.shape[2])

Loading Online EEG Data from D:/Masterthesis/EEG_Data/eeg_data_online ...
EEG Data Shape:
(5024, 512, 40) (5024,) (2154, 512, 40) (2154,)
Freq Data Shape:
(1008, 1, 1200) (1008,) (432, 1, 1200) (432,)
Entropy Data Shape:
(5024, 1, 200) (5024,) (2154, 1, 200) (2154,)


# Create & Evaluate Model
- Here we create a very simpel Support Vector Machine Model
- Then we use `cross_val_score` to get some Accuracies (uses kfold)
- Then we use `cross_val_predict` to get some predictions (uses kfold)
- With the predictions we can measure how good the model is
    - We create a confusions matrix
    - We calculate the Precision
    - We calculate the F1 Score

In [5]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import cross_val_predict, cross_val_score

# Create a model
model = svm.SVC(kernel='rbf', gamma=0.3, C=1.0)

# generate cross val score
kfoldTimes = 10
print("Calculating cross val scores...")
print("Cross val scores (Accuracies): {}".format(cross_val_score(model, X_train_entropy, y_train_enoptry, cv=kfoldTimes, scoring="accuracy")))

# make predictions with the model
print("\nCaclulating cross val predictions...")
y_train_pred = cross_val_predict(model, freqX_train, freqy_train, cv=kfoldTimes)


svm_confusionMatrix = confusion_matrix(freqy_train, y_train_pred)
print("""\nConfusion Matrix\n------------------------
True Negative:   {tn} - False Positive: {fp}
False Negatives: {fn} - True positive:  {tp}""".format(tn=svm_confusionMatrix[0][0],
                                                       fp=svm_confusionMatrix[0][1],
                                                       fn=svm_confusionMatrix[1][0],
                                                       tp=svm_confusionMatrix[1][1]))

print("----------------------")
print("Precision: {} ".format(precision_score(freqy_train, y_train_pred)))
print("Recall:    {}".format(recall_score(freqy_train, y_train_pred)))
print("F1 Score:  {}".format(f1_score(freqy_train, y_train_pred)))

Calculating cross val scores...
Cross val scores (Accuracies): [0.87077535 0.64811133 0.58846918 0.69781312 0.60956175 0.85258964
 0.6374502  0.74900398 0.43227092 0.68924303]

Caclulating cross val predictions...

Confusion Matrix
------------------------
True Negative:   380 - False Positive: 100
False Negatives: 137 - True positive:  391
----------------------
Precision: 0.7963340122199593 
Recall:    0.740530303030303
F1 Score:  0.7674190382728164


# Find best parameters for the SVM
Now we try to find the best parameters for the Support Vector Machine

In [55]:
from sklearn.model_selection import GridSearchCV

# Create a model to test
svm_model = svm.SVC()

# Create a parameter grid - here you specifiy which combinations you want to test
param_grid = [
    {'kernel': ['rbf'], 'gamma': [0.1, 0.3, 0.5, 0.75, 1], 'C': [0.2, 0.5, 0.75, 1, 5]},
    {'kernel': ['poly'], 'degree': [2, 3, 5, 10], 'C': [0.5, 1, 5], 'coef0' : [1, 10, 100]},
]

kFoldTimes = 8

# create a grid search
grid_search = GridSearchCV(svm_model, param_grid, cv=kFoldTimes,
                            scoring='accuracy',
                            return_train_score=True)

# fit it with the data
result = grid_search.fit(freqX_train, freqy_train)