In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from scipy.signal import welch, freqz, butter, filtfilt
import seaborn as sns
from mne.decoding import CSP
import mne
mne.set_log_level('WARNING')
from sklearn import metrics 

from sklearn.linear_model import LogisticRegression as LR
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
def discover_sessions(input_root='/kaggle/input'):
    """Collect session IDs, participant IDs and file paths below ``input_root``.

    Every file found by ``os.walk`` contributes the first four characters of
    its name as a session ID and the first three characters as a participant
    ID. All three results are sorted so that their order (and therefore the
    order of any per-session results built from them) is identical on every
    kernel restart, independent of directory-listing or set-iteration order.

    Parameters
    ----------
    input_root : str
        Directory that is searched recursively.

    Returns
    -------
    tuple of (list of str, list of str, list of str)
        Sorted unique session IDs, sorted unique participant IDs, and the
        sorted full paths of all files found. All three are empty when
        ``input_root`` does not exist or contains no files.
    """
    session_ids = set()
    participant_ids = set()
    file_paths = []

    for dirname, _, filenames in os.walk(input_root):
        for filename in filenames:
            session_ids.add(filename[0:4])
            participant_ids.add(filename[0:3])
            file_paths.append(os.path.join(dirname, filename))

    return sorted(session_ids), sorted(participant_ids), sorted(file_paths)


sessions, sessions_participant_only, session_dirs = discover_sessions()
print(sessions)
print(sessions_participant_only)

In [None]:
def cross_val(model, CSP_boo, cutoff, n_components=14, n_splits=5):
    """Run stratified k-fold cross-validation separately for every session.

    For each ID in the module-level ``sessions`` list, the session's X file
    and label file are looked up in ``session_dirs``, the first ``cutoff``
    samples along the last axis of X are dropped, and ``model`` is evaluated
    on shuffled stratified folds (``random_state=42``).

    Parameters
    ----------
    model : estimator
        Object exposing ``fit``, ``score`` and ``predict``; it is refit on
        every fold.
    CSP_boo : bool
        When true, a ``CSP`` transformer is fitted on each training fold and
        applied to both the training and the test fold before ``model`` sees
        the data.
    cutoff : int
        Number of leading samples removed from the last axis of X.
        NOTE(review): X is indexed as a 3-D array; the axis meaning
        (presumably trials x channels x time) should be confirmed.
    n_components : int, optional
        Number of CSP components (default 14, the value previously hard-coded).
    n_splits : int, optional
        Number of stratified folds (default 5, the value previously hard-coded).

    Returns
    -------
    all_acc : list of list of float
        For every session, the ``model.score`` value of each fold.
    all_cm : list of ndarray
        For every session, the confusion matrix of the predictions pooled
        over all folds.

    Raises
    ------
    ValueError
        If a session has no X file or no label file. Previously the arrays
        of the preceding session were silently reused in that case.
    """
    all_acc = []
    all_cm = []

    # Will calculate results for all participants on all days
    for session in sessions:
        # Match on the file name rather than the whole path, so a directory
        # whose name happens to contain the session ID cannot pull in
        # unrelated files.
        x_path = None
        y_path = None
        for session_dir in session_dirs:
            file_name = os.path.basename(session_dir)
            if not file_name.startswith(session):
                continue
            if '_X.' in file_name:
                x_path = session_dir
            else:
                y_path = session_dir

        if x_path is None or y_path is None:
            raise ValueError(
                "Session {!r} needs both an '_X.' file and a label file "
                "(found X: {!r}, labels: {!r})".format(session, x_path, y_path)
            )

        X = np.load(x_path)[:, :, cutoff:]
        Y = np.load(y_path)

        if CSP_boo:
            csp = CSP(n_components=n_components, reg=None, log=True, norm_trace=False)

        # Stratified K-fold
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

        X_acc = []
        Y_preds = []
        Y_orig = []

        for train_index, test_index in kf.split(X, Y):
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]

            if CSP_boo:
                # Fit the spatial filters on the training fold only, then
                # reuse them unchanged on the held-out fold.
                X_train = csp.fit_transform(X_train, Y_train)
                X_test = csp.transform(X_test)

            fitted_model = model.fit(X_train, Y_train)
            X_acc.append(fitted_model.score(X_test, Y_test))
            Y_preds.extend(fitted_model.predict(X_test))
            Y_orig.extend(Y_test)

        # One confusion matrix per session, pooled over all folds.
        cm = metrics.confusion_matrix(Y_orig, Y_preds)

        all_acc.append(X_acc)
        all_cm.append(cm)

    return all_acc, all_cm

In [None]:
# RBF-kernel SVM on CSP features, per session; the first 375 samples along the last axis of X are dropped.
SVM_rbf_acc, SVM_rbf_cm = cross_val(SVC(kernel='rbf'), CSP_boo=True, cutoff=375)

In [None]:
# Shrinkage LDA (eigen solver) on CSP features, per session; same 375-sample cutoff.
LDA_acc, LDA_cm = cross_val(LDA(shrinkage='auto', solver='eigen'), CSP_boo=True, cutoff=375)

In [None]:
# Logistic regression with default settings on CSP features, per session; same 375-sample cutoff.
LR_acc, LR_cm = cross_val(LR(), CSP_boo=True, cutoff=375)

In [None]:
# Random forest with default settings on CSP features, per session; same 375-sample cutoff.
RF_acc, RF_cm = cross_val(RF(), CSP_boo=True, cutoff=375)

In [None]:
# Mean cross-validated accuracy per session, one curve per classifier.
patients = sessions

# (legend label, per-session lists of fold accuracies) for every classifier.
results_by_model = [
    ("svm", SVM_rbf_acc),
    ("LDA", LDA_acc),
    ("LR", LR_acc),
    ("RF", RF_acc),
]

plt.figure(figsize=(15,10))
for label, fold_accs in results_by_model:
    # Pair every ID with its mean fold accuracy, then sort by ID so that all
    # curves share the same left-to-right x order.
    mean_by_patient = sorted(zip(patients, [sum(i)/len(i) for i in fold_accs]))
    plt.plot(
        [patient for patient, _ in mean_by_patient],
        [mean_acc for _, mean_acc in mean_by_patient],
        label=label,
        marker='o',
    )

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
plt.title('All results')
plt.xlabel('Patient ID')
plt.ylabel('Accuracy')
plt.show()

In [None]:
def cross_val_participants(model, CSP_boo, cutoff, n_components=4, n_splits=10):
    """Run stratified k-fold cross-validation per participant, pooling all days.

    For each ID in the module-level ``sessions_participant_only`` list, every
    recording session of that participant found in ``session_dirs`` is loaded,
    the first ``cutoff`` samples along the last axis of each X array are
    dropped, and the sessions are concatenated before ``model`` is evaluated
    on shuffled stratified folds (``random_state=42``).

    Files are paired through the first four characters of their file name
    (the session ID), and sessions are concatenated in sorted session order.
    This guarantees that X and the labels are stacked in the same order, which
    the directory-listing order used previously did not.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit``, ``score`` and ``predict``; it is refit on
        every fold.
    CSP_boo : bool
        When true, a ``CSP`` transformer is fitted on each training fold and
        applied to both the training and the test fold before ``model`` sees
        the data.
    cutoff : int
        Number of leading samples removed from the last axis of X.
        NOTE(review): X is indexed as a 3-D array; the axis meaning
        (presumably trials x channels x time) should be confirmed.
    n_components : int, optional
        Number of CSP components (default 4, the value previously hard-coded).
    n_splits : int, optional
        Number of stratified folds (default 10, the value previously
        hard-coded).

    Returns
    -------
    all_acc : list of list of float
        For every participant, the ``model.score`` value of each fold.
    all_cm : list of ndarray
        For every participant, the confusion matrix of the predictions pooled
        over all folds.

    Raises
    ------
    ValueError
        If a participant has no files, or one of their sessions lacks either
        the X file or the label file.
    """
    all_acc = []
    all_cm = []

    # Will calculate results for all participants on all days
    for session in sessions_participant_only:
        participant = session[0:3]

        # Group this participant's files by session ID. Matching is done on
        # the file name, not the whole path, so directory names cannot cause
        # false matches.
        files_by_day = {}
        for session_dir in session_dirs:
            file_name = os.path.basename(session_dir)
            if not file_name.startswith(participant):
                continue
            kind = 'X' if '_X.' in file_name else 'Y'
            files_by_day.setdefault(file_name[0:4], {})[kind] = session_dir

        if not files_by_day:
            raise ValueError(
                "No data files found for participant {!r}".format(participant)
            )

        # Load the X/Y pair of every day in a fixed (sorted) order.
        X_only = []
        Y_only = []
        for day in sorted(files_by_day):
            pair = files_by_day[day]
            if 'X' not in pair or 'Y' not in pair:
                raise ValueError(
                    "Session {!r} needs both an '_X.' file and a label file, "
                    "found only: {!r}".format(day, sorted(pair.values()))
                )
            # Drop the first `cutoff` samples: motor imagery takes time to
            # initiate. NOTE(review): the original author assumed a sampling
            # rate of around 1000 Hz; confirm against the dataset.
            X_only.append(np.load(pair['X'])[:, :, cutoff:])
            Y_only.append(np.load(pair['Y']))

        # Works for any number of days, not just two.
        X = np.vstack(X_only)
        Y = np.hstack(Y_only)
        print(X.shape, Y.shape)

        if CSP_boo:
            csp = CSP(n_components=n_components, reg=None, log=True, norm_trace=False)

        # Stratified K-fold
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

        X_acc = []
        Y_preds = []
        Y_orig = []

        for train_index, test_index in kf.split(X, Y):
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]

            if CSP_boo:
                # Fit the spatial filters on the training fold only, then
                # reuse them unchanged on the held-out fold.
                X_train = csp.fit_transform(X_train, Y_train)
                X_test = csp.transform(X_test)

            fitted_model = model.fit(X_train, Y_train)
            X_acc.append(fitted_model.score(X_test, Y_test))
            Y_preds.extend(fitted_model.predict(X_test))
            Y_orig.extend(Y_test)

        # One confusion matrix per participant, pooled over all folds.
        cm = metrics.confusion_matrix(Y_orig, Y_preds)

        all_acc.append(X_acc)
        all_cm.append(cm)

    return all_acc, all_cm

In [None]:
# RBF-kernel SVM on CSP features, per participant; the first 375 samples along the last axis of X are dropped.
SVM_rbf_participant_acc, SVM_rbf_participant_cm = cross_val_participants(
    SVC(kernel='rbf'), CSP_boo=True, cutoff=375
)

In [None]:
# Shrinkage LDA (eigen solver) on CSP features, per participant; same 375-sample cutoff.
LDA_participant_acc, LDA_participant_cm = cross_val_participants(
    LDA(shrinkage='auto', solver='eigen'), CSP_boo=True, cutoff=375
)

In [None]:
# Logistic regression with default settings on CSP features, per participant; same 375-sample cutoff.
LR_participant_acc, LR_participant_cm = cross_val_participants(
    LR(), CSP_boo=True, cutoff=375
)

In [None]:
# Random forest with default settings on CSP features, per participant; same 375-sample cutoff.
RF_participant_acc, RF_participant_cm = cross_val_participants(
    RF(), CSP_boo=True, cutoff=375
)

In [None]:
# Mean cross-validated accuracy per participant, one curve per classifier.
patients = sessions_participant_only

# (legend label, per-participant lists of fold accuracies) for every classifier.
participant_results_by_model = [
    ("svm", SVM_rbf_participant_acc),
    ("LDA", LDA_participant_acc),
    ("LR", LR_participant_acc),
    ("RF", RF_participant_acc),
]

plt.figure(figsize=(15,10))
for label, fold_accs in participant_results_by_model:
    # Pair every ID with its mean fold accuracy, then sort by ID so that all
    # curves share the same left-to-right x order.
    mean_by_patient = sorted(zip(patients, [sum(i)/len(i) for i in fold_accs]))
    plt.plot(
        [patient for patient, _ in mean_by_patient],
        [mean_acc for _, mean_acc in mean_by_patient],
        label=label,
        marker='o',
    )

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
plt.title('All results')
plt.xlabel('Patient ID')
plt.ylabel('Accuracy')
plt.show()