In [8]:
import mne
from mne.io import concatenate_raws, read_raw_edf
import glob


from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, ShuffleSplit, cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from mne.decoding import CSP, SPoC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from mne.decoding import (
    SlidingEstimator,
    GeneralizingEstimator,
    Scaler,
    cross_val_multiscore,
    LinearModel,
    get_coef,
    Vectorizer,
    CSP,
)
import numpy as np
from mne.preprocessing import ICA

from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Declare the path of utils function
from utils import preprocess_data

In [2]:
files = glob.glob('../files/S001/*.edf')
files.sort()

In [3]:
len(files)

14

In [4]:
'''
https://github.com/mne-tools/mne-python/blob/main/mne/datasets/eegbci/eegbci.py#L110
=========  ===================================
run        task
=========  ===================================
1          Baseline, eyes open
2          Baseline, eyes closed
3, 7, 11   Motor execution: left vs right hand
4, 8, 12   Motor imagery: left vs right hand
5, 9, 13   Motor execution: hands vs feet
6, 10, 14  Motor imagery: hands vs feet
=========  ===================================
'''
raws = []
f = [5,9,13]
# ,6,10,14]
for i in f:
    print(i)
    current_file = files[i-1]
    r = read_raw_edf(current_file, preload=True, stim_channel='auto')
    events, _ = mne.events_from_annotations(r)
    if i in [5, 9, 13]:
        new_labels_events = {1:'rest', 2:'action_hand', 3:'action_feet'} # action
    else:
        new_labels_events = {1:'rest', 2:'imagine_hand', 3:'imagine_feet'} # imagine
    new_annot = mne.annotations_from_events(events=events, event_desc=new_labels_events, sfreq=r.info['sfreq'], orig_time=r.info['meas_date'])
    r.set_annotations(new_annot)
    raws.append(r)
    
raw_obj = concatenate_raws(raws)

5
Extracting EDF parameters from /Users/owalid/42/post_intership/total-perspective-vortex/files/S001/S001R11.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Used Annotations descriptions: ['T0', 'T1', 'T2']
9
Extracting EDF parameters from /Users/owalid/42/post_intership/total-perspective-vortex/files/S001/S001R06.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Used Annotations descriptions: ['T0', 'T1', 'T2']
13
Extracting EDF parameters from /Users/owalid/42/post_intership/total-perspective-vortex/files/S001/S001R02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 9759  =      0.000 ...    60.994 secs...
Used Annotations descriptions: ['T0']


In [None]:
raw_obj, event, event_dict, picks, epochs = preprocess_data(raw_obj)

In [9]:
X = epochs.get_data()
y = epochs.events[:, -1] - 1
shuffle_split = ShuffleSplit(n_splits=7, test_size=0.2, random_state=42)
print(X.shape)
print(y.shape)

models = [
    ('Gradient Boosting', GradientBoostingClassifier(), {'model__n_estimators': [50, 100]}),
    ('Linear discriminant analysis', LinearDiscriminantAnalysis(), {'model__solver': ['svd', 'lsqr', 'eigen'], 'model__tol': [0.0001, 0.00001]}),   
    ('SVM', SVC(), {'model__C': [0.5, 1, 3], 'model__kernel': ['linear']}),
    ('KNN', KNeighborsClassifier(), {'model__n_neighbors': [4,5,6]}),
    ('Random Forest', RandomForestClassifier(), {'model__n_estimators': [50,100]}),
    ('MLP', MLPClassifier(), {'model__hidden_layer_sizes': [(100, 50), (200, 100)]}),
    ('Decision Tree', DecisionTreeClassifier(), {'model__max_depth': [50, 100]}),
    ('XGB', XGBClassifier(), {'model__n_estimators': [200, 300], 'model__learning_rate': [0.05, 0.001]})
]

pipelines = []
csp = CSP()
for name, model, param_grid in models:
    pipeline = Pipeline([
        ('csp', csp),
        ('model', model)
    ])
    param_grid['csp__n_components'] = [5, 6, 7, 8, 9, 10, 15, 20, 30, 40]
    pipelines.append((name, pipeline, param_grid))

(30, 64, 721)
(30,)


In [None]:
%%capture
results = []
for name, pipeline, param_grid in pipelines:
    grid_search = GridSearchCV(pipeline, param_grid=param_grid, cv=shuffle_split, n_jobs=-1)
    grid_search.fit(X, y)
    results.append((name, grid_search))

In [12]:
res_grid = []
for name, grid_search in results:
    print(f"Model: {name}")
    print(f"Best Parameters: {grid_search.best_params_}")
    print(f"Best Cross-Validated Accuracy: {grid_search.best_score_:.2f}")
    print("\n")

Model: Gradient Boosting
Best Parameters: {'csp__n_components': 7, 'model__n_estimators': 100}
Best Cross-Validated Accuracy: 0.74


Model: Linear discriminant analysis
Best Parameters: {'csp__n_components': 5, 'model__solver': 'svd', 'model__tol': 0.0001}
Best Cross-Validated Accuracy: 0.86


Model: SVM
Best Parameters: {'csp__n_components': 20, 'model__C': 3, 'model__kernel': 'linear'}
Best Cross-Validated Accuracy: 0.81


Model: KNN
Best Parameters: {'csp__n_components': 5, 'model__n_neighbors': 4}
Best Cross-Validated Accuracy: 0.79


Model: Random Forest
Best Parameters: {'csp__n_components': 8, 'model__n_estimators': 100}
Best Cross-Validated Accuracy: 0.76


Model: MLP
Best Parameters: {'csp__n_components': 5, 'model__hidden_layer_sizes': (200, 100)}
Best Cross-Validated Accuracy: 0.76


Model: Decision Tree
Best Parameters: {'csp__n_components': 6, 'model__max_depth': 100}
Best Cross-Validated Accuracy: 0.74


Model: XGB
Best Parameters: {'csp__n_components': 6, 'model__learnin