In [55]:
# Serialise
import pickle

# ML stuff
from mne.decoding import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# get data
from sktime_neuro.datasets.bci.BCIDownloader import fetch_datasets_from_contest
from sktime_neuro.datasets.bci.BCI_utils import process_dataset_mat, process_dataset_gdf

# series to series transformer
# Replace the detrender so we can use sktime 0.10+
from sktime_neuro.transformations.multivariate_detrender import ColumnEnsembleDetrender, DumbDetrender

# panel to panel transformers
from sktime_neuro.transformations.panel.baselinecorrection import BaselineCorrectionTransformer as Baseline
from sktime_neuro.transformations.panel.timeslicing import TimeSlicingTransformer
from sktime_neuro.transformations.series.filterforseries import FilterforSeries
from sktime_neuro.transformations.series_to_panel.eeg_epoching import epoch

In [56]:
# Set environment variables
# NOTE(review): the wildcard import hides which dotenv names this notebook
# relies on; none of the cells shown here reference them directly.
from dotenv import *
import os
# Placeholder BCI credentials. These assignments overwrite whatever value is
# already present in the process environment. Never commit real credentials
# here; put them in a .env file instead (loaded below).
os.environ["BCI_USERNAME"] = "YOUR EMAIL FOR BCI HERE"
os.environ["BCI_PASSWORD"] = "YOUR PASSWORD FOR BCI HERE"

# If a .env file is available, load it and let its values override the
# placeholders set above (the -o flag requests overriding existing variables).
%load_ext dotenv
%dotenv -o


The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [57]:
def preprocessing_procedure(
    fs, data_train, annotation_train, labels, duration, l_freq=8, h_freq=15
):
    """Detrend, band-pass filter, epoch, baseline-correct and crop a recording.

    Parameters
    ----------
    fs
        Sampling frequency. It is cast to ``int`` for the filter, the baseline
        correction and the time slicing, and passed unchanged to ``epoch``.
    data_train
        Continuous recording handed to ``ColumnEnsembleDetrender``.
    annotation_train
        Event annotation forwarded to ``epoch``.
    labels
        Label values forwarded to ``epoch``.
    duration
        Upper bound of the epoching interval (presumably seconds after the
        event -- confirm against ``epoch``).
    l_freq, h_freq
        Band edges forwarded to ``FilterforSeries``. The defaults (8 and 15)
        are the values that used to be hard-coded here.

    Returns
    -------
    tuple
        ``(final_training, labels_train)``: the output of the time-slicing
        transformer and the labels returned by ``epoch``.
    """
    # Offset shared by three steps: the pre-event margin of each epoch, the
    # upper bound of the baseline window, and the start of the final slice.
    # NOTE(review): presumably seconds -- confirm against the transformers.
    baseline = 0.5

    # sktime 0.10 broke its own detrender for multivariate data, so the
    # project-local ColumnEnsembleDetrender replacement is used instead.
    detrender = ColumnEnsembleDetrender(data_train)
    detrended_data = detrender.fit_predict()

    band_filter = FilterforSeries(l_freq=l_freq, h_freq=h_freq, sfreq=int(fs))
    filtered_data = band_filter.fit_transform(detrended_data)

    # Use a fresh name for the epoched data rather than rebinding the
    # ``data_train`` parameter to something of a different kind.
    epochs, labels_train = epoch(
        Z=filtered_data,
        annotation=annotation_train,
        labels=labels,
        interval=[-baseline, duration],
        sfreq=fs,
    )
    print("shape of epoched data: " + str(epochs.shape))

    bl_corrected = Baseline(upper=baseline, fs=int(fs)).fit_transform(epochs)
    final_training = TimeSlicingTransformer(start=baseline, fs=int(fs)).fit_transform(
        bl_corrected
    )
    return final_training, labels_train

In [58]:
def training_procedure(final_training, labels_train, random_state=None):
    """Fit a CSP -> LDA -> random-forest pipeline on the epoched data.

    Parameters
    ----------
    final_training
        Preprocessed epochs passed to ``Pipeline.fit`` as ``X``.
    labels_train
        Class labels passed to ``Pipeline.fit`` as ``y``.
    random_state
        Seed forwarded to ``RandomForestClassifier``. The default ``None``
        keeps the previous (unseeded) behaviour; pass an int for
        reproducible results.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline.
    """
    # CSP yields average band power per component. LDA is not the last step,
    # so the Pipeline uses it as a transformer whose output feeds the forest.
    csp = CSP(transform_into="average_power")
    lda = LinearDiscriminantAnalysis()
    forest = RandomForestClassifier(random_state=random_state)

    pipe = Pipeline(steps=[("CSP", csp), ("LDA", lda), ("RF", forest)])
    pipe.fit(final_training, labels_train)

    # NOTE: a GridSearchCV over "CSP__n_components" in [1, 2, 3, 4, 5, 7, 10]
    # was sketched here earlier but is disabled; CSP currently runs with its
    # default number of components.
    return pipe

In [59]:
def load_training_data():
    """Return the preprocessed calibration data and labels, using a pickle cache.

    The cache consists of two files in the current working directory,
    ``train.pickle`` and ``lables.pickle`` (the misspelling is kept so existing
    caches stay valid). The cache is used only when BOTH files exist; if
    either is missing, the calibration recording is fetched, preprocessed with
    ``preprocessing_procedure`` and both files are rewritten.

    Returns
    -------
    tuple
        ``(final_training, labels)``.
    """
    data_path = "train.pickle"
    labels_path = "lables.pickle"

    if os.path.isfile(data_path) and os.path.isfile(labels_path):
        # SECURITY: pickle.load can execute arbitrary code. Only load cache
        # files that this notebook wrote itself.
        with open(data_path, "rb") as infile:
            final_training = pickle.load(infile)
        with open(labels_path, "rb") as infile:
            labels = pickle.load(infile)
        return final_training, labels

    # Cache missing or incomplete: rebuild both files together so they can
    # never get out of sync with each other.
    raw_mat = fetch_datasets_from_contest("BCICIV_1_mat", ["BCICIV_calib_ds1d.mat"])[0]
    fs, data_train, annotation_train = process_dataset_mat(raw_mat)
    final_training, labels = preprocessing_procedure(
        fs, data_train, annotation_train, labels=[-1, 1], duration=2.5
    )
    with open(data_path, "wb") as outfile:
        pickle.dump(final_training, outfile)
    with open(labels_path, "wb") as outfile:
        pickle.dump(labels, outfile)
    return final_training, labels

In [60]:
def load_test_data():
    """Return the preprocessed evaluation data and labels, using a pickle cache.

    The cache consists of two files in the current working directory,
    ``test.pickle`` and ``testlables.pickle`` (the misspelling is kept so
    existing caches stay valid). The cache is used only when BOTH files exist;
    if either is missing, the evaluation recording is fetched, preprocessed
    with ``preprocessing_procedure`` and both files are rewritten.

    Returns
    -------
    tuple
        ``(final_test, test_labels)``.
    """
    data_path = "test.pickle"
    labels_path = "testlables.pickle"

    if os.path.isfile(data_path) and os.path.isfile(labels_path):
        # SECURITY: pickle.load can execute arbitrary code. Only load cache
        # files that this notebook wrote itself.
        with open(data_path, "rb") as infile:
            final_test = pickle.load(infile)
        with open(labels_path, "rb") as infile:
            test_labels = pickle.load(infile)
        return final_test, test_labels

    # Cache missing or incomplete: rebuild both files together so they can
    # never get out of sync with each other.
    raw_mat = fetch_datasets_from_contest("BCICIV_1_mat", ["BCICIV_eval_ds1d.mat"])[0]
    fs, data_test, annotation_test = process_dataset_mat(raw_mat)
    final_test, test_labels = preprocessing_procedure(
        fs, data_test, annotation_test, labels=[-1, 1], duration=2.5
    )
    with open(data_path, "wb") as outfile:
        pickle.dump(final_test, outfile)
    with open(labels_path, "wb") as outfile:
        pickle.dump(test_labels, outfile)
    return final_test, test_labels

# BCI dataset

In [63]:
# Load the (cached) epochs, fit the pipeline and evaluate it on the test set.
final_training, labels = load_training_data()
test_data, test_labels = load_test_data()

pipe = training_procedure(final_training, labels)
# Keep the predictions: only the last expression of a cell is displayed, so a
# bare predict() call in the middle of the cell would be computed and lost.
predictions = pipe.predict(test_data)
pipe.score(test_data, test_labels)

Computing rank from data with rank=None
    Using tolerance 4.5e-13 (2.2e-16 eps * 59 dim * 34  max singular value)
    Estimated rank (mag): 59
    MAG: rank 59 computed from 59 data channels with 0 projectors
Reducing data rank from 59 -> 59
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 4.1e-13 (2.2e-16 eps * 59 dim * 31  max singular value)
    Estimated rank (mag): 59
    MAG: rank 59 computed from 59 data channels with 0 projectors
Reducing data rank from 59 -> 59
Estimating covariance using EMPIRICAL
Done.


KeyboardInterrupt: 