In [1]:
from src.data_preparation.data_preparation import read_eeg_file
from src.algorithms.csp.CSP import CSP
from scipy import signal
from src.algorithms.fbcsp.MIBIFFeatureSelection import MIBIFFeatureSelection
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
import numpy as np

Define the constants for the data segmentation

In [8]:
# Segmentation settings; all three are forwarded positionally to read_eeg_file
# when the per-subject recordings are loaded.
# NOTE(review): units are not visible in this notebook — presumably sample counts
# at the 250 Hz rate used by the filter bank; confirm against read_eeg_file.
TIME_LENGTH = 750
TIME_WINDOW = 500
# None is passed through unchanged; its meaning is defined by read_eeg_file.
EPOCH_SIZE = None

Define the path of the data

In [3]:
# Folder holding the per-subject CSV files, named
# "left-hand-subject-<n>.csv" and "right-hand-subject-<n>.csv".
# The path is relative, so it is resolved against the kernel's working directory.
DATA_FOLDER = "data/fbcsp/bci-iv-dataset-2b/with-feedback-to-user"

Define the object to store the accuracies

In [4]:
# Cross-validation layout: K_FOLD folds for each subject id in 1..9.
K_FOLD = 10
subjects = range(1, 10)

# One independent (subject x fold) matrix of accuracies per classifier,
# zero-initialised and filled in by the cross-validation loop.
accuracies = {
    classifier_name: np.zeros((len(subjects), K_FOLD))
    for classifier_name in ("GNB", "SVM", "LDA")
}

Define the frequency bands used by the filter bank

In [5]:
# Build contiguous (low, high) frequency bands of equal width that tile the
# interval from min_freq up to max_freq.
band_length, min_freq, max_freq = 4, 4, 40

band_starts = range(min_freq, max_freq, band_length)
band_stops = range(min_freq + band_length, max_freq + band_length, band_length)
bands = list(zip(band_starts, band_stops))
quantity_bands = len(bands)

# Only `bands` and `quantity_bands` are meant to outlive this cell.
del band_length, min_freq, max_freq, band_starts, band_stops

Define the function to apply the filter bank to the data

In [6]:
def filter_bank(x, freq_bands=None, fs=250, order=5, stopband_attenuation=48):
    """Band-pass filter every trial of ``x`` with each band of the filter bank.

    For each ``(low, high)`` band a Chebyshev type II band-pass filter is
    designed once and applied forwards and backwards (``signal.filtfilt``)
    along axis 1 of ``x``, i.e. along axis 0 of every individual trial.

    Parameters
    ----------
    x : np.ndarray
        Trials stacked along axis 0.
        NOTE(review): axis 1 is assumed to be the time axis — confirm against
        the layout produced by read_eeg_file.
    freq_bands : sequence of (low, high) pairs, optional
        Pass-band edges in Hz. Defaults to the notebook-level ``bands``.
    fs : float, optional
        Sampling frequency in Hz handed to the filter design (default 250).
    order : int, optional
        Order passed to ``signal.cheby2`` (default 5).
    stopband_attenuation : float, optional
        Minimum stop-band attenuation in dB passed to ``signal.cheby2``
        (default 48).

    Returns
    -------
    np.ndarray
        Array of shape ``(len(freq_bands), *x.shape)``; entry ``[i]`` holds
        ``x`` filtered with band ``i``.
    """
    if freq_bands is None:
        freq_bands = bands

    data = np.zeros((len(freq_bands), *x.shape))

    # Nothing to filter when there are no trials; keep the empty, correctly
    # shaped result instead of handing an empty array to filtfilt.
    if x.shape[0] == 0:
        return data

    for i, (low_freq, high_freq) in enumerate(freq_bands):
        # The filter depends only on the band, so it is designed once per band
        # rather than once per trial.
        b, a = signal.cheby2(order, stopband_attenuation, [low_freq, high_freq],
                             btype="bandpass", fs=fs)

        # filtfilt treats every 1-D slice along `axis` independently, so one
        # call over axis 1 filters all trials of this band at once.
        data[i] = signal.filtfilt(b, a, x, axis=1)

    return data

Read the EEG data

In [9]:
print("Loading data ...")
data_by_subject = []

for subject in subjects:
    # One left-hand and one right-hand CSV file per subject.
    left_data_file = f"{DATA_FOLDER}/left-hand-subject-{subject}.csv"
    right_data_file = f"{DATA_FOLDER}/right-hand-subject-{subject}.csv"
    data = read_eeg_file(left_data_file, right_data_file, TIME_LENGTH, TIME_WINDOW, EPOCH_SIZE)

    # Stack the left-hand trials first and the right-hand trials after them;
    # the cross-validation cell relies on this ordering to split the indices.
    data.X = np.concatenate([data.left_data, data.right_data], axis=0)
    data_by_subject += [data]

# Drop the loop temporaries so they do not linger in the kernel namespace.
del subject, left_data_file, right_data_file, data

print("Data loaded")

Loading data ...
Data loaded


Extract the features and classify them using 10-fold cross-validation

In [10]:
# Number of CSP components requested per band; also handed to the MIBIF selector.
N_CSP_COMPONENTS = 2
# Fixed seed for the shuffled K-fold split, so that a fresh "Restart & Run All"
# reproduces the same folds (and therefore the same accuracies).
CV_RANDOM_STATE = 42

for (i, data) in enumerate(data_by_subject):
    print("Subject: ", i+1)

    # data.X was built as concatenate((left_data, right_data)): indices below
    # `trials` address left-hand trials, the remaining ones right-hand trials.
    trials = len(data.left_data)

    cv = KFold(n_splits=K_FOLD, shuffle=True, random_state=CV_RANDOM_STATE)
    for (k, (train_index, test_index)) in enumerate(cv.split(data.X)):
        # Split the fold indices back into per-class indices.
        train_left_index = train_index[train_index < trials]
        train_right_index = train_index[train_index >= trials] - trials
        X_left_train, X_right_train = data.left_data[train_left_index], data.right_data[train_right_index]

        test_left_index = test_index[test_index < trials]
        test_right_index = test_index[test_index >= trials] - trials
        X_left_test, X_right_test = data.left_data[test_left_index], data.right_data[test_right_index]

        # NOTE(review): the features below are stacked left-then-right, while the
        # labels are taken in fold-index order. The two line up only if the fold
        # indices are ascending and data.labels lists every left-hand trial before
        # the right-hand ones — confirm against KFold and read_eeg_file.
        y_train, y_test = data.labels[train_index], data.labels[test_index]

        # Feature extraction: one CSP per frequency band, fitted on the training fold only.
        csp_by_band = [CSP(average_trial_covariance=False, n_components=N_CSP_COMPONENTS)
                       for _ in bands]

        left_bands_training = filter_bank(X_left_train)
        right_bands_training = filter_bank(X_right_train)
        left_bands_test = filter_bank(X_left_test)
        right_bands_test = filter_bank(X_right_test)

        train_feature_blocks = []
        test_feature_blocks = []
        for n_band, csp in enumerate(csp_by_band):
            left_band_training = left_bands_training[n_band]
            right_band_training = right_bands_training[n_band]
            left_band_test = left_bands_test[n_band]
            right_band_test = right_bands_test[n_band]

            csp.fit(left_band_training, right_band_training)
            x_train = np.concatenate((left_band_training, right_band_training))
            x_test = np.concatenate((left_band_test, right_band_test))

            train_feature_blocks.append(csp.compute_features(x_train))
            test_feature_blocks.append(csp.compute_features(x_test))

        # Join the per-band features side by side (one group of columns per band).
        features_train = np.concatenate(train_feature_blocks, axis=1)
        features_test = np.concatenate(test_feature_blocks, axis=1)

        # Feature Selection
        # NOTE(review): the positional 4 is presumably the number of features the
        # selector keeps — confirm against MIBIFFeatureSelection.
        selected_features = MIBIFFeatureSelection(features_train, features_test, y_train, N_CSP_COMPONENTS, 4, scale=True)

        selected_training_features = selected_features.training_features
        selected_test_features = selected_features.test_features

        # Classification: fresh estimators for every fold, all trained and scored
        # on the same selected features.
        gnb = GaussianNB()
        svm = SVC(C=.8, kernel="rbf")
        lda = LinearDiscriminantAnalysis()
        for name, model in (("GNB", gnb), ("SVM", svm), ("LDA", lda)):
            model.fit(selected_training_features, y_train)
            predictions = model.predict(selected_test_features)
            accuracies[name][i][k] = accuracy_score(y_test, predictions)

Subject:  1
Subject:  2
Subject:  3
Subject:  4
Subject:  5
Subject:  6
Subject:  7
Subject:  8
Subject:  9


Evaluation

In [11]:
# For every classifier, print the mean and standard deviation of the fold
# accuracies per subject, then the same statistics over all entries.
for classifier in accuracies:
    print(classifier)

    # Each row of the matrix holds the fold accuracies of one subject.
    for subject, cv_accuracies in enumerate(accuracies[classifier]):
        acc_mean, acc_std = cv_accuracies.mean(), cv_accuracies.std()
        print(f"\tSubject {subject+1} average accuracy: {acc_mean:.4f} +/- {acc_std:.4f}")

    # Statistics pooled over every (subject, fold) entry of this classifier.
    average_acc_mean = accuracies[classifier].mean()
    average_acc_std = accuracies[classifier].std()
    print(f"\tAverage accuracy: {average_acc_mean:.4f} +/- {average_acc_std:.4f}")


GNB
	Subject 1 average accuracy: 0.6621 +/- 0.0892
	Subject 2 average accuracy: 0.5135 +/- 0.0779
	Subject 3 average accuracy: 0.5599 +/- 0.0620
	Subject 4 average accuracy: 0.9283 +/- 0.0196
	Subject 5 average accuracy: 0.8414 +/- 0.0701
	Subject 6 average accuracy: 0.7476 +/- 0.0628
	Subject 7 average accuracy: 0.7865 +/- 0.0573
	Subject 8 average accuracy: 0.7918 +/- 0.0694
	Subject 9 average accuracy: 0.7868 +/- 0.0432
	Average accuracy: 0.7353 +/- 0.1412
SVM
	Subject 1 average accuracy: 0.6361 +/- 0.0794
	Subject 2 average accuracy: 0.5083 +/- 0.0975
	Subject 3 average accuracy: 0.5426 +/- 0.0729
	Subject 4 average accuracy: 0.9109 +/- 0.0357
	Subject 5 average accuracy: 0.8150 +/- 0.0602
	Subject 6 average accuracy: 0.7196 +/- 0.0744
	Subject 7 average accuracy: 0.7838 +/- 0.0483
	Subject 8 average accuracy: 0.7887 +/- 0.0705
	Subject 9 average accuracy: 0.7632 +/- 0.0333
	Average accuracy: 0.7187 +/- 0.1412
LDA
	Subject 1 average accuracy: 0.6677 +/- 0.0840
	Subject 2 average ac