In [1]:
from src.data_preparation.data_preparation import read_eeg_file
from src.data_preparation.data_preparation import EEG
from scipy import signal
from sklearn.model_selection import StratifiedKFold
from scipy import linalg
import pyriemann.utils.mean as rie_mean
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, zero_one_loss
import numpy as np



Define the constants that parameterize the algorithm

In [2]:
# Sampling frequency; handed to the band-pass filter design as `fs`.
FS = 100
# Forwarded to read_eeg_file. Written as a multiple of FS, so presumably a
# length in samples (2 * FS) — TODO confirm against read_eeg_file.
TIME_LENGTH = 2 * FS
# Forwarded to read_eeg_file; presumably the window size in samples — TODO confirm.
TIME_WINDOW = 2 * FS
# Forwarded to read_eeg_file; None presumably means "no epoching" — TODO confirm.
EPOCH_SIZE = None
# Folder containing the left-hand-subject-N.csv / right-hand-subject-N.csv files.
DATA_FOLDER = "data/csp/bci-iii-dataset-iv-a/subject-independent"
# Number of CSP features per trial; half of the spatial filters are taken from
# each end of the sorted eigenvalue spectrum.
CSP_COMPONENTS = 6

Define the objects to store the evaluation data 

In [3]:
# Evaluation set-up: number of folds per cross-validation run and the subject ids.
K_FOLD = 10
subjects = range(1, 6)

# Four independent score tables. Each maps a classifier name to its own
# zero-filled array of shape (number of subjects, K_FOLD):
#   accuracies / misclassification_rate
#       -> per-fold scores of the cross-validation run in progress
#   final_accuracies / final_misclassification_rate
#       -> one averaged score per cross-validation repetition
(
    accuracies,
    final_accuracies,
    misclassification_rate,
    final_misclassification_rate,
) = (
    {name: np.zeros((len(subjects), K_FOLD)) for name in ("GNB", "SVM", "LDA")}
    for _ in range(4)
)

Define the bandpass filter to be used in the pre-processing step

In [4]:
# Digital Butterworth band-pass (order argument 5, band edges 7 and 30 given in
# the units of FS), returned as second-order sections for use with sosfilt.
sos = signal.butter(
    5,
    [7, 30],
    btype="band",
    analog=False,
    output="sos",
    fs=FS,
)

Load the EEG data

In [5]:
# One read_eeg_file result per subject, stored in subject order
# (data[0] belongs to the first entry of `subjects`).
data = []
for subject in subjects:
    csv_paths = (
        f"{DATA_FOLDER}/left-hand-subject-{subject}.csv",
        f"{DATA_FOLDER}/right-hand-subject-{subject}.csv",
    )
    data.append(read_eeg_file(*csv_paths, TIME_LENGTH, TIME_WINDOW, EPOCH_SIZE))

Define the function that computes the common spatial pattern (CSP) filters from the training data

In [6]:
def compute_spatial_filters(train_data):
    """Compute the CSP projection matrix from the two classes of training trials.

    For each class, every trial is band-pass filtered along axis 0 with the
    module-level ``sos`` filter, its channel covariance is estimated, and the
    per-trial covariances are combined with the Riemannian mean. The
    generalized eigenvalue problem of the two class means is then solved.

    Parameters
    ----------
    train_data
        Object exposing ``n_channels``, ``n_left_trials``, ``left_data``,
        ``n_right_trials`` and ``right_data``. Each trial is indexed as
        ``left_data[i]`` / ``right_data[i]`` and is filtered along axis 0.

    Returns
    -------
    numpy.ndarray
        The selected eigenvectors as columns: first the ``CSP_COMPONENTS // 2``
        with the smallest eigenvalue magnitude, then the same number with the
        largest.
    """

    def class_mean_covariance(trials, n_trials):
        # Per-trial channel covariance of the band-passed signal, then the
        # Riemannian mean over all trials of the class.
        trial_covs = np.zeros((n_trials, train_data.n_channels, train_data.n_channels))
        for idx in range(n_trials):
            band_passed = signal.sosfilt(sos, trials[idx], axis=0)
            trial_covs[idx] = np.cov(np.transpose(band_passed))
        return rie_mean.mean_covariance(trial_covs, metric="riemann")

    cov_1 = class_mean_covariance(train_data.left_data, train_data.n_left_trials)
    cov_2 = class_mean_covariance(train_data.right_data, train_data.n_right_trials)

    # Generalized eigenvalue problem cov_1 v = w cov_2 v.
    # NOTE(review): both inputs are mean covariance matrices; linalg.eigh may be
    # the better-suited solver here — confirm before switching.
    eigenvalues, eigenvectors = linalg.eig(cov_1, cov_2, right=True)
    order = np.argsort(np.abs(eigenvalues))

    # Keep the filters from both extremes of the spectrum.
    n_pairs = CSP_COMPONENTS // 2
    chosen = np.empty(2 * n_pairs, dtype=int)
    chosen[:n_pairs] = order[:n_pairs]
    chosen[n_pairs:] = order[-n_pairs:]

    return eigenvectors[:, chosen]

Define the function to apply the spatial filter and extract the features 

In [7]:
def extract_features(X, W):
    """Project every trial through the spatial filters and return log-variance features.

    Each trial is transposed and multiplied by ``W`` transposed, the projected
    signals are band-pass filtered along the time axis with the module-level
    ``sos`` filter, and the feature for each filter is the logarithm of its
    variance divided by the summed variance of all filters.

    Parameters
    ----------
    X
        Sequence of trials; ``X[i]`` is a 2-D array whose second axis matches
        the rows of ``W``.
    W
        2-D array with one spatial filter per column.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X), number of columns of W)``.
    """
    # Size the output from W itself rather than from the global CSP_COMPONENTS:
    # the filter matrix can hold a different number of columns (for instance
    # when CSP_COMPONENTS is odd), which previously caused a shape mismatch.
    n_filters = np.shape(W)[1]
    n_trials = len(X)
    F = np.zeros((n_trials, n_filters))
    for n_trial in range(n_trials):
        projected = np.dot(np.transpose(W), np.transpose(X[n_trial]))
        projected = signal.sosfilt(sos, projected, axis=1)
        variances = np.var(projected, axis=1)
        F[n_trial] = np.log(variances / np.sum(variances))

    return F

Iterate over the subjects, applying the algorithm to each one and validating
the results with 10x10-fold cross-validation (10 repetitions of 10-fold cross-validation)

In [8]:
# Repeated stratified K-fold cross-validation, run separately for every subject.
for subject in subjects:
    print(f"Subject {subject} ...")
    # Subject ids start at 1, while `data` and the score tables are 0-indexed.
    subject_index = subject - 1
    subject_data = data[subject_index]

    # Every pass of this loop is one complete K_FOLD-fold cross-validation run.
    for fold in range(K_FOLD):
        # Seed the shuffle with the repetition number: each repetition gets a
        # different partition, yet a re-run of the notebook reproduces the
        # same numbers.
        cv = StratifiedKFold(n_splits=K_FOLD, shuffle=True, random_state=fold)
        for k, (train_index, test_index) in enumerate(cv.split(subject_data.X, subject_data.labels)):
            X_train, X_test = subject_data.X[train_index], subject_data.X[test_index]
            y_train, y_test = subject_data.labels[train_index], subject_data.labels[test_index]

            # NOTE(review): the classifiers below are fitted and scored against
            # y_train / y_test, so this relies on EEG(...).X keeping the same
            # trial order as X_train / X_test — confirm.
            train_data = EEG(X_train[y_train == 0], X_train[y_train == 1])
            test_data = EEG(X_test[y_test == 0], X_test[y_test == 1], False)

            # The spatial filters are learned from the training split only.
            W = compute_spatial_filters(train_data)

            features = {
                "train": extract_features(train_data.X, W),
                "test": extract_features(test_data.X, W),
            }

            # Fresh, unfitted estimators for every fold.
            models = {
                "GNB": GaussianNB(),
                "SVM": SVC(C=.8, kernel="rbf", gamma="scale"),
                "LDA": LinearDiscriminantAnalysis(),
            }
            for name, model in models.items():
                model.fit(features["train"], y_train)
                predictions = model.predict(features["test"])
                accuracies[name][subject_index][k] = accuracy_score(y_test, predictions)
                misclassification_rate[name][subject_index][k] = zero_one_loss(y_test, predictions)

        # Collapse the K_FOLD per-fold scores of this repetition into one value.
        for classifier in accuracies:
            final_accuracies[classifier][subject_index][fold] = np.mean(accuracies[classifier][subject_index])
            final_misclassification_rate[classifier][subject_index][fold] = np.mean(misclassification_rate[classifier][subject_index])

# Accuracy report: per subject, mean and standard deviation over the
# cross-validation repetitions; then the same statistics over every subject
# and repetition together.
print("\nAccuracy")
for classifier, per_subject_scores in final_accuracies.items():
    print(classifier)
    for subject, cv_accuracies in enumerate(per_subject_scores):
        acc_mean, acc_std = np.mean(cv_accuracies), np.std(cv_accuracies)
        print(f"\tSubject {subject+1} average accuracy: {acc_mean:.4f} +/- {acc_std:.4f}")
    average_acc_mean, average_acc_std = np.mean(per_subject_scores), np.std(per_subject_scores)
    print(f"\tAverage accuracy: {average_acc_mean:.4f} +/- {average_acc_std:.4f}")

# Misclassification report: per subject, mean and standard deviation over the
# cross-validation repetitions; then the same statistics over every subject
# and repetition together.
print("\nMisclassification")
for classifier in final_misclassification_rate:
    print(classifier)
    for subject, cv_misclassification_rate in enumerate(final_misclassification_rate[classifier]):
        misclassification_rate_mean = np.mean(cv_misclassification_rate)
        misclassification_rate_std = np.std(cv_misclassification_rate)
        print(f"\tSubject {subject+1} misclassification rate: {misclassification_rate_mean:.4f} +/- {misclassification_rate_std:.4f}")
    # Summarize the per-repetition averages, exactly as the accuracy report does.
    # The per-fold table `misclassification_rate` only holds the last
    # cross-validation run of each subject, so it must not be used here.
    average_misclassification_rate_mean = np.mean(final_misclassification_rate[classifier])
    average_misclassification_rate_std = np.std(final_misclassification_rate[classifier])
    print(f"\tMisclassification rate: {average_misclassification_rate_mean:.4f} +/- {average_misclassification_rate_std:.4f}")
    

Subject 1 ...
Subject 2 ...
Subject 3 ...
Subject 4 ...
Subject 5 ...

Accuracy
GNB
	Subject 1 average accuracy: 0.7493 +/- 0.0126
	Subject 2 average accuracy: 0.9614 +/- 0.0038
	Subject 3 average accuracy: 0.6575 +/- 0.0111
	Subject 4 average accuracy: 0.7771 +/- 0.0178
	Subject 5 average accuracy: 0.9361 +/- 0.0067
	Average accuracy: 0.8163 +/- 0.1160
SVM
	Subject 1 average accuracy: 0.7700 +/- 0.0092
	Subject 2 average accuracy: 0.9636 +/- 0.0027
	Subject 3 average accuracy: 0.6757 +/- 0.0153
	Subject 4 average accuracy: 0.8018 +/- 0.0113
	Subject 5 average accuracy: 0.9321 +/- 0.0075
	Average accuracy: 0.8286 +/- 0.1067
LDA
	Subject 1 average accuracy: 0.7921 +/- 0.0130
	Subject 2 average accuracy: 0.9621 +/- 0.0024
	Subject 3 average accuracy: 0.6807 +/- 0.0134
	Subject 4 average accuracy: 0.7996 +/- 0.0092
	Subject 5 average accuracy: 0.9357 +/- 0.0051
	Average accuracy: 0.8341 +/- 0.1036

Misclassification
GNB
	Subject 1 misclassification rate: 0.2507 +/- 0.0126
	Subject 2 miscl