In [89]:
from src.data_preparation.data_preparation import read_eeg_file, get_channels_indexes
from scipy import signal
from scipy import linalg
from sklearn.model_selection import StratifiedKFold
from src.algorithms.fbcsp.MIBIFFeatureSelection import MIBIFFeatureSelection
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
import pyriemann.utils.mean as rie_mean
import numpy as np

Define some parameters

In [105]:
FS = 100
TIME_LENGTH = 2 * FS
TIME_WINDOW = 2 * FS
CSP_COMPONENTS = 8
DATA_FOLDER = "data/fbcsp/bci-iii-dataset-iv-a"
K_FOLD = 10
METRIC_COVARIANCE_ESTIMATION = "euclid"

Make the manual channel selection

In [91]:
channels_names = ["FC2", "FC4", "FC6", "CFC2", "CFC4", "CFC6", "C2", "C4", "C6", 
                  "CCP2", "CCP4", "CCP6", "CP2", "CP4", "CP6", "FC5", "FC3", "FC1", 
                  "CFC5", "CFC3", "CFC1", "C5", "C3", "C1", "CCP5", "CCP3", "CCP1", 
                  "CP5", "CP3", "CP1"]
channels_indexes = get_channels_indexes(f"{DATA_FOLDER}/channels_positions.txt", channels_names)

Define the objects with the bands used by the Filter Bank

In [92]:
band_length = 6
min_freq = 4
max_freq = 40
bands = [(x, x+band_length) for x in range(min_freq, max_freq, band_length)]
quantity_bands = len(bands)

filter_bank = []
for (low_freq, high_freq) in bands:
    filter_bank.append(signal.cheby2(10, 50, [low_freq, high_freq], analog=False, btype="band", output="sos", fs=FS))

Define the function to generate the common spatial filter's based on the test data 

In [93]:
def compute_spatial_filters(left_data, right_data, sos):
    n_channels = left_data.shape[2]
    cov_shape = (n_channels, n_channels)
    
    # Estimate the covariance matrix of every trial
    n_left_trials = left_data.shape[0]
    cov = np.zeros((n_left_trials, *cov_shape))
    for n_trial in range(n_left_trials):
        trial = signal.sosfilt(sos, left_data[n_trial], axis=0)
        cov[n_trial] = np.cov(np.transpose(trial))

    # calculate average of covariance matrix
    cov_1 = rie_mean.mean_covariance(cov, metric=METRIC_COVARIANCE_ESTIMATION)
    
    # Estimate the covariance matrix of every trial
    n_right_trials = right_data.shape[0]
    cov = np.zeros((n_right_trials, *cov_shape))
    for n_trial in range(n_right_trials):
        trial = signal.sosfilt(sos, right_data[n_trial], axis=0)
        cov[n_trial] = np.cov(np.transpose(trial))

    # calculate average of covariance matrix
    cov_2 = rie_mean.mean_covariance(cov, metric=METRIC_COVARIANCE_ESTIMATION)
    
    # Solve the generalized eigenvalue problem
    n_pairs = CSP_COMPONENTS//2
    w, vr = linalg.eig(cov_1, cov_2, right=True)
    w = np.abs(w)
    sorted_indexes = np.argsort(w)
    chosen_indexes = np.zeros(2*n_pairs).astype(int)
    chosen_indexes[0:n_pairs] = sorted_indexes[0:n_pairs]
    chosen_indexes[n_pairs:2*n_pairs] = sorted_indexes[-n_pairs:]

    return vr[:, chosen_indexes]

Define the function to apply the spatial filter and extract the features

In [94]:
def extract_features(X, W, sos):
    trials = len(X)
    F = np.zeros((trials, CSP_COMPONENTS))
    for n_trial in range(trials):
        x = signal.sosfilt(sos, X[n_trial], axis=0)
        z = np.dot(np.transpose(W), np.transpose(x))
        F[n_trial] = np.log(np.divide(np.var(z, axis=1), np.sum(np.var(z, axis=1))))
        
    return F

Define the function to process a fold with the following steps:

- Split the data into train and test
- Generate the spatial filters to each band
- Extract the features to train data
- Extract the features to test data
- Make feature selection on the features
- Classify the features using GNB, LDA and SVM algorithms

In [102]:
def process_fold(X, Y, train_index, test_index):
    train = {
        "X": X[train_index],
        "Y": Y[train_index]
    }
    train["left"] = train["X"][train["Y"] == 0]
    train["right"] = train["X"][train["Y"] == 1]
    train["trials"] = train["X"].shape[0]
    
    test = {
        "X": X[test_index],
        "Y": Y[test_index]
    }
    test["left"] = test["X"][test["Y"] == 0]
    test["right"] = test["X"][test["Y"] == 1]
    test["trials"] = test["X"].shape[0]

    # Feature extraction
    spatial_filters = [compute_spatial_filters(train["left"], train["right"], sos)
                       for sos in filter_bank]

    train["F"] = np.zeros((quantity_bands, train["trials"], CSP_COMPONENTS))
    for n_band in range(quantity_bands):
        sos = filter_bank[n_band]
        W = spatial_filters[n_band]
        train["F"][n_band] = extract_features(train["X"], W, sos)
    train["F"] = np.concatenate(train["F"], axis=1)
    
    test["F"] = np.zeros((quantity_bands, test["trials"], CSP_COMPONENTS))
    for n_band in range(quantity_bands):
        sos = filter_bank[n_band]
        W = spatial_filters[n_band]
        test["F"][n_band] = extract_features(test["X"], W, sos)
    test["F"] = np.concatenate(test["F"], axis=1)
    
    # Feature Selection
    selected_features = MIBIFFeatureSelection(train["F"], test["F"], train["Y"], CSP_COMPONENTS, 4, scale=False)
    train["F"] = selected_features.training_features
    test["F"] = selected_features.test_features

    # GNB classifier
    gnb = GaussianNB(priors=[.5, .5])
    gnb.fit(train["F"], train["Y"])
    gnb_predictions = gnb.predict(test["F"])
    gnb_accuracy = accuracy_score(test["Y"], gnb_predictions)

    # SVM classifier
    svm = SVC(C=.8, gamma="scale", kernel="rbf")
    svm.fit(train["F"], train["Y"])
    svm_predictions = svm.predict(test["F"])
    svm_accuracy = accuracy_score(test["Y"], svm_predictions)

    # LDA classifier
    lda = LinearDiscriminantAnalysis()
    lda.fit(train["F"], train["Y"])
    lda_predictions = lda.predict(test["F"])
    lda_accuracy = accuracy_score(test["Y"], lda_predictions)
    
    return gnb_accuracy, svm_accuracy, lda_accuracy
    

Extract the features and classify applying 10 cross-validation

In [106]:
subjects = range(1, 6)
accuracies = {
    "GNB": np.zeros(len(subjects)),
    "SVM": np.zeros(len(subjects)),
    "LDA": np.zeros(len(subjects))
}

for subject in subjects:
    print("Subject: ", subject)

    left_data_file = f"{DATA_FOLDER}/left-hand-subject-{subject}.csv"
    right_data_file = f"{DATA_FOLDER}/right-hand-subject-{subject}.csv"
    data = read_eeg_file(left_data_file, right_data_file, TIME_LENGTH, TIME_WINDOW, channels_indexes=channels_indexes)
    
    subject_index = subject - 1
    for fold in range(K_FOLD):
        k_fold_accuracies = {
            "GNB": np.zeros((len(subjects), K_FOLD)),
            "SVM": np.zeros((len(subjects), K_FOLD)),
            "LDA": np.zeros((len(subjects), K_FOLD))
        }
        
        cv = StratifiedKFold(n_splits=K_FOLD, shuffle=True)
        for (k, (train_index, test_index)) in enumerate(cv.split(data.X, data.labels)):
            gnb_accuracy, svm_accuracy, lda_accuracy = process_fold(data.X, data.labels, train_index, test_index)
            
            k_fold_accuracies["GNB"][subject_index][k] = gnb_accuracy
            k_fold_accuracies["SVM"][subject_index][k] = svm_accuracy
            k_fold_accuracies["LDA"][subject_index][k] = lda_accuracy
        
        accuracies["GNB"][subject_index] = np.mean(k_fold_accuracies["GNB"][subject_index])
        accuracies["SVM"][subject_index] = np.mean(k_fold_accuracies["SVM"][subject_index])
        accuracies["LDA"][subject_index] = np.mean(k_fold_accuracies["LDA"][subject_index])
        

Subject:  1
Subject:  2
Subject:  3
Subject:  4
Subject:  5


Print the accuracies obtained

In [113]:
for classifier in accuracies:
    print(classifier)
    for subject, accuracy in enumerate(accuracies[classifier]):
        print(f"\tSubject {subject+1} average accuracy: {accuracy*100:.4f}")
    average_acc_mean = np.mean(accuracies[classifier]) * 100
    average_acc_std = np.std(accuracies[classifier]) * 100
    print(f"\tAverage accuracy: {average_acc_mean:.4f} +/- {average_acc_std:.4f}")

GNB
	Subject 1 average accuracy: 86.1111
	Subject 2 average accuracy: 97.9902
	Subject 3 average accuracy: 71.9608
	Subject 4 average accuracy: 96.2582
	Subject 5 average accuracy: 84.3627
	Average accuracy: 87.3366 +/- 9.3793
SVM
	Subject 1 average accuracy: 85.4412
	Subject 2 average accuracy: 98.8562
	Subject 3 average accuracy: 71.0784
	Subject 4 average accuracy: 96.5686
	Subject 5 average accuracy: 83.7582
	Average accuracy: 87.1405 +/- 9.9846
LDA
	Subject 1 average accuracy: 84.6405
	Subject 2 average accuracy: 97.9902
	Subject 3 average accuracy: 69.3301
	Subject 4 average accuracy: 95.3922
	Subject 5 average accuracy: 84.5915
	Average accuracy: 86.3889 +/- 10.1286
