In [1]:
from src.data_preparation.data_preparation import read_eeg_file, get_channels_indexes
from scipy import signal
from scipy import linalg
from scipy.integrate import simps
from scipy import stats
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
import pywt
import pyriemann.utils.mean as rie_mean
import numpy as np



Declaration some parameters

In [2]:
FS = 100
TIME_LENGTH = 3 * FS
TIME_WINDOW = 3 * FS
DATA_FOLDER = "data/specific-band-csp-features/bci-iii-dataset-iv-a"
CSP_COMPONENTS = 8
WAVELET = "coif1"
K_FOLD = 10
METRIC_COVARIANCE_ESTIMATION = "euclid"

Make the manual channel selection

In [3]:
channels_names = ["FC2", "FC4", "FC6", "CFC2", "CFC4", "CFC6", "C2", "C4", "C6", 
                  "CCP2", "CCP4", "CCP6", "CP2", "CP4", "CP6", "FC5", "FC3", "FC1", 
                  "CFC5", "CFC3", "CFC1", "C5", "C3", "C1", "CCP5", "CCP3", "CCP1", 
                  "CP5", "CP3" "CP1"]
channels_indexes = get_channels_indexes(f"{DATA_FOLDER}/channels_positions.txt", channels_names)

Define the bandpass filter

In [5]:
sos = signal.cheby2(10, 50, [7, 30], analog=False, btype="band", output="sos", fs=FS)

Define the function to generate the common spatial filter's based on the test data 

In [6]:
def compute_spatial_filters(left_data, right_data):
    n_channels = left_data.shape[2]
    cov_shape = (n_channels, n_channels)
            
    # Estimate the covariance matrix of every trial
    n_left_trials = left_data.shape[0]
    cov = np.zeros((n_left_trials, *cov_shape))
    for n_trial in range(n_left_trials):
        trial = signal.sosfilt(sos, left_data[n_trial], axis=0)
        cov[n_trial] = np.cov(np.transpose(trial))

    # calculate average of covariance matrix
    cov_1 = rie_mean.mean_covariance(cov, metric=METRIC_COVARIANCE_ESTIMATION)
    
    # Estimate the covariance matrix of every trial
    n_right_trials = right_data.shape[0]
    cov = np.zeros((n_right_trials, *cov_shape))
    for n_trial in range(n_right_trials):
        trial = signal.sosfilt(sos, right_data[n_trial], axis=0)
        cov[n_trial] = np.cov(np.transpose(trial))

    # calculate average of covariance matrix
    cov_2 = rie_mean.mean_covariance(cov, metric=METRIC_COVARIANCE_ESTIMATION)
    
    # Solve the generalized eigenvalue problem
    n_pairs = CSP_COMPONENTS//2
    w, vr = linalg.eig(cov_1, cov_2, right=True)
    w = np.abs(w)
    sorted_indexes = np.argsort(w)
    chosen_indexes = np.zeros(2*n_pairs).astype(int)
    chosen_indexes[0:n_pairs] = sorted_indexes[0:n_pairs]
    chosen_indexes[n_pairs:2*n_pairs] = sorted_indexes[-n_pairs:]
    
    return vr[:, chosen_indexes]

In [7]:
def extract_features(X):
    trials = X.shape[0]
    F = np.zeros((trials, 2, CSP_COMPONENTS))
    for n_trial in range(trials):
        x = X[n_trial]
        x = signal.sosfilt(sos, x, axis=0)
        z = np.dot(np.transpose(W), np.transpose(x))
        
        # Calculate the wavelet features
        for n_feature in range(CSP_COMPONENTS):
            alpha_band, beta_band = pywt.dwt(z[n_feature], WAVELET)
            F[n_trial, 0, n_feature] = np.sum(np.abs(beta_band) ** 2)

        # Calculate the frequency-domain features
        psd_window_size = 100
        psd_window_overlap = psd_window_size//2
        low, high = 13, 30
        for n_feature in range(CSP_COMPONENTS):
            freqs, psd = signal.welch(z[n_feature], fs=FS, window="hann",
                                     nperseg=psd_window_size, noverlap=psd_window_overlap)
            beta_freqs = np.logical_and(freqs >= low, freqs <= high)
            freq_res = freqs[1] - freqs[0]
            F[n_trial, 1, n_feature] = simps(psd[beta_freqs], dx=freq_res)
        
    return F

Iterate on the subjects applying the algorithm, 
validating the results using the technique 10-fold cross-validation

In [8]:
subjects = range(1, 6)
subjects_set = set(subjects)
accuracies = {
    "GNB": np.zeros((len(subjects), K_FOLD)),
    "SVM": np.zeros((len(subjects), K_FOLD)),
    "LDA": np.zeros((len(subjects), K_FOLD))
}

for subject in subjects:
    print("Subject: ", subject)

    # Load data
    left_data_file = f"{DATA_FOLDER}/left-hand-subject-{subject}.csv"
    right_data_file = f"{DATA_FOLDER}/right-hand-subject-{subject}.csv"
    data = read_eeg_file(left_data_file, right_data_file, TIME_LENGTH, TIME_WINDOW, channels_indexes=channels_indexes)
    
    W = compute_spatial_filters(data.X[data.labels == 0], data.X[data.labels == 1])
    
    subject_index = subject - 1
    cv = StratifiedKFold(n_splits=K_FOLD, shuffle=True)
    for (k, (train_index, test_index)) in enumerate(cv.split(data.X, data.labels)):
        X_train, X_test = data.X[train_index], data.X[test_index]
        y_train, y_test = data.labels[train_index], data.labels[test_index]
        
        # Feature extraction
        features_train = extract_features(X_train)
        features_test = extract_features(X_test)
    
        len_features = features_train.shape[1] * features_train.shape[2]
        features_train = np.reshape(features_train, newshape=(features_train.shape[0], len_features))
        features_test = np.reshape(features_test, newshape=(features_test.shape[0], len_features))

        # Feature normalization
        features_train = stats.zscore(features_train, axis=0)
        features_test = stats.zscore(features_test, axis=0)
        
        # GNB classifier
        gnb = GaussianNB(priors=[.5, .5], var_smoothing=1.0)
        gnb.fit(features_train, y_train)
        gnb_predictions = gnb.predict(features_test)
        gnb_accuracy = accuracy_score(y_test, gnb_predictions)
        accuracies["GNB"][subject_index][k] = gnb_accuracy

        # SVM classifier
        svm = SVC(C=.8, kernel="rbf")
        svm.fit(features_train, y_train)
        svm_predictions = svm.predict(features_test)
        svm_accuracy = accuracy_score(y_test, svm_predictions)
        accuracies["SVM"][subject_index][k] = svm_accuracy

        # LDA classifier
        lda = LinearDiscriminantAnalysis()
        lda.fit(features_train, y_train)
        lda_predictions = lda.predict(features_test)
        lda_accuracy = accuracy_score(y_test, lda_predictions)
        accuracies["LDA"][subject_index][k] = lda_accuracy

Subject:  1
Subject:  2
Subject:  3
Subject:  4
Subject:  5


Print the accuracies obtained

In [9]:
for classifier in accuracies:
    print(classifier)
    for subject, cv_accuracies in enumerate(accuracies[classifier]):
        acc_mean = np.mean(cv_accuracies)*100
        acc_std = np.std(cv_accuracies)*100
        print(f"\tSubject {subject+1} average accuracy: {acc_mean:.4f} +/- {acc_std:.4f}")
    average_acc_mean = np.mean(accuracies[classifier])*100
    average_acc_std = np.std(accuracies[classifier])*100
    print(f"\tAverage accuracy: {average_acc_mean:.4f} +/- {average_acc_std:.4f}")

GNB
	Subject 1 average accuracy: 81.0714 +/- 6.3987
	Subject 2 average accuracy: 90.3571 +/- 4.5316
	Subject 3 average accuracy: 68.5714 +/- 6.3487
	Subject 4 average accuracy: 88.9286 +/- 5.6356
	Subject 5 average accuracy: 84.6429 +/- 6.9712
	Average accuracy: 82.7143 +/- 9.8551
SVM
	Subject 1 average accuracy: 83.2143 +/- 5.7698
	Subject 2 average accuracy: 94.2857 +/- 4.5737
	Subject 3 average accuracy: 78.9286 +/- 4.6429
	Subject 4 average accuracy: 93.9286 +/- 3.9286
	Subject 5 average accuracy: 90.7143 +/- 6.0187
	Average accuracy: 88.2143 +/- 7.9299
LDA
	Subject 1 average accuracy: 82.1429 +/- 5.2973
	Subject 2 average accuracy: 93.2143 +/- 2.9667
	Subject 3 average accuracy: 73.9286 +/- 7.6682
	Subject 4 average accuracy: 91.7857 +/- 4.2408
	Subject 5 average accuracy: 90.0000 +/- 7.1071
	Average accuracy: 86.2143 +/- 9.2309
