In [17]:
import numpy as np
import pandas as pd
import pyxdf

# mne imports
import mne
from mne import io
from mne.datasets import sample

# EEGNet-specific imports
from EEGModels import EEGNet
import tensorflow
from tensorflow.keras import utils as np_utils
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# PyRiemann imports
from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace

#Sklearn imports
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.utils import compute_class_weight
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# tools for plotting confusion matrices
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from pyriemann.utils.viz import plot_confusion_matrix

In [453]:
# Cross-validation for the random individual model

# Input geometry handed to EEGNet and used to reshape the flattened CSV rows.
# `samples` has to match the epoch length of the files being loaded:
#   5 s epochs -> 1233, 10 s -> 2466, 20 s -> 4932, 30 s -> 7398 (value used here)
kernels = 1
chans = 16
samples = 7398

In [454]:
# Build the EEGNet architecture for a single-output (binary) problem using the
# channel / sample counts defined in the configuration cell.
# NOTE(review): the upstream EEGModels.EEGNet ends in a softmax layer; with
# nb_classes=1 a softmax output would be constant 1.0. Confirm that the local
# EEGModels.py uses a sigmoid output, otherwise binary_crossentropy cannot learn.
model = EEGNet(
    nb_classes=1,
    Chans=chans,
    Samples=samples,
    dropoutRate=0.5,
    kernLength=125,
    F1=8,
    D=2,
    F2=16,
    dropoutType='Dropout',
)

# Binary cross-entropy with Adam (lr = 1e-3); accuracy is reported as score[1]
# by model.evaluate.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [455]:
# Load the flattened epochs (one row per epoch) and their labels from disk.
X_train = np.loadtxt("X_train_P06_30.csv")
Y_train = np.loadtxt("Y_train_P06_30.csv")

# Un-flatten every row into (rows-per-epoch, samples); the middle dimension is
# derived from the row length, so it must be an exact multiple of `samples`.
# presumably the middle axis is the EEG channel axis (chans) — TODO confirm
X_train = np.reshape(
    X_train,
    (X_train.shape[0], X_train.shape[1] // samples, samples),
)

In [None]:
# K-fold cross-validation of the EEGNet model on X_train / Y_train.
#
# For every fold:
#   1. split the epochs into a training part and a validation part,
#   2. standardise the features using statistics of the TRAINING part only,
#   3. train a freshly initialised copy of `model`,
#   4. evaluate on the held-out part and store accuracy, F1, precision,
#      recall and specificity.
# After the loop the per-fold metrics are averaged and printed.
#
# Fixes relative to the previous version of this cell:
#   * a new, untrained network is used for every fold (previously the same
#     `model` kept training across folds, so later folds were validated on
#     epochs the network had already been trained on);
#   * the scaler is fitted on the training fold and only *applied* to the
#     validation fold (previously it was re-fitted on the validation data);
#   * the confusion matrix is always 2x2, and the false-positive count is
#     named correctly.
# NOTE(review): `model` itself is no longer trained by this cell; it only
# serves as the architecture template. Verify that no later cell relies on
# `model` holding trained weights.
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=False)
accuracy = np.zeros(num_folds)
F1_score = np.zeros(num_folds)
precision = np.zeros(num_folds)
recall = np.zeros(num_folds)
specificity = np.zeros(num_folds)

labels = ["Non-Alcohol", "Alcohol"]

for i, (train_index, test_index) in enumerate(kf.split(X_train, Y_train)):

    X_training = np.take(X_train, train_index, axis=0)
    X_val = np.take(X_train, test_index, axis=0)
    y_train = np.take(Y_train, train_index)
    y_val = np.take(Y_train, test_index)

    # Fit the scaler on the training fold only, then apply the same
    # transformation to the validation fold to avoid information leakage.
    scaler = StandardScaler()
    X_training = scaler.fit_transform(
        X_training.reshape(X_training.shape[0], -1)).reshape(X_training.shape)
    X_val = scaler.transform(
        X_val.reshape(X_val.shape[0], -1)).reshape(X_val.shape)

    # Shuffle epochs and labels together inside each split.
    randomize_train = np.random.permutation(len(X_training))
    X_training = X_training[randomize_train]
    y_train = y_train[randomize_train]

    randomize_val = np.random.permutation(len(X_val))
    X_val = X_val[randomize_val]
    y_val = y_val[randomize_val]

    # Convert data to (trials, channels, samples, kernels) format, using the
    # `chans`, `samples` and `kernels` values from the configuration cell.
    X_training = X_training.reshape(X_training.shape[0], chans, samples, kernels)
    X_val = X_val.reshape(X_val.shape[0], chans, samples, kernels)

    # Fresh network per fold: clone_model re-creates the layers with newly
    # initialised weights, and compiling gives it a new optimizer state.
    # The compile settings mirror the ones used for `model`.
    fold_model = tensorflow.keras.models.clone_model(model)
    fold_model.compile(loss='binary_crossentropy',
                       optimizer=Adam(learning_rate=0.001),
                       metrics=['accuracy'])

    # Train the model
    fittedModel = fold_model.fit(X_training, y_train, batch_size=20, epochs=20)

    score = fold_model.evaluate(X_val, y_val)

    print(f"Accuracy for the fold no. {i+1} on the test set: {score[1]}")

    # Threshold the network output at 0.5 and flatten (n, 1) -> (n,) so that
    # predictions have the same shape as the labels.
    pred_val = fold_model.predict(X_val)
    pred_val = np.where(pred_val > 0.5, 1, 0).ravel()

    # Integer view of the labels for the sklearn metrics.
    # assumes Y_train holds 0 (non-alcohol) / 1 (alcohol) — TODO confirm
    y_val_int = y_val.astype(int)

    # How many occurrences of each class appear in the train / validation split
    unique_train, counts_train = np.unique(y_train, return_counts=True)
    unique_test, counts_test = np.unique(y_val, return_counts=True)
    print(np.asarray((unique_train, counts_train)).T)
    print(np.asarray((unique_test, counts_test)).T)

    # Plotting confusion matrix. `labels=[0, 1]` forces a 2x2 matrix even if
    # a fold happens to contain (or predict) only one class.
    cm = confusion_matrix(y_val_int, pred_val, labels=[0, 1])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
    disp.plot(cmap=plt.cm.Blues)
    plt.show()

    # Calculating metrics
    tn = cm[0][0]  # true negatives
    fp = cm[0][1]  # false positives

    accuracy[i] = score[1]
    F1_score[i] = f1_score(y_true=y_val_int, y_pred=pred_val)
    precision[i] = precision_score(y_true=y_val_int, y_pred=pred_val)
    recall[i] = recall_score(y_true=y_val_int, y_pred=pred_val)
    # Specificity is undefined when the fold has no negative samples.
    specificity[i] = tn / (tn + fp) if (tn + fp) > 0 else np.nan

    # Training-loss curve of this fold.
    plt.plot(fittedModel.history["loss"])
    plt.ylabel("Loss")
    plt.xlabel("Epochs")
    plt.legend(["Train"], loc="upper left")
    plt.show()

standard_deviation = np.std(accuracy)

print("The accuracy of the model with cross validation is", accuracy.mean())
print("The precision score of the model with cross validation is", precision.mean())
print("The recall score of the model with cross validation is", recall.mean())
print("The F1 score of the model with cross validation is", F1_score.mean())
# nanmean skips folds where specificity was undefined (no negative samples).
print("The specificity score of the model with cross validation is", np.nanmean(specificity))

print("The standard deviation of the accuracy of the model with cross validation is", standard_deviation)