In [92]:
import os
import re
import sys
from contextlib import redirect_stdout
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
import torch
from scipy.stats import kurtosis, skew
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [93]:
# Root directory holding one sub-folder per task. Defaults to the original
# hard-coded location; set the EEG_DATA_ROOT environment variable to run the
# notebook against a copy of the data stored elsewhere.
DATA_ROOT = os.environ.get("EEG_DATA_ROOT", "/home/tseringj/final_project")

# Task name -> directory containing that task's per-subject .mat files.
folder_path = {
    "Long_words": f"{DATA_ROOT}/Long_Words",
    "Short_Long_words": f"{DATA_ROOT}/Short_Long_words",
    "Short_words": f"{DATA_ROOT}/Short_words",
    "Vowels": f"{DATA_ROOT}/Vowels",
}

# Task name -> class names. load_EEG indexes this list with the first index
# of each cell of the loaded matrix, so the order here is significant.
words_dict = {
    "Long_words": ["cooperate", "independent"],
    "Short_Long_words": ["cooperate", "in"],
    "Short_words": ["out", "in", "up"],
    "Vowels": ["a", "i", "u"]
}

# Task name -> {class name: integer label}. Derived from words_dict so the
# two tables cannot drift apart; the label is the word's position in its list.
numeric_labels = {
    task: {word: label for label, word in enumerate(words)}
    for task, words in words_dict.items()
}

In [94]:
# Name of the variable inside each .mat file that holds the EEG trials.
_LONG_WORDS_MAT_KEY = 'eeg_data_wrt_task_rep_no_eog_256Hz_last_beep'

# Eagerly load the Long_Words recordings of six subjects. Paths are relative
# to the notebook's working directory.
# NOTE(review): subject 3's file name says ch80 while the others say ch64 -
# presumably a different channel count; confirm before mixing subjects.
datasub2 = sio.loadmat(
    'Long_Words/sub_2b_ch64_l_eog_removed_256Hz.mat'
)[_LONG_WORDS_MAT_KEY]
datasub3 = sio.loadmat(
    'Long_Words/sub_3b_ch80_l_eog_removed_256Hz.mat'
)[_LONG_WORDS_MAT_KEY]
datasub6 = sio.loadmat(
    'Long_Words/sub_6_ch64_l_eog_removed_256Hz.mat'
)[_LONG_WORDS_MAT_KEY]
datasub7 = sio.loadmat(
    'Long_Words/sub_7_ch64_l_eog_removed_256Hz.mat'
)[_LONG_WORDS_MAT_KEY]
datasub9 = sio.loadmat(
    'Long_Words/sub_9c_ch64_l_eog_removed_256Hz.mat'
)[_LONG_WORDS_MAT_KEY]
datasub11 = sio.loadmat(
    'Long_Words/sub_11b_ch64_l_eog_removed_256Hz.mat'
)[_LONG_WORDS_MAT_KEY]

In [95]:
import numpy as np
from mne.decoding import CSP
from sklearn.model_selection import train_test_split

In [96]:

# Name of the variable inside each subject's .mat file that holds the trials.
matrix_to_load = "eeg_data_wrt_task_rep_no_eog_256Hz_last_beep"

def load_EEG(type, subject_no):
    """Split one subject's .mat recording into per-trial .npy files.

    The task directory ``folder_path[type]`` is scanned for ``.mat`` files
    whose first run of digits in the file name equals ``subject_no``. Every
    cell of the matrix stored under ``matrix_to_load`` is written once to
    ``<task dir>/temp_files3/<subject_no>/<word>_<n>.npy``, where ``word`` is
    ``words_dict[type][index[0]]`` and ``n`` is ``index[1] + 1``. Files that
    already exist are not rewritten.

    Parameters
    ----------
    type : str
        Task name; a key of ``folder_path`` and ``words_dict``. (The name
        shadows the builtin but is kept so existing callers still work.)
    subject_no : int
        Subject number to look for in the file names.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Paths of the per-trial ``.npy`` files and, aligned with them, the
        word spoken in each trial. If several files match the subject, the
        arrays describe the last one scanned (same as before).

    Raises
    ------
    FileNotFoundError
        If no ``.mat`` file for ``subject_no`` exists in the task directory.
        Previously this ended in an ``UnboundLocalError`` at the return.
    """
    path = folder_path[type]
    words = words_dict[type]
    X = Y = None

    # The context manager closes the directory iterator deterministically.
    with os.scandir(path) as entries:
        for subject_file in entries:
            if not (subject_file.is_file() and subject_file.name.endswith('.mat')):
                continue
            # A .mat file with no digits in its name cannot belong to any
            # subject; skip it instead of crashing on ``None.group``.
            number = re.search("[0-9]+", subject_file.name)
            if number is None or int(number.group(0)) != subject_no:
                continue

            mat = sio.loadmat(subject_file.path)[matrix_to_load]

            # Creates temp_files3 and the subject folder in one race-free call.
            temp = f"{path}/temp_files3/{subject_no}"
            os.makedirs(temp, exist_ok=True)

            X = []
            Y = []
            for index, eeg in np.ndenumerate(mat):
                temp2 = f"{temp}/{words[index[0]]}_{index[1] + 1}.npy"  # one file per trial
                X.append(temp2)
                Y.append(words[index[0]])
                if not os.path.exists(temp2):
                    np.save(temp2, eeg)

    if X is None:
        raise FileNotFoundError(
            f"no .mat file for subject {subject_no} found in {path!r}"
        )
    return np.array(X), np.array(Y)

In [97]:
# # function for data augmentation

# def train_augmentation(X,Y):

#     final_X=np.empty((0,64,1280))
#     label=np.empty((0,1))

    
#     for i in range(len(X)):
#         #result=np.empty((4,64,512))
#         with open(X[i], 'rb') as f:
#             data = np.load(f)
#             # indices = np.arange(1, 17)
#             # indices = np.append(indices, np.arange(33, 49))
#             # indices = np.delete(indices, 9)

#             # Loop through the data with a stride of 64 samples

            

#             final_X = np.vstack((final_X,np.expand_dims(data,axis=0)))        
#             if numeric_labels[type][Y[i]]==0:
#                 label=np.vstack((label, np.zeros((1,1))))
#             else:
#                 label=np.vstack((label, np.ones((1,1))))
            
    
    
        
#     return final_X, label

In [98]:
# function for data augmentation

def train_augmentation(X, Y, label_map=None):
    """Cut each stored trial into overlapping fixed-length windows.

    Every file in ``X`` is loaded, reduced to 60 rows (rows 1-31 and 33-62
    with row 9 dropped) and to its first 1152 columns, then sliced into
    512-column windows starting every 250 columns. A window is kept only
    while ``start + 512 < 1152``, which yields three windows per trial.

    Parameters
    ----------
    X : sequence of str
        Paths of ``.npy`` files, one per trial.
    Y : sequence of str
        Class name of each trial, aligned with ``X``.
    label_map : dict, optional
        Maps a class name to its numeric label. When omitted, the legacy
        lookup ``numeric_labels[type]`` is used, where ``type`` is the
        module-level variable set by the driver cell. Passing the mapping
        explicitly avoids that hidden-state dependency.

    Returns
    -------
    final_X : numpy.ndarray, shape (n_windows, 60, 512)
    label : numpy.ndarray, shape (n_windows, 1)
        Numeric class of each window as a float. Binary tasks get 0/1 exactly
        as before; tasks with more classes now keep their real class index
        instead of having every non-zero class collapsed to 1.
    """
    total_samples = 1152
    stride = 250
    epoch_size = 512
    print(f'with total_sample: {total_samples}, epoch size: {epoch_size} and strides: {stride}')

    # Row selection is the same for every trial, so build it once.
    indices = np.arange(1, 32)
    indices = np.append(indices, np.arange(33, 63))
    indices = np.delete(indices, 8)

    # Collect windows in a list and concatenate once at the end; growing the
    # array with vstack on every window copied all earlier data each time.
    # The empty float64 seed keeps the result shape and dtype promotion of
    # the original, including for an empty ``X``.
    windows = [np.empty((0, 60, epoch_size))]
    labels = []

    for trial_path, word in zip(X, Y):
        with open(trial_path, 'rb') as f:
            data = np.load(f)
        data = data[indices, :total_samples]

        # Slide a window of epoch_size columns in steps of `stride` columns.
        for j in range(0, total_samples, stride):
            if j + epoch_size >= total_samples:
                break
            if label_map is None:
                # Legacy fallback, resolved lazily so an empty input never
                # touches the globals.
                label_map = numeric_labels[type]
            windows.append(data[np.newaxis, :, j:j + epoch_size])
            labels.append(float(label_map[word]))

    final_X = np.concatenate(windows, axis=0)
    label = np.array(labels, dtype=float).reshape(-1, 1)
    return final_X, label

In [99]:
def calculate_performance(y_test, y_pred):
    """Return the accuracy of ``y_pred`` measured against ``y_test``.

    Thin wrapper around ``accuracy_score``. Precision, recall and F1 are
    currently disabled here and nothing is printed.
    """
    # Disabled metrics, kept for reference:
    #   precision_score(y_test, y_pred), recall_score(y_test, y_pred),
    #   f1_score(y_test, y_pred)
    return accuracy_score(y_test, y_pred)

In [100]:
def train_model(X_train, X_test, y_train, y_test):
    """Fit six off-the-shelf classifiers and report their test accuracy.

    Each classifier is fitted on ``(X_train, y_train)``, used to predict
    ``X_test``, and scored with ``calculate_performance``.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices for fitting and evaluation.
    y_train, y_test : array-like
        Labels aligned with the feature matrices.

    Returns
    -------
    dict
        Accuracy per model under the keys ``'svm_rbf'``, ``'svm_linear'``,
        ``'svm_poly'``, ``'rfc'``, ``'knn'`` and ``'mlp'``.
    """
    k = 5  # number of neighbours for KNN

    # (result key, banner printed before scoring, estimator)
    # The banners used to read "pca linear performance: " for both the linear
    # and the polynomial SVM, and the RBF SVM had none; each model now
    # announces itself by its real name.
    candidates = [
        ('svm_rbf', "SVM rbf performance: ", SVC(kernel='rbf')),
        ('svm_linear', "SVM linear performance: ", SVC(kernel='linear')),
        ('svm_poly', "SVM poly performance: ", SVC(kernel='poly')),
        # Seeded like the MLP below so repeated runs give the same forest.
        ('rfc', "Random Forest performance: ", RandomForestClassifier(random_state=42)),
        ('knn', "KNN: ", KNeighborsClassifier(n_neighbors=k)),
        ('mlp', 'MLP performance: ',
         MLPClassifier(hidden_layer_sizes=(100,), max_iter=2000, activation='relu',
                       solver='adam', random_state=42)),
    ]

    model_accuracies = {}
    for key, banner, estimator in candidates:
        estimator.fit(X_train, y_train)
        # predict() already returns class labels, so the MLP output needs no
        # rounding (rounding also failed for non-numeric labels).
        y_pred = estimator.predict(X_test)
        print(banner)
        model_accuracies[key] = calculate_performance(y_test, y_pred)

    return model_accuracies

In [101]:
from numpy.linalg import LinAlgError
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [102]:

# def get_data(type,subject_no):
    
#     X,Y=load_EEG(type, subject_no)
#     loo = LeaveOneOut()

#     average_accuracies={}

#     for i, (train_index, test_index) in enumerate(loo.split(X, Y)):
#         train_X = X[train_index]
#         train_y = Y[train_index]
#         test_X = X[test_index]
#         test_y = Y[test_index]
#         X_train, y_train  = train_augmentation(train_X, train_y)
#         X_test, y_test = train_augmentation(test_X, test_y)
#         y_train=y_train.reshape((-1))
#         y_test=y_test.reshape((-1))


#         try:
#             csp = CSP(n_components=4, reg=0.0003, log=False, norm_trace=False)
#             csp.fit(X_train, y_train)
#             X_train_csp = csp.transform(X_train)
#             X_test_csp = csp.transform(X_test)
#         except LinAlgError:
#             print("LinAlgError occurred. Adjusting regularization parameter...")
#             try:
#                 csp = CSP(n_components=4, reg=0.0002, log=False, norm_trace=False)
#                 csp.fit(X_train, y_train)
#                 X_train_csp = csp.transform(X_train)
#                 X_test_csp = csp.transform(X_test)
#             except LinAlgError:
#                 print("LinAlgError occurred again. Consider further adjustments or preprocessing steps.")
                
#                 try:
#                     csp = CSP(n_components=4, reg=0.0001, log=False, norm_trace=False)
#                     csp.fit(X_train, y_train)
#                     X_train_csp = csp.transform(X_train)
#                     X_test_csp = csp.transform(X_test)
#                 except LinAlgError:
#                     print("LinAlgError occurred again. Consider further adjustments or preprocessing steps.")
#         print(f'fold {i} performance: ')
#         accuracies=train_model(X_train_csp, X_test_csp, y_train, y_test)
#         for model_name, accuracy in accuracies.items():
#             if model_name not in average_accuracies:
#                 average_accuracies[model_name] = []
#             average_accuracies[model_name].append(accuracy)
#     for model_name, accuracies in average_accuracies.items():
#         average_accuracy = np.mean(accuracies)
#         std_deviation = np.std(accuracies)
#         print(f"{model_name} Average Accuracy: {average_accuracy}")
#         print(f"{model_name} Standard Deviation: {std_deviation}")


#     return 

In [103]:
from pyriemann.classification import MDM, TSclassifier, FgMDM
from pyriemann.estimation import Covariances
from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from pyriemann.channelselection import ElectrodeSelection
from pyriemann.channelselection import FlatChannelRemover

from pyriemann.tangentspace import FGDA

In [104]:
# def get_data(type,subject_no):
    
#     X,Y=load_EEG(type, subject_no)


#     X_train, y_train  = train_augmentation(X, Y)

#     y_train=y_train.reshape((-1))

#     cv = KFold(n_splits=10, shuffle=True, random_state=42)
#     cov_data_train = Covariances(estimator='lwf').transform(X_train)
    
#     mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    

#     scores_mdm = cross_val_score(mdm, cov_data_train, y_train, cv=cv, n_jobs=1)
#     print(f"MDM Classification Average Accuracy: {np.mean(scores_mdm)}, MDM Classification Average Std: {np.std(scores_mdm)}")

#     tsc = TSclassifier()
#     # Use scikit-learn Pipeline with cross_val_score function
#     scores_tsc = cross_val_score(tsc, cov_data_train, y_train, cv=cv, n_jobs=1)
#     print(f"TSC Classification Average Accuracy: {np.mean(scores_tsc)}, TSC Classification Average Std: {np.std(scores_tsc)}")

#     lr = LogisticRegression()
#     csp = CSP(n_components=4, reg='ledoit_wolf', log=True)
#     csp = Pipeline([('CSP', csp), ('LogisticRegression', lr)])
#     scores_csp = cross_val_score(csp, X_train, y_train, cv=cv, n_jobs=1)
#     print(f" Classification Average Accuracy: {np.mean(scores_csp)}, MDM Classification Average Std: {np.std(scores_csp)}")

#     return 


In [105]:
# type="Long_words"
# subject_no=9
# get_data(type, subject_no)

In [106]:
def get_data(type,subject_no):
    """Cross-validate three Riemannian classifiers on one subject.

    Trials returned by ``load_EEG(type, subject_no)`` are split with a
    shuffled, seeded 5-fold ``StratifiedKFold``. Splitting happens on whole
    trials, before windowing, so windows of one trial never land on both
    sides of a fold. Within each fold the windows go through
    ``FlatChannelRemover``, Ledoit-Wolf covariance estimation and a
    20-electrode ``ElectrodeSelection`` (all fitted on the training part
    only), and MDM, a tangent-space RBF-SVM and FgMDM are scored on the
    test part.

    Nothing is returned; the per-fold training shape and the mean/std test
    accuracy of each classifier are printed.
    """
    trial_paths, trial_words = load_EEG(type, subject_no)
    n_folds = 5
    splitter = StratifiedKFold(n_splits=n_folds, random_state=42, shuffle=True)

    mdm_test_scores = []
    tsc_test_scores = []
    fgmdm_test_scores = []

    for train_index, test_index in splitter.split(trial_paths, trial_words):
        # Window the training trials first, then the held-out trials.
        X_train, y_train = train_augmentation(trial_paths[train_index],
                                              trial_words[train_index])
        X_test, y_test = train_augmentation(trial_paths[test_index],
                                            trial_words[test_index])
        y_train = y_train.reshape((-1))
        y_test = y_test.reshape((-1))

        # Channel removal is learnt on the training windows only.
        flat_remover = FlatChannelRemover()
        X_train = flat_remover.fit_transform(X_train, y_train)
        X_test = flat_remover.transform(X_test)
        print(X_train.shape)

        cov_train = Covariances(estimator='lwf').transform(X_train)
        cov_test = Covariances(estimator='lwf').transform(X_test)

        # Keep 20 electrodes, selected on the training covariances.
        selector = ElectrodeSelection(nelec=20, metric='riemann', n_jobs=1)
        selector.fit(cov_train, y_train)
        cov_train = selector.transform(cov_train)
        cov_test = selector.transform(cov_test)

        # Each classifier is paired with the list collecting its fold scores.
        contenders = (
            (MDM(metric=dict(mean='riemann', distance='riemann')),
             mdm_test_scores),
            (TSclassifier(tsupdate=True, clf=SVC(kernel='rbf')),
             tsc_test_scores),
            (FgMDM(metric=dict(mean='riemann', distance='riemann', map='riemann'),
                   tsupdate=True),
             fgmdm_test_scores),
        )
        for classifier, fold_scores in contenders:
            classifier.fit(cov_train, y_train)
            fold_scores.append(accuracy_score(y_test, classifier.predict(cov_test)))

    print(f"MDM Classification Average Test Accuracy: {np.mean(mdm_test_scores)}, MDM Classification Average Test Std: {np.std(mdm_test_scores)}")
    print(f"TSC Classification Average Test Accuracy: {np.mean(tsc_test_scores)}, TSC Classification Average Test Std: {np.std(tsc_test_scores)}")
    print(f"FgMDM Classification Average Test Accuracy: {np.mean(fgmdm_test_scores)}, FgMDM Classification Average Test Std: {np.std(fgmdm_test_scores)}")

    return


In [107]:
# type="Long_words"
# subject_no=7
# get_data(type, subject_no)

svm_rbf Average Accuracy: 0.52
svm_rbf Standard Deviation: 0.030207614933986434
svm_linear Average Accuracy: 0.5199999999999999
svm_linear Standard Deviation: 0.034776069358108896
svm_poly Average Accuracy: 0.50875
svm_poly Standard Deviation: 0.004999999999999982
rfc Average Accuracy: 0.53
rfc Standard Deviation: 0.03999999999999999
knn Average Accuracy: 0.51875
knn Standard Deviation: 0.050466573095465865
mlp Average Accuracy: 0.5625
mlp Standard Deviation: 0.042938910093294175

In [108]:

# now = datetime.now()
# dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
# with open('csp_Attempts3_copy.txt', 'a') as file:
#     sys.stdout = file
#     print('*******************************************************************************')
#     type="Long_words"
#     subject_no=[2,6,7,9,11]
#     for i in range(len(subject_no)):
#         print(f'subject no: {subject_no[i]}')
#         get_data(type, subject_no[i])

#     sys.stdout = sys.__stdout__

In [None]:
# Timestamp of this run. It is computed but not yet written to the log.
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")

# Append everything printed during the run to the log file.
# redirect_stdout restores the previous stream even if get_data raises.
# Assigning sys.stdout by hand left it pointing at a closed file after an
# error, and resetting to sys.__stdout__ is not the stream a Jupyter kernel
# uses for cell output.
with open('csp_Attempts3_copy.txt', 'a') as file, redirect_stdout(file):
    print('*******************************************************************************')
    # This must stay a module-level variable named `type`:
    # train_augmentation reads it to choose the label mapping.
    type="Short_Long_words"
    subject_no=[1,5,8,9,10,14]
    for subject in subject_no:
        print(f'subject no: {subject}')
        get_data(type, subject)