In [16]:
import os
import sys
import re
import numpy as np
import pandas as pd
import scipy.io as sio
import torch
import matplotlib.pyplot as plt
from scipy.stats import kurtosis, skew
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier

In [17]:
# Per-task directory paths (absolute and machine-specific).
folder_path = {
    "Long_words": "/home/tseringj/final_project/Long_Words",
    "Short_Long_words": "/home/tseringj/final_project/Short_Long_words",
    "Short_words": "/home/tseringj/final_project/Short_words",
    "Vowels": "/home/tseringj/final_project/Vowels",
}

# Prompts belonging to each task; list order defines the numeric label below.
words_dict = {
    "Long_words": ["cooperate", "independent"],
    "Short_Long_words": ["cooperate", "in"],
    "Short_words": ["out", "in", "up"],
    "Vowels": ["a", "i", "u"],
}

# Integer class label of every prompt: its position in the task's word list.
numeric_labels = {
    task: {word: label for label, word in enumerate(words)}
    for task, words in words_dict.items()
}

In [18]:
# All recordings store their trials under the same variable name inside the .mat file.
EEG_KEY = 'eeg_data_wrt_task_rep_no_eog_256Hz_last_beep'

# One array per subject, read from the Long_Words folder (paths are relative
# to the notebook's working directory).
datasub2 = sio.loadmat('Long_Words/sub_2b_ch64_l_eog_removed_256Hz.mat')[EEG_KEY]
datasub3 = sio.loadmat('Long_Words/sub_3b_ch80_l_eog_removed_256Hz.mat')[EEG_KEY]
datasub6 = sio.loadmat('Long_Words/sub_6_ch64_l_eog_removed_256Hz.mat')[EEG_KEY]
datasub7 = sio.loadmat('Long_Words/sub_7_ch64_l_eog_removed_256Hz.mat')[EEG_KEY]
datasub9 = sio.loadmat('Long_Words/sub_9c_ch64_l_eog_removed_256Hz.mat')[EEG_KEY]
# Subject 11 gets its own name. It used to be assigned to `datasub2`, which
# silently replaced the sub_2b recording loaded above, so any later use of
# `datasub2` now refers to sub_2b rather than sub_11b.
datasub11 = sio.loadmat('Long_Words/sub_11b_ch64_l_eog_removed_256Hz.mat')[EEG_KEY]

In [19]:
import numpy as np
from mne.decoding import CSP
from sklearn.model_selection import train_test_split

In [20]:
def data_fixer(data, n_epoch):
    """Stack the first ``n_epoch`` trials of ``data`` into one 3-D float array.

    Parameters
    ----------
    data : indexable
        ``data[k][0]`` must be a 2-D array for every ``k < n_epoch``; the
        shape of ``data[0][0]`` fixes the shape of every slice in the result.
    n_epoch : int
        Number of leading entries of ``data`` to copy.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_epoch, rows, cols)`` with numpy's default float
        dtype, where ``result[k]`` holds the values of ``data[k][0]``.
    """
    n_rows, n_cols = data[0][0].shape
    stacked = np.empty((n_epoch, n_rows, n_cols))
    for epoch_idx in range(n_epoch):
        # Assignment converts each trial to the float dtype of `stacked`.
        stacked[epoch_idx] = data[epoch_idx][0]
    return stacked

In [21]:
def Epoch_fixer(data, n_samples=800):
    """Truncate every cell of a 2-D array of arrays to its first ``n_samples`` entries.

    The array is modified in place and also returned, so callers may use
    either the return value or the original reference.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose cells are themselves sliceable (e.g. an object array
        of per-trial arrays).
    n_samples : int, optional
        Number of leading entries kept in each cell. Defaults to 800.

    Returns
    -------
    numpy.ndarray
        The same ``data`` object, with every cell replaced by ``cell[:n_samples]``.
    """
    n_rows, n_cols = data.shape
    for i in range(n_rows):
        for j in range(n_cols):
            # NOTE(review): the slice acts on the FIRST axis of each cell. If
            # cells are laid out as (channels, samples) this does not shorten
            # the time axis -- confirm the cell layout.
            data[i][j] = data[i][j][:n_samples]
    return data


In [22]:
def get_data(data, n_comp):
    """Split the trials into train/test sets and transform them with CSP.

    The array sizes are derived from ``data.shape`` instead of being fixed to
    2 classes x 100 trials, so inputs with a different number of classes or
    repetitions can be passed as well.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array indexed as ``data[class_index, trial_index]``. Every cell
        holds one 2-D trial array, and all cells must share the same shape
        because they are stacked by ``data_fixer``.
    n_comp : int
        Forwarded to ``CSP(n_components=...)``.

    Returns
    -------
    X_train_csp, X_test_csp
        Output of ``csp.transform`` for the training and test trials; the CSP
        object is fitted on the training trials only.
    y_train, y_test : numpy.ndarray
        1-D float arrays of class indices (0, 1, ...) aligned with the rows
        of the corresponding feature arrays.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D.
    """
    if data.ndim != 2:
        raise ValueError(
            f"data must be 2-D (classes x trials), got {data.ndim} dimension(s)"
        )
    n_classes, n_trials = data.shape

    # One row per repetition, one column per class.
    X = data.transpose()
    # Label every cell with its class index, i.e. its column after the transpose.
    y = np.tile(np.arange(n_classes, dtype=float), (n_trials, 1))

    # Rows are split as units, so each subset receives the same number of
    # trials from every class.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )

    # Flatten the (repetition, class) grid into one trial per row; features
    # and labels are flattened in the same order and therefore stay aligned.
    X_train = X_train.reshape((-1, 1))
    X_test = X_test.reshape((-1, 1))
    y_train = y_train.reshape(-1)
    y_test = y_test.reshape(-1)

    # Turn the columns of per-trial arrays into dense 3-D arrays.
    X_train = data_fixer(X_train, X_train.shape[0])
    X_test = data_fixer(X_test, X_test.shape[0])

    csp = CSP(n_components=n_comp, reg=0.001, log=False, norm_trace=False)
    csp.fit(X_train, y_train)
    X_train_csp = csp.transform(X_train)
    X_test_csp = csp.transform(X_test)

    return X_train_csp, X_test_csp, y_train, y_test


In [23]:
def calculate_performance(y_test, y_pred):
    """Print accuracy, precision, recall and F1 of ``y_pred`` against ``y_test``.

    All four scores are computed with the scorers' default settings and
    written on a single line to standard output. Nothing is returned.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, in the same order as ``y_test``.
    """
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    accuracy, precision, recall, f1 = (scorer(y_test, y_pred) for scorer in scorers)
    print(f'accuracy: {accuracy}, precision: {precision}, recall: {recall}, f1 {f1}')

In [24]:
def train_model(X_train, X_test, y_train, y_test):
    """Standardise the features, then fit and report four classifiers.

    A ``StandardScaler`` is fitted on the training features only and applied
    to both splits. An RBF SVC, a linear SVC, a random forest and an MLP are
    then trained in that order; for each one a header line is printed,
    followed by the metrics line written by ``calculate_performance``.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices for the training and test trials.
    y_train, y_test : numpy.ndarray
        Labels for the two splits; they are cast to ``int`` before use.

    Returns
    -------
    None
        Results are only printed.
    """
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    y_train = y_train.astype(int)
    y_test = y_test.astype(int)
    # Sum of the labels in each split; with 0/1 labels this is the number of
    # class-1 trials.
    print(sum(y_train), sum(y_test))

    classifiers = [
        # The RBF result gets a header so every metrics line in the log can
        # be attributed to a model.
        ("SVC rbf performance: ", SVC(kernel='rbf')),
        # Log label kept unchanged; note that no PCA is applied in this function.
        ("pca linear performance: ", SVC(kernel='linear')),
        # Seeded like the MLP below so repeated runs give the same forest.
        ("Random Forest performance: ", RandomForestClassifier(random_state=42)),
        ('MLP performance: ', MLPClassifier(hidden_layer_sizes=(100,), max_iter=500,
                                            activation='relu', solver='adam',
                                            random_state=42)),
    ]
    for header, clf in classifiers:
        clf.fit(train_scaled, y_train)
        y_pred = clf.predict(test_scaled)
        print(header)
        calculate_performance(y_test, y_pred)

    return

In [25]:
from datetime import datetime

In [26]:
# Truncate every trial with Epoch_fixer. It edits the array in place and
# returns the same object, so the reassignment is only for readability.
datasub2 = Epoch_fixer(datasub2)

In [27]:


from contextlib import redirect_stdout

# Timestamp taken once, so every entry written by this run carries the same time.
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")

# CSP component counts to evaluate.
n_components = [2, 4, 6, 8, 10]

# Append all printed results to the log file. redirect_stdout restores the
# previous sys.stdout on exit, even if an iteration raises. Resetting to
# sys.__stdout__ by hand would skip the restore on error and, in a notebook,
# would restore the wrong stream and leave later cell output invisible.
with open('csp_Attempts.txt', 'a') as file, redirect_stdout(file):
    for n_comp in n_components:
        print("time: ", dt_string)
        print(f'n_components: {n_comp}')
        X_train, X_test, y_train, y_test = get_data(datasub2, n_comp)
        train_model(X_train, X_test, y_train, y_test)
    

