In [1]:
import os
import sys
import re
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, train_test_split
import scipy.io as sio
from scipy import signal
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision
from torch import nn
import concurrent.futures

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Importing the data: one array of trials per subject from the Long_Words set.
# NOTE(review): these paths are relative, so the kernel's working directory must
# contain the `Long_Words` folder — confirm before running on a fresh kernel.
datasub2 = sio.loadmat('Long_Words/sub_2b_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub3 = sio.loadmat('Long_Words/sub_3b_ch80_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub6 = sio.loadmat('Long_Words/sub_6_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub7 = sio.loadmat('Long_Words/sub_7_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub9 = sio.loadmat('Long_Words/sub_9c_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
# Subject 11 used to be assigned to `datasub2`, silently replacing subject 2's data.
datasub11 = sio.loadmat('Long_Words/sub_11b_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']

In [3]:
# Directory holding the subject .mat files of each experiment type.
# Note the "Long_words" key maps to a folder spelled "Long_Words".
folder_path = dict(
    Long_words="/home/tseringj/final_project/Long_Words",
    Short_Long_words="/home/tseringj/final_project/Short_Long_words",
    Short_words="/home/tseringj/final_project/Short_words",
    Vowels="/home/tseringj/final_project/Vowels",
)

# Prompts of each experiment type, in a fixed order.
words_dict = dict(
    Long_words=["cooperate", "independent"],
    Short_Long_words=["cooperate", "in"],
    Short_words=["out", "in", "up"],
    Vowels=["a", "i", "u"],
)

# Integer class id of every prompt: its position in the matching words_dict list.
numeric_labels = {
    experiment: {prompt: position for position, prompt in enumerate(prompts)}
    for experiment, prompts in words_dict.items()
}

In [4]:
# Sanity check: dimensions of the loaded trial grid and the shape of its first entry.
row, col = datasub2.shape
print(datasub2[0, 0].shape)

(64, 1280)


In [5]:

# Key under which each subject's .mat file stores the trial array.
matrix_to_load = "eeg_data_wrt_task_rep_no_eog_256Hz_last_beep"

def load_EEG(type, subject_no):
    """Split one subject's .mat recording into per-trial .npy files.

    Scans ``folder_path[type]`` for ``.mat`` files whose first run of digits in
    the file name equals ``subject_no``. Every entry of the stored trial array
    is saved (once) as ``<folder>/temp_files/<subject_no>/<word>_<k>.npy``,
    where ``<word>`` is ``words_dict[type][row]`` and ``<k>`` is the 1-based
    column of the entry.

    Args:
        type: experiment key, used to index ``folder_path`` and ``words_dict``.
        subject_no: integer subject number to look for in the file names.

    Returns:
        ``(X, Y)``: NumPy arrays of the per-trial file paths and of the word
        belonging to each path, in ``np.ndenumerate`` order. If several files
        match, the paths/labels of the last one scanned are returned.

    Raises:
        FileNotFoundError: no ``.mat`` file for ``subject_no`` exists in the
            folder (previously this surfaced as an ``UnboundLocalError``).
    """
    path = folder_path[type]
    words = words_dict[type]
    X = Y = None
    # The context manager closes the directory iterator deterministically.
    with os.scandir(path) as entries:
        for subject_file in entries:
            if not (subject_file.is_file() and subject_file.name.endswith('.mat')):
                continue
            # A .mat file without any digit in its name cannot belong to a
            # subject; skip it instead of crashing on `None.group`.
            number = re.search("[0-9]+", subject_file.name)
            if number is None or int(number.group(0)) != subject_no:
                continue
            mat = sio.loadmat(subject_file.path)[matrix_to_load]

            # exist_ok avoids the check-then-create race of exists() + mkdir().
            temp = f"{path}/temp_files/{subject_no}"
            os.makedirs(temp, exist_ok=True)

            X = []
            Y = []
            for index, eeg in np.ndenumerate(mat):
                temp2 = f"{temp}/{words[index[0]]}_{index[1] + 1}.npy" #storing each trial
                X.append(temp2)
                Y.append(words[index[0]])
                if not os.path.exists(temp2):
                    np.save(temp2, eeg)
    if X is None:
        raise FileNotFoundError(
            f"no .mat file for subject {subject_no} found in {path}")
    return np.array(X), np.array(Y)

In [6]:
# Select the experiment and subject, then split that subject's recording into
# per-trial files. NOTE: `type` shadows the Python builtin of the same name.
type, subject_no = "Long_words", 2
X, Y = load_EEG(type, subject_no)

In [7]:
#feature extraction


# Retrieves the MPC (Mean Phase Coherence) feature matrix for a multi-channel EEG segment.
def MPC(eeg):
    """Compute the pairwise mean phase coherence between EEG channels.

    For channels ``i`` and ``j`` the value is ``|mean(exp(-1j * (phi_i - phi_j)))|``
    where ``phi`` is the unwrapped instantaneous phase of the Hilbert analytic
    signal of the channel.

    Args:
        eeg: 2-D array with one channel per row (``eeg[i]`` is channel ``i``).

    Returns:
        Symmetric ``(channels, channels)`` float array; the diagonal is 1.
    """
    channels = eeg.shape[0]
    mpc_matrix = np.zeros((channels, channels), dtype=float)

    # The phase of a channel does not depend on its partner, so compute it once
    # per channel instead of twice per pair (the Hilbert transform is by far
    # the most expensive step). Per-row calls keep the numbers identical.
    phases = [np.unwrap(np.angle(signal.hilbert(eeg[ch]))) for ch in range(channels)]

    for i in range(channels):
        for j in range(i, channels):
            phase_diff = np.mean(np.exp((phases[i] - phases[j]) * -1j))
            coherence = np.absolute(phase_diff)
            # The measure is symmetric: fill both triangles from one evaluation.
            mpc_matrix[i, j] = coherence
            mpc_matrix[j, i] = coherence
    return mpc_matrix


# Retrieves the MSC (Magnitude-Squared Coherence) feature matrix for a multi-channel EEG segment.
def MSC(eeg):
    """Average the coherence of every channel pair over three frequency bands.

    The coherence spectrum comes from ``scipy.signal.coherence`` with a
    51-point Hamming window, ``nfft=256`` and ``fs=256``. It is averaged over
    the bands (8, 13], (13, 30] and (30, 70] Hz, stored in the last axis in
    that order (alpha, beta, gamma).

    Args:
        eeg: 2-D array with one channel per row.

    Returns:
        ``(channels, channels, 3)`` float array, symmetric in its first two axes.
    """
    n_channels = eeg.shape[0]
    msc_matrix = np.zeros((n_channels, n_channels, 3), dtype=float)
    # (low, high] edges in Hz for alpha, beta, gamma.
    band_edges = ((8, 13), (13, 30), (30, 70))

    for first in range(n_channels):
        # Only the upper triangle is evaluated; each value is mirrored below.
        for second in range(first, n_channels):
            freqs, coh = signal.coherence(
                eeg[first], eeg[second], window = signal.windows.hamming(51) , nfft = 256, fs=256)
            for band, (low, high) in enumerate(band_edges):
                in_band = (freqs > low) & (freqs <= high)
                band_mean = np.mean(coh[in_band])
                msc_matrix[first, second, band] = band_mean
                msc_matrix[second, first, band] = band_mean
    return msc_matrix


# Alpha / beta / gamma band-pass filtering for every EEG electrode
def alpha_beta_gamma_extractor(eeg):
    """Band-pass filter the EEG into the alpha, beta and gamma bands.

    Each band uses an order-10 Butterworth band-pass (second-order sections,
    ``fs=256``) with pass bands 8-13, 13-30 and 30-70 Hz, applied along axis 1
    with ``scipy.signal.sosfilt``.

    Args:
        eeg: 2-D array filtered along axis 1.

    Returns:
        List ``[alpha, beta, gamma]`` of arrays shaped like ``eeg``.
    """
    pass_bands = ([8, 13], [13, 30], [30, 70])
    # Design the three filters first, then run them in alpha, beta, gamma order.
    designs = [
        signal.butter(10, edges, 'bandpass', fs=256, output='sos')
        for edges in pass_bands
    ]
    return [signal.sosfilt(sos, eeg, axis = 1) for sos in designs]


# Returns the image form of the EEG built from the MSC matrix and the per-band MPC matrices
def EEG_Image(eeg, **kwargs):
    """Combine MSC and MPC features into one 3-plane image.

    Starts from ``MSC(eeg)``. For each of the three band signals returned by
    ``alpha_beta_gamma_extractor`` the matching plane has its diagonal set to 0
    and its strict upper triangle replaced by ``MPC`` of that band signal; the
    lower triangle keeps the MSC values.

    Args:
        eeg: array accepted by ``MSC`` and ``alpha_beta_gamma_extractor``.
        **kwargs: accepted but not used.

    Returns:
        The modified array returned by ``MSC``.
    """
    band_signals = alpha_beta_gamma_extractor(eeg)
    Image = MSC(eeg)
    for band in range(3):
        band_mpc = MPC(band_signals[band])
        size = band_mpc.shape[0]
        for row in range(size):
            Image[row, row, band] = 0
            # Everything right of the diagonal in this row comes from the MPC matrix.
            Image[row, row + 1:size, band] = band_mpc[row, row + 1:size]
    return Image



    # print(EEG_Image(eeg)[:,:,0])

In [8]:
def get_train_preprocessed_data(X,Y, pre_process, transformer, window_size=256, stride=64):
    """Run ``pre_process`` on every (X[i], Y[i]) pair in a process pool.

    Each task is called as ``pre_process(X[i], Y[i], transformer, window_size,
    stride)`` and must return a pair of sequences ``(items, labels)``; all
    pairs are concatenated into two flat lists.

    Args:
        X, Y: equally long indexable sequences of inputs and labels.
        pre_process: worker function executed in the pool.
        transformer: forwarded unchanged to ``pre_process``.
        window_size, stride: forwarded to ``pre_process`` (defaults keep the
            previously hard-coded 256 / 64).

    Returns:
        ``(new_X, new_Y)`` flat lists, in the order of the inputs.
    """
    new_X = []
    new_Y = []
    # Nothing to do: avoid starting a process pool at all.
    if len(X) == 0:
        return new_X, new_Y

    # cpu_count() may be None, and 80% of one core truncates to 0, which
    # ProcessPoolExecutor rejects; always ask for at least one worker.
    max_workers = max(1, int((os.cpu_count() or 1) * 0.80))
    with concurrent.futures.ProcessPoolExecutor(max_workers = max_workers) as executor:
        futures = [
            executor.submit(pre_process, X[i], Y[i], transformer, window_size, stride)
            for i in range(len(X))
        ]
        # Collect in submission order (not completion order) so the returned
        # sample order is the same on every run.
        for future in futures:
            items, labels = future.result()
            new_X.extend(items)
            new_Y.extend(labels)

    return new_X, new_Y


def train_pre_process(X,Y, transformer,  window_size, stride):
    """Cut one stored trial into sliding windows and cache each transformed window.

    The trial is loaded from the ``.npy`` file ``X`` and windows of
    ``window_size`` columns are taken every ``stride`` columns along axis 1.
    Window ``k`` (1-based) is stored as ``<X without .npy>_<k>.npy`` holding
    ``transformer(window)``; files that already exist are left untouched.

    Args:
        X: path of the trial file.
        Y: label of the trial, repeated once per window in the result.
        transformer: callable applied to each window before saving.
        window_size: number of columns per window.
        stride: column offset between consecutive window starts.

    Returns:
        ``(paths, labels)``: the window file paths and ``[Y] * len(paths)``.
    """
    new_X = []
    with open(X, 'rb') as f:
        eeg = np.load(f)
    # Drop only the trailing extension; str.replace would also rewrite a
    # ".npy" that happens to appear earlier in the path.
    temp = X[:-len(".npy")] if X.endswith(".npy") else X
    for start in range(0, eeg.shape[1] - window_size + 1, stride):
        target = f"{temp}_{start//stride + 1}.npy"
        new_X.append(target)
        if os.path.exists(target): continue
        # Write to a scratch file and rename it into place, so an interrupted
        # run never leaves a truncated file that later counts as a cache hit.
        scratch = f"{target}.{os.getpid()}.tmp"
        try:
            with open(scratch, "wb") as f:
                np.save(f, transformer(eeg[:, start:start + window_size]))
            os.replace(scratch, target)
        finally:
            if os.path.exists(scratch):
                os.remove(scratch)
    return new_X, [Y]*len(new_X)

In [9]:
# X_train, Y_train=get_train_preprocessed_data(X, Y, train_pre_process, EEG_Image)

In [10]:
def get_test_preprocessed_data(X, Y, pre_process, transformer, window_size=256, stride=64):
    """Run ``pre_process`` on every (X[i], Y[i]) pair in a process pool.

    Each task is called as ``pre_process(X[i], Y[i], transformer, window_size,
    stride)`` and must return one ``(item, label)`` pair; unlike the training
    variant, every task contributes exactly one entry to each output list.

    Args:
        X, Y: equally long indexable sequences of inputs and labels.
        pre_process: worker function executed in the pool.
        transformer: forwarded unchanged to ``pre_process``.
        window_size, stride: forwarded to ``pre_process`` (defaults keep the
            previously hard-coded 256 / 64).

    Returns:
        ``(new_X, new_Y)`` lists, in the order of the inputs.
    """
    new_X = []
    new_Y = []
    # Nothing to do: avoid starting a process pool at all.
    if len(X) == 0:
        return new_X, new_Y

    # cpu_count() may be None, and 80% of one core truncates to 0, which
    # ProcessPoolExecutor rejects; always ask for at least one worker.
    max_workers = max(1, int((os.cpu_count() or 1) * 0.80))
    with concurrent.futures.ProcessPoolExecutor(max_workers = max_workers) as executor:
        futures = [
            executor.submit(pre_process, X[i], Y[i], transformer, window_size, stride)
            for i in range(len(X))
        ]
        # Collect in submission order (not completion order) so the returned
        # sample order is the same on every run.
        for future in futures:
            item, label = future.result()
            new_X.append(item)
            new_Y.append(label)

    return new_X, new_Y


def test_pre_process(X,Y, transformer,  *arg):

    with open(X, 'rb') as f:
        eeg = np.load(f)
    temp = X.replace(".npy","")
    new_X = f"{temp}_test.npy"
    if not os.path.exists(new_X):
        np.save(new_X, transformer(eeg))
    return new_X, Y

In [11]:
# X_test, Y_test=get_test_preprocessed_data(X,Y, test_pre_process, EEG_Image)

In [12]:
# class EEG_Dataset(Dataset):
#     def __init__(self, X, Y, transform=None, target_transform=None):
#         self.X=X
#         self.Y =Y
#         self.transform = transform
#         self.target_transform = target_transform

#     def __len__(self):
#         return len(self.Y)

#     def __getitem__(self, idx):
        
#         with open(self.X[idx], 'rb') as f:
#             image = np.load(f).transpose(2, 0, 1).astype(np.float32)
#         return image, Y[idx] 
    

# def EEG_Dataloader(X, Y,  batch_size = 4):
#     return DataLoader(
#         EEG_Dataset(X, Y, None, None),
#         batch_size=batch_size,
#         shuffle=True,
#         num_workers=20,
#     )

In [13]:

class Train_Dataset(Dataset):
    """Dataset of cached training images and their integer labels.

    Construction runs ``get_train_preprocessed_data`` with ``train_pre_process``
    and ``EEG_Image`` on the given trials; items are read lazily from the
    resulting ``.npy`` files.
    """

    def __init__(self, X, Y, type):
        """Preprocess trials ``X`` with word labels ``Y`` of experiment ``type``."""
        image_paths, words = get_train_preprocessed_data(
            X, Y, train_pre_process, EEG_Image)
        self.X = image_paths
        # Map each word to its class id for this experiment type.
        self.Y = [numeric_labels[type][word] for word in words]

    def __len__(self):
        """Number of cached images."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)``: the stored array with its last axis moved first, as float32."""
        with open(self.X[idx], 'rb') as handle:
            stored = np.load(handle, allow_pickle=False)
        channels_first = stored.transpose(2, 0, 1)
        return channels_first.astype(np.float32), self.Y[idx]
    

    
class Test_Dataset(Dataset):
    """Dataset of cached evaluation images and their integer labels.

    Construction runs ``get_test_preprocessed_data`` with ``test_pre_process``
    and ``EEG_Image`` on the given trials; items are read lazily from the
    resulting ``.npy`` files.
    """

    def __init__(self, X, Y, type):
        """Preprocess trials ``X`` with word labels ``Y`` of experiment ``type``."""
        image_paths, words = get_test_preprocessed_data(
            X, Y, test_pre_process, EEG_Image)
        self.X = image_paths
        # Map each word to its class id for this experiment type.
        self.Y = [numeric_labels[type][word] for word in words]

    def __len__(self):
        """Number of cached images."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)``: the stored array with its last axis moved first, as float32."""
        label = self.Y[idx]
        with open(self.X[idx], 'rb') as handle:
            image = np.load(handle, allow_pickle=False)
        image = image.transpose(2, 0, 1).astype(np.float32)
        return image, label


def Train_Dataloader(X, Y,  type, batch_size = 4, num_workers = 20):
    """Build a shuffling DataLoader over ``Train_Dataset(X, Y, type)``.

    Args:
        X, Y: trial paths and word labels handed to ``Train_Dataset``.
        type: experiment key handed to ``Train_Dataset``.
        batch_size: samples per batch.
        num_workers: loader worker processes; the default keeps the value that
            used to be hard-coded, lower it on machines with fewer cores.

    Returns:
        A ``DataLoader`` with ``shuffle=True``.
    """
    return DataLoader(
        Train_Dataset(X, Y, type),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
def Test_Dataloader(X, Y,  type, batch_size = 4, num_workers = 20):
    """Build a DataLoader over ``Test_Dataset(X, Y, type)`` for evaluation.

    Args:
        X, Y: trial paths and word labels handed to ``Test_Dataset``.
        type: experiment key handed to ``Test_Dataset``.
        batch_size: samples per batch.
        num_workers: loader worker processes; the default keeps the value that
            used to be hard-coded, lower it on machines with fewer cores.

    Returns:
        A ``DataLoader`` that yields the samples in dataset order.
    """
    return DataLoader(
        Test_Dataset(X, Y, type),
        batch_size=batch_size,
        # Evaluation gains nothing from shuffling; a fixed order keeps
        # per-sample predictions comparable between runs.
        shuffle=False,
        num_workers=num_workers,
    )

In [14]:
import copy
import torch.nn as nn
import torch.optim as optim
from torchvision import models
# Pretrained VGG-16 with every parameter frozen.
vgg_model = models.vgg16(weights='DEFAULT')
for param in vgg_model.parameters():
    param.requires_grad = False
# Keep only the first 24 layers of the convolutional stack, in inference mode.
vgg_features = nn.Sequential(*vgg_model.features[:24].children()).eval()

def extract_vgg_features(image):
    """Run ``image`` through ``vgg_features`` and flatten each sample.

    Gradients are disabled for the forward pass; all dimensions after the
    first are flattened into one.
    """
    with torch.no_grad():
        return torch.flatten(vgg_features(image), start_dim=1)


In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
import warnings

In [16]:
def _collect_features(loader):
    """Flatten every batch of ``loader`` into rows and stack them with their labels."""
    features = []
    labels = []
    for x, y in loader:
        batch = x.numpy()
        features.append(batch.reshape(batch.shape[0], -1))
        labels.append(y.numpy())
    if not features:
        raise ValueError("data loader yielded no batches")
    # One concatenation at the end instead of re-copying the growing array per
    # batch; float64 matches the dtype the previous implementation produced.
    return (np.concatenate(features, axis=0).astype(np.float64),
            np.concatenate(labels, axis=0))


def TrainSVM( train_loader, test_loader):
    """Fit scaler -> PCA -> RBF SVM on the training loader and score the test loader.

    Both loaders must yield ``(x, y)`` batches of tensors; each sample is
    flattened to one feature row, so any image size works as long as train and
    test agree (the width is no longer fixed at 12288).

    Prints the stacked feature shapes, the 10-fold cross-validation scores on
    the training data and the test accuracy.

    NOTE(review): the cross-validation runs on features already scaled and
    PCA-projected with the full training set, so those scores are likely
    optimistic compared with the held-out accuracy — confirm before reporting.

    Returns:
        Accuracy of the fitted classifier on the test loader.

    Raises:
        ValueError: a loader yields no batches.
    """
    # Kept from the original; presumably irrelevant for SVC — TODO confirm.
    warnings.filterwarnings("ignore", message="Liblinear failed to converge.*")
    X_train, Y_train = _collect_features(train_loader)
    X_test, Y_test = _collect_features(test_loader)
    print(X_train.shape)
    print(X_test.shape)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # PCA cannot keep more components than samples or features; clamp so small
    # folds do not crash (150 is still used whenever the data allow it).
    pca_component = min(150, X_train.shape[0], X_train.shape[1])
    pca = PCA(n_components=pca_component)
    X_train = pca.fit_transform(X_train)
    X_test= pca.transform(X_test)

    clf = svm.SVC(kernel='rbf',C=1)
    clf = clf.fit(X_train,Y_train)
    predictions_tr = clf.predict(X_test)
    scores = cross_val_score(clf, X_train, Y_train, cv=10)
    print('Cross validation Scores: ', scores)
    score=accuracy_score(Y_test,predictions_tr)
    print(score)
    return score
    

        

In [17]:
def K_fold_evaluation(type,subject_no, batch_size, random_seed):
    """Evaluate the SVM pipeline with stratified 5-fold cross-validation.

    Loads the subject's trials with ``load_EEG``, splits them into stratified
    folds, builds the train/test loaders for each fold and scores it with
    ``TrainSVM``.

    Args:
        type: experiment key passed to ``load_EEG`` and the data loaders.
        subject_no: subject number passed to ``load_EEG``.
        batch_size: batch size of both data loaders.
        random_seed: seed of the shuffled stratified split.

    Returns:
        ``(mean, std)`` of the per-fold test accuracies. Note the second value
        is a standard deviation, not a variance.
    """
    X,Y=load_EEG(type, subject_no)
    # Single source of truth for the fold count. It used to be 10 for the
    # score list but 5 for the splitter, so five never-filled zeros were
    # averaged into the reported mean and std.
    kfolds=5
    # Select the second GPU only when one exists; the unconditional call
    # crashed on CPU-only and single-GPU machines.
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        torch.cuda.set_device(1)
    skf = StratifiedKFold(
        n_splits=kfolds, random_state=random_seed, shuffle=True)

    score = []
    for i, (train_index, test_index) in enumerate(skf.split(X, Y)):
        print(f"------------------------fold {i}------------------------")
        train_X, train_Y=X[train_index], Y[train_index]
        test_X, test_Y = X[test_index], Y[test_index]

        # batch_size was accepted but never forwarded to the loaders.
        train_loader = Train_Dataloader(train_X, train_Y, type, batch_size=batch_size)
        test_loader = Test_Dataloader(test_X,test_Y, type, batch_size=batch_size)

        score.append(TrainSVM( train_loader, test_loader ))

        torch.cuda.empty_cache()

    return np.mean(score), np.std(score)
    


In [18]:
if __name__ == "__main__":
    from datetime import datetime

    # Timestamp of this run; only the commented-out result-file logging uses it.
    now = datetime.now()
    dt_string = now.strftime("%d/%m/%Y %H:%M:%S")

    # NOTE: `type` shadows the Python builtin of the same name.
    type="Long_words"
    subject_no=2
    batch_size=64
    random_seed=123
    # K_fold_evaluation returns (mean, standard deviation); the second value
    # used to be printed under the label "variance".
    mean, std = K_fold_evaluation(type, subject_no, batch_size,random_seed)
    print(f"{subject_no} K-fold mean: ", mean*100)
    print(f"{subject_no} K-fold std: ", std*100)
   
        

------------------------fold 0------------------------
(2720, 12288)
(40, 12288)
Cross validation Scores:  [0.83455882 0.84558824 0.86029412 0.84926471 0.83455882 0.79044118
 0.86764706 0.77573529 0.79779412 0.80882353]
0.6
------------------------fold 1------------------------
(2720, 12288)
(40, 12288)
Cross validation Scores:  [0.86029412 0.86764706 0.82352941 0.86397059 0.87867647 0.87132353
 0.82720588 0.8125     0.83088235 0.86764706]
0.475
------------------------fold 2------------------------
(2720, 12288)
(40, 12288)
Cross validation Scores:  [0.84558824 0.87867647 0.88970588 0.79411765 0.84191176 0.83823529
 0.86764706 0.79779412 0.80147059 0.87132353]
0.5
------------------------fold 3------------------------
(2720, 12288)
(40, 12288)
Cross validation Scores:  [0.82352941 0.82720588 0.80882353 0.81985294 0.83823529 0.85294118
 0.79779412 0.80147059 0.84926471 0.81985294]
0.575
------------------------fold 4------------------------
(2720, 12288)
(40, 12288)
Cross validation Sc

In [19]:
# with open('/home/tseringj/final_project/Long_Words/temp_files/2/cooperate_53_1.npy', 'rb') as f:
#     data=np.load(f,).transpose(2,0,1).astype(np.float32)

In [20]:
#  with open('svmresult.txt', 'w') as file:
#         sys.stdout = file
#         print("time: ", dt_string)
#         
#     file.close()