In [50]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, train_test_split
import scipy.io as sio
from scipy import signal
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision
from torch import nn
import concurrent.futures

In [51]:
#Importing the data
datasub2 = sio.loadmat('Long_Words/sub_2b_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub3 = sio.loadmat('Long_Words/sub_3b_ch80_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub6 = sio.loadmat('Long_Words/sub_6_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub7 = sio.loadmat('Long_Words/sub_7_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub9 = sio.loadmat('Long_Words/sub_9c_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub2 = sio.loadmat('Long_Words/sub_11b_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']

In [52]:
folder_path = {"Long_words": "/home/tseringj/final_project/Long_Words",
               "Short_Long_words": "/home/tseringj/final_project/Short_Long_words",
               "Short_words": "/home/tseringj/final_project/Short_words",
               "Vowels": "/home/tseringj/final_project/Vowels"}

words_dict = {
    "Long_words": ["cooperate", "independent"],
    "Short_Long_words": ["cooperate", "in"],
    "Short_words": ["out", "in", "up"],
    "Vowels": ["a", "i", "u"]
}

numeric_labels = {
    "Long_words": {"cooperate": 0, "independent": 1},
    "Short_Long_words": {"cooperate": 0, "in": 1},
    "Short_words": {"out": 0, "in": 1, "up": 2},
    "Vowels": {"a": 0, "i": 1, "u": 2}
}

In [53]:
row, col=datasub2.shape
print(datasub2[0][0].shape)

(64, 1280)


In [54]:

matrix_to_load = "eeg_data_wrt_task_rep_no_eog_256Hz_last_beep"

def load_EEG(type, subject_no):
    path = folder_path[type]
    words = words_dict[type]
    for subject_file in os.scandir(path):
        if not (subject_file.is_file() and subject_file.name.endswith('.mat') and
                int(re.search("[0-9]+", subject_file.name).group(0)) == subject_no):
            continue
        mat = sio.loadmat(subject_file.path)[matrix_to_load]
        
        temp = f"{path}/temp_files"
        if not os.path.exists(temp):
            os.mkdir(temp)
        temp = f"{temp}/{subject_no}"

        if not os.path.exists(temp):
            os.mkdir(temp)
        X = []
        Y = []
        for index, eeg in np.ndenumerate(mat):
            temp2 = f"{temp}/{words[index[0]]}_{index[1] + 1}.npy" #storing each trial
            X.append(temp2)
            Y.append(words[index[0]])
            if not os.path.exists(temp2):
                np.save(temp2, eeg)
    return np.array(X), np.array(Y)

In [55]:
type="Long_words"
subject_no=2
X,Y=load_EEG(type, subject_no)

In [56]:
#feature extraction


# retrieves the MPC(Mean Phase Coherance) feature matrix for given EEG 64 channel
def MPC(eeg):
    channels = eeg.shape[0]
    mpc_matrix = np.zeros((channels, channels), dtype=float)

    def MPC_feature(i, j):
        signal_a = np.unwrap(np.angle(signal.hilbert(eeg[i])))
        signal_b = np.unwrap(np.angle(signal.hilbert(eeg[j])))
        phase_diff = np.mean(np.exp((signal_a - signal_b) * -1j))
        return np.absolute(phase_diff)

    for i in range(channels):
        for j in range(channels):
            if i <= j:
                mpc_matrix[i, j] = MPC_feature(i, j)
            else:
                mpc_matrix[i, j] = mpc_matrix[j, i]
    return mpc_matrix


# retrieves the MSC(Magnitude Phase Coherance) feature matrix for given EEG 64 channel
def MSC(eeg):
    channels = eeg.shape[0]
    msc_matrix = np.zeros((channels, channels, 3), dtype=float)
    for i in range(channels):
        for j in range(channels):
            if i <= j:
                temp = signal.coherence(
                    eeg[i], eeg[j], window = signal.windows.hamming(51) , nfft = 256, fs=256)
                t1 = (temp[0] <= 8).astype(bool)
                t2 = (temp[0] <= 13).astype(bool)
                t3 = (temp[0] <= 30).astype(bool)
                t4 = (temp[0] <= 70).astype(bool)
                alpha = np.mean(temp[1][~t1 & t2])
                beta = np.mean(temp[1][~t2 & t3])
                gamma = np.mean(temp[1][~t3 & t4])
                msc_matrix[i,j,0] = alpha
                msc_matrix[i,j,1] = beta
                msc_matrix[i,j,2] = gamma
            else:
                msc_matrix[i, j, 0] = msc_matrix[j, i, 0]
                msc_matrix[i, j, 1] = msc_matrix[j, i, 1]
                msc_matrix[i, j, 2] = msc_matrix[j, i, 2]
    return msc_matrix


# alpha beta gamma filtering for every eeg electrode
def alpha_beta_gamma_extractor(eeg):
    a = signal.butter(10, [8, 13], 'bandpass', fs=256, output='sos')
    b = signal.butter(10, [13, 30], 'bandpass', fs=256, output='sos')
    g = signal.butter(10, [30, 70], 'bandpass', fs=256, output='sos')

    alpha = signal.sosfilt(a, eeg, axis = 1)
    beta = signal.sosfilt(b, eeg, axis = 1)
    gamma = signal.sosfilt(g, eeg, axis = 1)

    return [alpha, beta, gamma]


# reutrn Image form of the eeg from alpha beta gamma bands and MPC and MSC feature matrix
def EEG_Image(eeg, **kwargs):
    eeg_channles = alpha_beta_gamma_extractor(eeg)
    Image = MSC(eeg)
    for i in range(3):
        eeg_mpc = MPC(eeg_channles[i])
        n = eeg_mpc.shape[0]
        for p in range(n):
            Image[p,p,i] = 0
            for q in range(p + 1, n):
                Image[p, q, i] = eeg_mpc[p, q]
    return Image



    # print(EEG_Image(eeg)[:,:,0])

In [57]:
def get_train_preprocessed_data(X,Y, pre_process, transformer):
    window_size = 256
    stride = 64
    new_X = []
    new_Y = []

    with concurrent.futures.ProcessPoolExecutor(max_workers = int(os.cpu_count()*0.80)) as executor:
        futures = []
        for i in range(len(X)):
            futures.append(executor.submit(
                pre_process, X[i], Y[i], transformer, window_size, stride))
        for future in concurrent.futures.as_completed(futures):
            new_X.extend(future.result()[0])
            new_Y.extend(future.result()[1])

    return new_X, new_Y


def train_pre_process(X,Y, transformer,  window_size, stride):
    new_X = []
    with open(X, 'rb') as f:
        eeg = np.load(f)
    temp = X.replace(".npy","")
    for start in range(0, eeg.shape[1] - window_size + 1, stride):
        new_X.append(f"{temp}_{start//stride + 1}.npy")
        if os.path.exists(new_X[-1]): continue
        with open(new_X[-1], "wb") as f:
            np.save(f, transformer(eeg[:, start:start + window_size]))
    return new_X, [Y]*len(new_X)

In [58]:
# X_train, Y_train=get_train_preprocessed_data(X, Y, train_pre_process, EEG_Image)

In [59]:
def get_test_preprocessed_data(X, Y, pre_process, transformer):
    window_size = 256
    stride = 64
    new_X = []
    new_Y = []

    with concurrent.futures.ProcessPoolExecutor(max_workers = int(os.cpu_count()*0.80)) as executor:
        futures = []
        for i in range(len(X)):
            futures.append(executor.submit(
                pre_process, X[i], Y[i],transformer, window_size, stride))
        for future in concurrent.futures.as_completed(futures):
            
            new_X.append(future.result()[0])
            new_Y.append(future.result()[1])
    

    return new_X, new_Y


def test_pre_process(X,Y, transformer,  *arg):
    with open(X, 'rb') as f:
        eeg = np.load(f)
    temp = X.replace(".npy","")
    new_X = f"{temp}_test.npy"
    if not os.path.exists(new_X):
        np.save(new_X, transformer(eeg))
    return new_X, Y

In [60]:
# X_test, Y_test=get_test_preprocessed_data(X,Y, test_pre_process, EEG_Image)

In [61]:
# class EEG_Dataset(Dataset):
#     def __init__(self, X, Y, transform=None, target_transform=None):
#         self.X=X
#         self.Y =Y
#         self.transform = transform
#         self.target_transform = target_transform

#     def __len__(self):
#         return len(self.Y)

#     def __getitem__(self, idx):
        
#         with open(self.X[idx], 'rb') as f:
#             image = np.load(f).transpose(2, 0, 1).astype(np.float32)
#         return image, Y[idx] 
    

# def EEG_Dataloader(X, Y,  batch_size = 4):
#     return DataLoader(
#         EEG_Dataset(X, Y, None, None),
#         batch_size=batch_size,
#         shuffle=True,
#         num_workers=20,
#     )

In [62]:

class Train_Dataset(Dataset):

    def __init__(self, X, Y, type):

        
        X, Y = get_train_preprocessed_data(
            X,Y, train_pre_process, EEG_Image)
        self.X = X
        self.Y = [numeric_labels[type][y] for y in Y]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        image=None
        with open(self.X[idx], 'rb') as f:
            image = np.load(f, allow_pickle=False).transpose(2, 0, 1).astype(np.float32)
            # try:
            #     image = np.load(f, allow_pickle=False).transpose(2, 0, 1).astype(np.float32)
            # except ValueError:
            #     image = np.load(f, allow_pickle=True).transpose(2, 0, 1).astype(np.float32)

        return image, self.Y[idx]
    

    
class Test_Dataset(Dataset):

    def __init__(self, X, Y, type):
        
        X, Y = get_test_preprocessed_data(
            X,Y,test_pre_process, EEG_Image)
        self.X = X
        self.Y = [numeric_labels[type][y] for y in Y]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        image=None
        with open(self.X[idx], 'rb') as f:
            image = np.load(f, allow_pickle=False).transpose(2, 0, 1).astype(np.float32)
            # try:
            #     image = np.load(f, allow_pickle=False).transpose(2, 0, 1).astype(np.float32)
            # except ValueError:
            #     image = np.load(f, allow_pickle=True).transpose(2, 0, 1).astype(np.float32)

        return image, self.Y[idx]


def Train_Dataloader(X, Y,  type, batch_size = 4):
    return DataLoader(
        Train_Dataset(X, Y, type),
        batch_size=batch_size,
        shuffle=True,
        num_workers=20,
    )
def Test_Dataloader(X, Y,  type, batch_size = 4):
    return DataLoader(
        Test_Dataset(X, Y, type),
        batch_size=batch_size,
        shuffle=True,
        num_workers=20,
    )

In [63]:
import copy
import torch.nn as nn
import torch.optim as optim
from torchvision import models
def model_maker(num_classes):
    Jerrin_model = models.vgg19(weights='VGG19_Weights.DEFAULT')
    for param in Jerrin_model.parameters():
        param.requires_grad = False
    Jerrin_model.fc = nn.Sequential(
        nn.Linear(Jerrin_model.classifier[6].in_features, 512),
        nn.RReLU(inplace=True),
        #nn.Dropout(p=0.3),
        nn.Linear(512, 64),
        nn.RReLU(inplace=True),
        #nn.Dropout(p=0.3),
        nn.Linear(64, num_classes)
    )
    Jerrin_model.classifier[6]=Jerrin_model.fc
    # return nn.DataParallel(Jerrin_model)
    return Jerrin_model


def train_model(model, train_loader, val_loader, device, max_epoc=100, patience=30):

    best_model_wts = copy.deepcopy(model.state_dict())
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    loss = nn.CrossEntropyLoss() #reduction = 'sum'
    best_loss = 1
    temp_patience = patience
    for ep in range(max_epoc):

        training_loss = 0
        correct = 0
        model.train()
        for X, Y in train_loader:
            X, Y = X.to(device), Y.to(device)
            
            optimizer.zero_grad()
            pred = model(X)
            loss_batch = loss(pred, Y)
            correct += (pred.argmax(dim=1) == Y).sum().item()
            training_loss += loss_batch.item()
            loss_batch.backward()
            optimizer.step()

        training_loss = training_loss / len(train_loader.dataset)
        training_acc = correct / len(train_loader.dataset)

        val_loss = 0
        correct = 0
        model.eval()
        for X, Y in val_loader:
            X, Y = X.to(device), Y.to(device)
            pred = model(X)
            loss_batch = loss(pred, Y)
            correct += (pred.argmax(dim=1) == Y).sum().item()
            val_loss += loss_batch.item()

        validation_acc = correct / len(val_loader.dataset)
        validation_loss = val_loss / len(val_loader.dataset)

        if ep % 5 == 0 or ep == (max_epoc - 1):
            print(f'\t epoch:{ep}, T.acc:{training_acc*100:.3f}, V.acc:{validation_acc*100:.3f}')
            print(f'\t\t T.loss:{training_loss:.5f}, V.loss:{validation_loss:.5f}')


        if validation_loss > best_loss :
            patience -= 1
            if patience <= 0:
                print('Early stopping :(')
                print(f'\t epoch:{ep}, T.acc:{training_acc*100:.3f}, V.acc:{validation_acc*100:.3f}')
                print(f'\t\t T.loss:{training_loss:.5f}, V.loss:{validation_loss:.5f}')
                break
        else:
            best_loss = validation_loss
            patience = temp_patience
            best_model_wts = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_model_wts)
    return model


def test_model(model, test_loader, device):
    model.to(device)
    model.eval()
    correct = 0
    loss = nn.CrossEntropyLoss()
    total_loss = 0
    for X, Y in test_loader:
        X, Y = X.to(device), Y.to(device)
        pred = model(X)
        total_loss += loss(pred, Y) * X.size(0)
        correct += (pred.argmax(dim=1) == Y).sum().item()

    total_loss = total_loss / len(test_loader.dataset)
    accuracy = correct / len(test_loader.dataset)
    print(f'Test_fold: Tloss: {total_loss:.5f}, Tacc: {accuracy*100:.3f}')
    return accuracy

In [64]:
def K_fold_evaluation(type,subject_no, batch_size, random_seed):
    X,Y=load_EEG(type, subject_no)
    kfolds=10
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.cuda.set_device(2)
    skf = StratifiedKFold(
        n_splits=10, random_state=random_seed, shuffle=True)

    score = [0]*kfolds
    for i, (train_index, test_index) in enumerate(skf.split(X, Y)):
        print(f"------------------------fold {i}------------------------")
        # val1=[y for y in Y[train_index] if y=='cooperate'] 
        # val2=[y for y in Y[train_index] if y=='independent']
        # print(len(val1))
        # print(len(val2))
        train_X, val_X, train_Y, val_Y = train_test_split(X[train_index], Y[train_index], test_size=0.15, stratify= Y[train_index], random_state=random_seed)
        test_X, test_Y = X[test_index], Y[test_index]   
        train_loader = Train_Dataloader(train_X, train_Y, type, batch_size)
        val_loader = Train_Dataloader(val_X, val_Y, type, batch_size)
        test_loader = Test_Dataloader(test_X, test_Y, type )
        model = model_maker(len(numeric_labels[type]))
        model = train_model(model, train_loader, val_loader, device, max_epoc=150)
        score[i] = test_model(model, test_loader, device)
        del model
        torch.cuda.empty_cache() 
    return np.mean(score),np.std(score)
    


In [65]:
#print(torch.cuda.current_device())


In [66]:
if __name__ == "__main__":
    type="Long_words"
    subject_no=2
    batch_size=16
    random_seed=123
    mean, variance = K_fold_evaluation(type, subject_no, batch_size, random_seed)
    print(f"{subject_no} K-fold mean: ", mean*100)
    print(f"{subject_no} K-fold variance: ", variance*100)

------------------------fold 0------------------------
	 epoch:0, T.acc:51.826, V.acc:49.020
		 T.loss:0.04375, V.loss:0.04418
	 epoch:5, T.acc:55.325, V.acc:48.802
		 T.loss:0.04268, V.loss:0.04438
	 epoch:10, T.acc:58.439, V.acc:49.020
		 T.loss:0.04203, V.loss:0.04495
	 epoch:15, T.acc:57.978, V.acc:49.237
		 T.loss:0.04180, V.loss:0.04520
	 epoch:20, T.acc:58.862, V.acc:48.584
		 T.loss:0.04139, V.loss:0.04524
	 epoch:25, T.acc:61.246, V.acc:51.416
		 T.loss:0.04102, V.loss:0.04522
	 epoch:30, T.acc:61.246, V.acc:49.237
		 T.loss:0.04053, V.loss:0.04586
Early stopping :(
	 epoch:32, T.acc:62.015, V.acc:51.634
		 T.loss:0.03999, V.loss:0.04485
Test_fold: Tloss: 0.72998, Tacc: 35.000
------------------------fold 1------------------------
	 epoch:0, T.acc:53.095, V.acc:47.930
		 T.loss:0.04343, V.loss:0.04420
	 epoch:5, T.acc:57.939, V.acc:52.288
		 T.loss:0.04245, V.loss:0.04435
	 epoch:10, T.acc:57.478, V.acc:50.327
		 T.loss:0.04194, V.loss:0.04451
	 epoch:15, T.acc:60.131, V.acc:4

In [67]:
# with open('/home/tseringj/final_project/Long_Words/temp_files/2/cooperate_53_1.npy', 'rb') as f:
#     data=np.load(f,).transpose(2,0,1).astype(np.float32)