In [15]:
import os
import re
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
import argparse
from sklearn.model_selection import train_test_split, StratifiedKFold
import concurrent.futures
import scipy.io as sio
import torch
from torch.utils.data import Dataset, DataLoader

In [16]:
#global variable
folder_path = {"Long_words": "/home/tseringj/final_project/Long_Words",
               "Short_Long_words": "/home/tseringj/final_project/Short_Long_words",
               "Short_words": "/home/tseringj/final_project/Short_words",
               "Vowels": "/home/tseringj/final_project/Vowels"}

words_dict = {
    "Long_words": ["cooperate", "independent"],
    "Short_Long_words": ["cooperate", "in"],
    "Short_words": ["out", "in", "up"],
    "Vowels": ["a", "i", "u"]
}

numeric_labels = {
    "Long_words": {"cooperate": 0, "independent": 1},
    "Short_Long_words": {"cooperate": 0, "in": 1},
    "Short_words": {"out": 0, "in": 1, "up": 2},
    "Vowels": {"a": 0, "i": 1, "u": 2}
}

In [17]:
#feature extraction


# retrieves the MPC(Mean Phase Coherance) feature matrix for given EEG 64 channel
def MPC(eeg):
    channels = eeg.shape[0]
    mpc_matrix = np.zeros((channels, channels), dtype=float)

    def MPC_feature(i, j):
        signal_a = np.unwrap(np.angle(signal.hilbert(eeg[i])))
        signal_b = np.unwrap(np.angle(signal.hilbert(eeg[j])))
        phase_diff = np.mean(np.exp((signal_a - signal_b) * -1j))
        return np.absolute(phase_diff)

    for i in range(channels):
        for j in range(channels):
            if i <= j:
                mpc_matrix[i, j] = MPC_feature(i, j)
            else:
                mpc_matrix[i, j] = mpc_matrix[j, i]
    return mpc_matrix


# retrieves the MSC(Magnitude Phase Coherance) feature matrix for given EEG 64 channel
def MSC(eeg):
    channels = eeg.shape[0]
    msc_matrix = np.zeros((channels, channels, 3), dtype=float)
    for i in range(channels):
        for j in range(channels):
            if i <= j:
                temp = signal.coherence(
                    eeg[i], eeg[j], window = signal.windows.hamming(51) , nfft = 256, fs=256)
                t1 = (temp[0] <= 8).astype(bool)
                t2 = (temp[0] <= 13).astype(bool)
                t3 = (temp[0] <= 30).astype(bool)
                t4 = (temp[0] <= 70).astype(bool)
                alpha = np.mean(temp[1][~t1 & t2])
                beta = np.mean(temp[1][~t2 & t3])
                gamma = np.mean(temp[1][~t3 & t4])
                msc_matrix[i,j,0] = alpha
                msc_matrix[i,j,1] = beta
                msc_matrix[i,j,2] = gamma
            else:
                msc_matrix[i, j, 0] = msc_matrix[j, i, 0]
                msc_matrix[i, j, 1] = msc_matrix[j, i, 1]
                msc_matrix[i, j, 2] = msc_matrix[j, i, 2]
    return msc_matrix


# alpha beta gamma filtering for every eeg electrode
def alpha_beta_gamma_extractor(eeg):
    a = signal.butter(10, [8, 13], 'bandpass', fs=256, output='sos')
    b = signal.butter(10, [13, 30], 'bandpass', fs=256, output='sos')
    g = signal.butter(10, [30, 70], 'bandpass', fs=256, output='sos')

    alpha = signal.sosfilt(a, eeg, axis = 1)
    beta = signal.sosfilt(b, eeg, axis = 1)
    gamma = signal.sosfilt(g, eeg, axis = 1)

    return [alpha, beta, gamma]


# reutrn Image form of the eeg from alpha beta gamma bands and MPC and MSC feature matrix
def EEG_Image(eeg, **kwargs):
    eeg_channles = alpha_beta_gamma_extractor(eeg)
    Image = MSC(eeg)
    for i in range(3):
        eeg_mpc = MPC(eeg_channles[i])
        n = eeg_mpc.shape[0]
        for p in range(n):
            Image[p,p,i] = 0
            for q in range(p + 1, n):
                Image[p, q, i] = eeg_mpc[p, q]
    return Image



    # print(EEG_Image(eeg)[:,:,0])

In [18]:
#data preprocessing


matrix_to_load = "eeg_data_wrt_task_rep_no_eog_256Hz_last_beep"

def load_EEG(type, subject_no):
    path = folder_path[type]
    words = words_dict[type]
    for subject_file in os.scandir(path):
        if not (subject_file.is_file() and subject_file.name.endswith('.mat') and
                int(re.search("[0-9]+", subject_file.name).group(0)) == subject_no):
            continue
        mat = sio.loadmat(subject_file.path)[matrix_to_load]
        
        temp = f"{path}/temp_files"
        if not os.path.exists(temp):
            os.mkdir(temp)
        temp = f"{temp}/{subject_no}"

        if not os.path.exists(temp):
            os.mkdir(temp)
        X = []
        Y = []
        for index, eeg in np.ndenumerate(mat):
            temp2 = f"{temp}/{words[index[0]]}_{index[1] + 1}.npy" #storing each trial
            X.append(temp2)
            Y.append(words[index[0]])
            if not os.path.exists(temp2):
                np.save(temp2, eeg)
    return np.array(X), np.array(Y)


def get_train_preprocessed_data(X_test,Y_test, pre_process, transformer):
    window_size = 256
    stride = 64
    new_X = []
    new_Y = []

    with concurrent.futures.ProcessPoolExecutor(max_workers = int(os.cpu_count()*0.80)) as executor:
        futures = []
        for i in range(len(X_test)):
            futures.append(executor.submit(
                pre_process, X_test[i], Y_test[i], transformer, window_size, stride))
        for future in concurrent.futures.as_completed(futures):
            new_X.extend(future.result()[0])
            new_Y.extend(future.result()[1])

    return new_X, new_Y


def get_test_preprocessed_data(X_test, Y_test, pre_process, transformer):
    window_size = 256
    stride = 64
    new_X = []
    new_Y = []

    with concurrent.futures.ProcessPoolExecutor(max_workers = int(os.cpu_count()*0.80)) as executor:
        futures = []
        for i in range(len(X_test)):
            futures.append(executor.submit(
                pre_process, X_test[i], Y_test[i],transformer, window_size, stride))
        for future in concurrent.futures.as_completed(futures):
            new_X.extend(future.result()[0])
            new_Y.extend(future.result()[1])

    return new_X, new_Y


def train_pre_process(X,Y, transformer,  window_size, stride):
    new_X = []
    with open(X, 'rb') as f:
        eeg = np.load(f)
    temp = X.replace(".npy","")
    for start in range(0, eeg.shape[1] - window_size + 1, stride):
        new_X.append(f"{temp}_{start//stride + 1}.npy")
        if os.path.exists(new_X[-1]): continue
        with open(new_X[-1], "wb") as f:
            np.save(f, transformer(eeg[:, start:start + window_size]))
    return new_X, [Y]*len(new_X)


def test_pre_process(X,Y, transformer,  *arg):
    with open(X, 'rb') as f:
        eeg = np.load(f)
    temp = X.replace(".npy","")
    new_X = f"{temp}_test.npy"
    if not os.path.exists(new_X):
        np.save(new_X, transformer(eeg))
    return new_X, Y


In [19]:
y

array(['cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'cooperate',
       'cooperate', 'cooperate', 'cooperate', 'cooperate', 'coop

In [20]:
datasub2 = sio.loadmat('Long_Words/sub_2b_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub3 = sio.loadmat('Long_Words/sub_3b_ch80_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub6 = sio.loadmat('Long_Words/sub_6_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub7 = sio.loadmat('Long_Words/sub_7_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub9 = sio.loadmat('Long_Words/sub_9c_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']
datasub2 = sio.loadmat('Long_Words/sub_11b_ch64_l_eog_removed_256Hz.mat')['eeg_data_wrt_task_rep_no_eog_256Hz_last_beep']

In [21]:
#Data loader


class EEG_Dataset(Dataset):

    def __init__(self, X, Y, type, test = False):
        if test:
            X, Y = get_test_preprocessed_data(
                X,Y,test_pre_process, EEG_Image)
        else:
            X, Y = get_train_preprocessed_data(
                X,Y, train_pre_process, EEG_Image)
        self.X = X
        self.Y = [numeric_labels[type][y] for y in Y]

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        image=None
        try:
            with open(self.X[idx], 'rb') as f:
                image = np.load(f, allow_pickle=False).transpose(2, 0, 1).astype(np.float32)
        except (ValueError, TypeError, OSError):
            try:
                with open(self.X[idx], 'rb') as f:
                    image = np.load(f, allow_pickle=True).transpose(2, 0, 1).astype(np.float32)
            except OSError as e:
                print(f"Failed to load file as pickled object: {e}")
        else:
            print('Successful')
                  

        return image, self.Y[idx]


def EEG_Dataloader(X, Y,  type, batch_size = 4, test = True):
    return DataLoader(
        EEG_Dataset(X, Y, type),
        batch_size=batch_size,
        shuffle=True,
        num_workers=20,
    )

In [22]:
#model function
import copy
import torch.nn as nn
import torch.optim as optim
from torchvision import models


def model_maker(num_classes):
    Jerrin_model = models.resnet18(weights='DEFAULT')
    for param in Jerrin_model.parameters():
        param.requires_grad = False
    Jerrin_model.fc = nn.Sequential(
        nn.Linear(Jerrin_model.fc.in_features, 128),
        nn.RReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(128, 64),
        nn.RReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(64, num_classes),
        nn.Softmax(dim=1)
    )
    # return nn.DataParallel(Jerrin_model)
    return Jerrin_model


def train_model(model, train_loader, val_loader, device, max_epoc=100, patience=30):

    best_model_wts = copy.deepcopy(model.state_dict())
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    loss = nn.CrossEntropyLoss(reduction = 'sum')
    best_loss = 1
    temp_patience = patience
    for ep in range(max_epoc):

        training_loss = 0
        correct = 0
        model.train()
        for X, Y in train_loader:
            X, Y = X.to(device), Y.to(device)
            
            optimizer.zero_grad()
            pred = model(X)
            loss_batch = loss(pred, Y)
            correct += (pred.argmax(dim=1) == Y).sum().item()
            training_loss += loss_batch.item()
            loss_batch.backward()
            optimizer.step()

        training_loss = training_loss / len(train_loader.dataset)
        training_acc = correct / len(train_loader.dataset)

        val_loss = 0
        correct = 0
        model.eval()
        for X, Y in val_loader:
            X, Y = X.to(device), Y.to(device)
            pred = model(X)
            loss_batch = loss(pred, Y)
            correct += (pred.argmax(dim=1) == Y).sum().item()
            val_loss += loss_batch.item()

        validation_acc = correct / len(val_loader.dataset)
        validation_loss = val_loss / len(val_loader.dataset)

        if ep % 5 == 0 or ep == (max_epoc - 1):
            print(f'\t epoch:{ep}, T.acc:{training_acc*100:.3f}, V.acc:{validation_acc*100:.3f}')
            print(f'\t\t T.loss:{training_loss:.5f}, V.loss:{validation_loss:.5f}')


        if validation_loss > best_loss :
            patience -= 1
            if patience <= 0:
                print('Early stopping :(')
                print(f'\t epoch:{ep}, T.acc:{training_acc*100:.3f}, V.acc:{validation_acc*100:.3f}')
                print(f'\t\t T.loss:{training_loss:.5f}, V.loss:{validation_loss:.5f}')
                break
        else:
            best_loss = validation_loss
            patience = temp_patience
            best_model_wts = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_model_wts)
    return model


def test_model(model, test_loader, device):
    model.to(device)
    model.eval()
    correct = 0
    loss = nn.CrossEntropyLoss()
    total_loss = 0
    for X, Y in test_loader:
        X, Y = X.to(device), Y.to(device)
        pred = model(X)
        total_loss += loss(pred, Y) * X.size(0)
        correct += (pred.argmax(dim=1) == Y).sum().item()

    total_loss = total_loss / len(test_loader.dataset)
    accuracy = correct / len(test_loader.dataset)
    print(f'Test_fold: Tloss: {total_loss:.5f}, Tacc: {accuracy*100:.3f}')
    return accuracy


In [24]:
#"Long_words"
X,Y=load_EEG("Long_words", 2)
type="Long_words"
random_seed=123
kfolds=10
batch_size=64
device=device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
skf = StratifiedKFold(
    n_splits=10, random_state=random_seed, shuffle=True)

score = [0]*kfolds
for i, (train_index, test_index) in enumerate(skf.split(X, Y)):
    print(f"------------------------fold {i}------------------------")
    print(train_index)
    


------------------------fold 0------------------------
[  1   2   3   4   7   8  10  11  12  13  14  15  16  17  18  19  20  21
  22  23  24  25  26  28  29  30  31  32  33  34  35  36  37  38  39  40
  41  42  43  45  46  47  48  49  50  51  52  53  55  56  57  58  59  60
  61  62  63  64  65  66  67  68  70  71  72  73  75  76  77  78  79  81
  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99
 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
 118 119 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
 137 138 139 140 142 143 144 145 147 148 150 151 152 153 155 156 157 158
 159 160 161 162 163 164 165 166 167 168 171 172 173 174 175 176 177 178
 179 180 181 182 183 184 185 187 188 189 191 192 194 195 196 197 198 199]
------------------------fold 1------------------------
[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  40 

In [None]:
# K-fold function



def K_fold_evaluation(type, subj_no, device, kfolds=10, batch_size = 64, random_seed=123):
    X, Y = load_EEG(type, subj_no)

    skf = StratifiedKFold(
        n_splits=10, random_state=random_seed, shuffle=True)

    score = [0]*kfolds
    for i, (train_index, test_index) in enumerate(skf.split(X, Y)):
        print(f"------------------------fold {i}------------------------")
        train_X, val_X, train_Y, val_Y = train_test_split(
            X[train_index], Y[train_index], test_size=0.15, stratify= Y[train_index], random_state=random_seed)
        test_X, test_Y = X[test_index], Y[test_index]

        train_loader = EEG_Dataloader(train_X, train_Y, type, batch_size, test = False)
        val_loader = EEG_Dataloader(val_X, val_Y, type, batch_size, test = False)
        test_loader = EEG_Dataloader(test_X, test_Y, type)

        model = model_maker(len(numeric_labels[type]))
        model = train_model(model, train_loader, val_loader, device, max_epoc=150)
        score[i] = test_model(model, test_loader, device)
        del model
        torch.cuda.empty_cache()  
    return np.mean(score), np.std(score)


if __name__ == "__main__":
    from datetime import datetime

    now = datetime.now()
    dt_string = now.strftime("%d/%m/%Y %H:%M:%S")

    file_path = 'results.txt'
    import sys
    sys.stdout = open(file_path, "a")
    print("\n\n\n\ndate and time =", dt_string)
    parser = argparse.ArgumentParser()
    parser.add_argument('--type', type=str, default='Long_words')
    # parser.add_argument('--subj_no', type=int, default=2)
    parser.add_argument('--device', type=str, default='cuda:0')
    parser.add_argument('--kfolds', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--random_seed', type=int, default=123)
    args = parser.parse_args()
    print(args)

    

    # subjects = [2,3,6,7,9,11]
    # for each in subjects:
    #     print()
    #     print("="*70)
    #     mean, variance = K_fold_evaluation(type, each, device, \
    #         kfolds, batch_size, random_seed)
    #     print(f"{each} K-fold mean: ", mean*100)
    #     print(f"{each} K-fold variance: ", variance*100)
    type="Long_words"
    each=2
    device =torch.device("cuda:1")
    kfolds=10
    batch_size=32
    random_seed=123
    mean, variance = K_fold_evaluation(type, each, device, \
        kfolds, batch_size, random_seed)
    print(f"{each} K-fold mean: ", mean*100)
    print(f"{each} K-fold variance: ", variance*100)
    



OSError: Caught OSError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/tseringj/.local/lib/python3.8/site-packages/numpy/lib/npyio.py", line 448, in load
    return pickle.load(fid, **pickle_kwargs)
EOFError: Ran out of input

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/tseringj/.local/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/tseringj/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/tseringj/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_3193965/3881988263.py", line 21, in __getitem__
    image = np.load(f, allow_pickle=True).transpose(2, 0, 1).astype(np.float32)
  File "/home/tseringj/.local/lib/python3.8/site-packages/numpy/lib/npyio.py", line 450, in load
    raise IOError(
OSError: Failed to interpret file <_io.BufferedReader name='/home/tseringj/final_project/Long_Words/temp_files/2/independent_89_1.npy'> as a pickle
