In [188]:
import os
import mne
import torch
import pickle
import torchaudio
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset
import import_ipynb
from utils import load_eeg_data
from utils import load_eye_data
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

## Utils

In [189]:
def extract_session(string):
    parts = string.split("_")
    return parts[1]

In [190]:
def remove_substring(string, substring):
    if substring in string:
        index = string.index(substring)
        string = string[:index] + string[index+len(substring):]
    return string

In [191]:
def drop_channels(eeg_raw, useless_ch):
    ch_names = eeg_raw.ch_names

    # drop non-used channels
    eeg_raw.drop_channels(useless_ch)
    new_ch = eeg_raw.ch_names
    

In [192]:
# label_dict = {0:'Disgust', 1:'Fear', 2:'Sad', 3:'Neutral', 4:'Happy'}
def load_eeg_data(eeg_dir, file_name, print_=True):
    eeg_data_pickle = np.load( os.path.join(eeg_dir, file_name) )
    print(eeg_data_pickle)
    data = pickle.loads(eeg_data_pickle['data'])
    label = pickle.loads(eeg_data_pickle['label'])
    label_dict = {0:'Disgust', 1:'Fear', 2:'Sad', 3:'Neutral', 4:'Happy'}
    if(print_ == True):
        for i in range(45):
            print(f"\tSession {i//15+1} - Clip #{i%15+1} ----> {label_dict[label[i][0]]}")
    return data, label

## Test Data

In [193]:
'''
    EEG Test Data
        data, labels
'''

#eye_movement_features = './dataset/Eye_movement_features'
eeg_features = './dataset/EEG_DE_features'

eeg_dir = eeg_features
#eye_dir = eye_movement_features
test_eeg_file_list = os.listdir(eeg_dir)
test_eeg_file_list.sort()

res_dir = './res/cv3/'
if not os.path.exists(res_dir):
    os.makedirs(res_dir)
cv = 1

# subjectID_sessionID.npz
for f_id in test_eeg_file_list:
    data, labels = load_eeg_data(eeg_dir, f_id, False)

<numpy.lib.npyio.NpzFile object at 0x0000027DB8CC1AD0>


## Train Data

#### _EEG Data_

In [194]:
'''
    EEG Train Data
        train_data
'''

eeg_raw_data = './dataset/EEG_raw'
# eye_raw = './dataset/Eye_raw'

eeg_dir = eeg_raw_data
# eye_dir = eye_raw
train_eeg_file_list = os.listdir(eeg_dir)
train_eeg_file_list.sort()

res_dir = './res/cv3/'
if not os.path.exists(res_dir):
    os.makedirs(res_dir)


useless_ch = ['M1', 'M2', 'VEO', 'HEO']
for f_id in train_eeg_file_list: # train_eye_file_list
    print(f_id)
    eeg_raw = mne.io.read_raw_cnt(f'./dataset/EEG_raw/{f_id}')

    drop_channels(eeg_raw, useless_ch)
    eeg_raw.plot(start=0, duration=5, scalings='auto', n_channels=62)
    image_name = remove_substring(f_id, ".cnt")
    plt.savefig(f'./images/all_channels_data/{image_name}.png', format='png', dpi=300)


1_1_20180804.cnt


#### _Eye Data_

In [195]:
'''
    EEG Train Data
        train_data
'''

eye_raw = './dataset/Eye_raw'

eye_dir = eye_raw
train_eye_file_list = os.listdir(eye_dir)
train_eye_file_list.sort()

res_dir = './res/cv3/'
if not os.path.exists(res_dir):
    os.makedirs(res_dir)

for session_dir in train_eye_file_list:
    session_path = os.path.join(eye_dir, session_dir)
    print(f"Processing {session_dir}...")
    for file_name in os.listdir(session_path):
        print("\t"+file_name)


Processing Session_1...
	10_1_20180507.xlsx
	11_1_20180510.xlsx
	12_1_20180515.xlsx
	13_1_20180720.xlsx
	14_1_20180420.xlsx
	15_1_20180724.xlsx
	16_1_20180805.xlsx
	1_1_20180804.xlsx
	2_1_20180416.xlsx
	3_1_20180414.xlsx
	4_1_20180414.xlsx
	5_1_20180719.xlsx
	6_1_20180713.xlsx
	7_1_20180401.xlsx
	8_1_20180717.xlsx
	9_1_20180724.xlsx
Processing Session_2...
	10_2_20180524.xlsx
	11_2_20180508.xlsx
	12_2_20180508.xlsx
	13_2_20180806.xlsx
	14_2_20180423.xlsx
	15_2_20180807.xlsx
	16_2_20180815.xlsx
	1_2_20180810.xlsx
	2_2_20180419.xlsx
	3_2_20180419.xlsx
	4_2_20180417.xlsx
	5_2_20180728.xlsx
	6_2_20180731.xlsx
	7_2_20180418.xlsx
	8_2_20180802.xlsx
	9_2_20180804.xlsx
Processing Session_3...
	10_3_20180626.xlsx
	11_3_20180522.xlsx
	12_3_20180517.xlsx
	13_3_20180725.xlsx
	14_3_20180427.xlsx
	15_3_20180730.xlsx
	16_3_20180813.xlsx
	1_3_20180808.xlsx
	2_3_20180425.xlsx
	3_3_20180424.xlsx
	4_3_20180501.xlsx
	5_3_20180723.xlsx
	6_3_20180802.xlsx
	7_3_20180422.xlsx
	8_3_20180726.xlsx
	9_3_20180728.

### Extract train EEG data

In [196]:
train_data = eeg_raw.get_data()
print(len(train_data[0]))

3378360


## Pre-processing Data

In [197]:
class myDataset(Dataset):
    def __init__(self, eeg_raw, eye_raw):
        self.eeg_data = mne.io.read_raw_cnt(eeg_raw)
        self.eye_data = pd.read_excel(eye_raw)

    def __len__(self):
        return min(len(self.eeg_data), len(self.eye_data))

    def __getitem__(self, index):
        eeg_channel_name = 'FP1'  
        eeg_signal = self.eeg_data[eeg_channel_name][0][0]

        eye_signal = self.eye_data.iloc[index, 0]

        eeg_signal_tensor = torch.tensor(eeg_signal)
        eye_signal_tensor = torch.tensor(eye_signal)

        eeg_mean = torch.mean(eeg_signal_tensor)
        eeg_std = torch.std(eeg_signal_tensor)
        eye_mean = torch.mean(eye_signal_tensor)
        eye_std = torch.std(eye_signal_tensor)

        # Normalize the signal data
        eeg_signal_tensor = (eeg_signal_tensor - eeg_mean) / eeg_std
        eye_signal_tensor = (eye_signal_tensor - eye_mean) / eye_std

        return eeg_signal_tensor, eeg_mean, eeg_std, eye_signal_tensor, eye_mean, eye_std

In [198]:
for train_eeg_file in train_eeg_file_list:
    session_number = extract_session(train_eeg_file)
    train_eye_file = f'./dataset/Eye_raw/Session_{session_number}/{remove_substring(train_eeg_file,".cnt")+".xlsx"}'
    train_eeg_file = f'./dataset/EEG_raw/{train_eeg_file}'
    print(train_eeg_file,train_eye_file)

    ''' DataLoader '''
    # Create a dataset from the EEG and eye files
    dataset = myDataset(train_eeg_file, train_eye_file)
    # Create a data loader for the dataset
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

    print(dataset[0])

./dataset/EEG_raw/1_1_20180804.cnt ./dataset/Eye_raw/Session_1/1_1_20180804.xlsx
(tensor([-0.8972, -0.8723, -0.9047,  ...,  3.2437,  3.0493,  2.8220],
       dtype=torch.float64), tensor(1.4153e-08, dtype=torch.float64), tensor(5.5059e-05, dtype=torch.float64), tensor(nan, dtype=torch.float64), tensor(432., dtype=torch.float64), tensor(nan, dtype=torch.float64))


In [199]:
print(len(dataset[0]))

6


In [200]:
import numpy as np
from torch.utils.data import DataLoader, SubsetRandomSampler

# Define the train, validation, and test splits (e.g. 70% train, 20% validation, 10% test)
train_split = 0.7
val_split = 0.2
test_split = 0.1

# Shuffle the indices of the dataset
dataset_size = len(dataset)
indices = list(range(dataset_size))
np.random.shuffle(indices)

# Compute the split indices
train_split_idx = int(np.floor(train_split * dataset_size))
val_split_idx = int(np.floor((train_split + val_split) * dataset_size))

# Define the samplers for each subset
train_sampler = SubsetRandomSampler(indices[:train_split_idx])
val_sampler = SubsetRandomSampler(indices[train_split_idx:val_split_idx])
test_sampler = SubsetRandomSampler(indices[val_split_idx:])

# Create the data loaders for each subset
test_loader = DataLoader(dataset, batch_size=32, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=32, sampler=val_sampler)
train_loader = DataLoader(dataset, batch_size=32, sampler=test_sampler)


print(len(test_loader))
print(len(val_loader))
print(len(test_loader))

4
1
4


In [201]:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])
        return out


In [202]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = LSTM(input_dim=1, hidden_dim=100, num_layers=1, output_dim=1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

num_epochs = 100
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(test_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, len(test_loader), loss.item()))


KeyboardInterrupt: 

In [None]:
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in valid_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the validation set: {} %'.format(100 * correct / total))


In [None]:
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the test set: {} %'.format(100 * correct / total))
