In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [2]:
import numpy as np
X_test = np.load("./data/X_test.npy")
y_test = np.load("./data/y_test.npy")
person_train_valid = np.load("./data/person_train_valid.npy")
X_train_valid = np.load("./data/X_train_valid.npy")
y_train_valid = np.load("./data/y_train_valid.npy")
person_test = np.load("./data/person_test.npy")


### Shape of data

In [3]:
print ('Training/Valid data shape: {}'.format(X_train_valid.shape))
print ('Test data shape: {}'.format(X_test.shape))
print ('Training/Valid target shape: {}'.format(y_train_valid.shape))
print ('Test target shape: {}'.format(y_test.shape))
print ('Person train/valid shape: {}'.format(person_train_valid.shape))
print ('Person test shape: {}'.format(person_test.shape))


Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115, 1)
Person test shape: (443, 1)


### Data Augmentation

In [4]:
# -*- coding: utf-8 -*-
"""data_augmentation.py
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1mspvRcDFXus4jLFjUgUPtgrzm95cMxb7
"""

import numpy as np
"""
We will put functions to augment data in here.
Functions in this file:
  window_data
"""

def window_data(X, y, window_size, stride):
  '''
  This function takes in X (a 3-d tensor) of size (#trials x #electrodes x #time 
  series) and y data of size (#trials) and outputs two options for using it. 
  X_new1: The first output stacks the windowed data in a new dimension, resulting 
    in a 4-d tensor of size (#trials x #electrodes x #windows x #window_size).
  X_new2: The second option makes the windows into new trails, resulting in a new
    X tensor of size (#trials*#windows x #electrodes x #window_size). To account 
    for the larger number of trials, we also need to augment the y data.
  y_new:  The augmented y vector of size (#trials*#windows) to match X_new2.
  Some code to visualize what's happening:
  #X_new_wind1, X_new_wind2, Y_new  = window_data(X_train_valid, y_train_valid, 200, 20)
  #print(X_new_wind1.shape)
  #print(X_new_wind2.shape)
  #print(Y_new.shape)
  '''
  num_sub_trials = int((X.shape[2]-window_size)/stride)
  X_new1 = np.empty([X.shape[0],X.shape[1],num_sub_trials,window_size])
  X_new2 = np.empty([X.shape[0]*num_sub_trials,X.shape[1],window_size])
  y_new = np.empty([X.shape[0]*num_sub_trials])
  for i in range(X.shape[0]):
    for j in range(X.shape[1]):
      for k in range(num_sub_trials):
        X_new1[i,j,k:k+window_size]    = X[i,j,k*stride:k*stride+window_size]
        X_new2[i*num_sub_trials+k,j,:] = X[i,j,k*stride:k*stride+window_size]
        y_new[i*num_sub_trials+k] = y[i]
  return X_new1, X_new2, y_new


In [5]:
_, X_new, y_new = window_data(X_train_valid, y_train_valid, window_size=100, stride=100)

In [6]:
X_new.shape
y_new.shape

(19035,)

### PyTorch Dataloader for EEG data





In [7]:
class EEG_Dataset(Dataset):
    def __init__ (self, X, y, p, mode='train'):
        trial_num = X.shape[0]
        trial_idx = np.arange(trial_num)
        np.random.shuffle(trial_idx)
        train_idx = trial_idx[: int(np.floor(0.8*trial_num))]
        val_idx = trial_idx[int(np.ceil(0.8*trial_num)):]
        self.X = X
        self.y = y - 769
        self.p = p
        if mode == 'train':
            self.sample_list = train_idx
        else:
            self.sample_list = val_idx

    def __len__(self):
        return (len(self.sample_list))
    
    def __getitem__(self, idx):
        sample_idx = self.sample_list[idx]
        eeg_seq = torch.from_numpy(self.X[sample_idx,:,:]).float()
        label = torch.tensor(self.y[sample_idx]).long()
        #person_id = torch.from_numpy(self.p[sample_idx,:]).long()
        sample = {'eeg_seq': eeg_seq, 'label': label}

        return sample

In [8]:
EEG_trainset = EEG_Dataset(X_new, y_new, person_train_valid, mode='train')
EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
#EEG_testset = EEG_Dataset(mode='test')
#EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=True)

for idx, batch in enumerate(EEG_trainloader):
    print (idx)
    eeg_seq = batch['eeg_seq'].permute(2,0,1)
    print (eeg_seq.size())
    print (batch['label'].size())
    #print (batch['person_id'])



0
torch.Size([100, 128, 22])
torch.Size([128])
1
torch.Size([100, 128, 22])
torch.Size([128])
2
torch.Size([100, 128, 22])
torch.Size([128])
3
torch.Size([100, 128, 22])
torch.Size([128])
4
torch.Size([100, 128, 22])
torch.Size([128])
5
torch.Size([100, 128, 22])
torch.Size([128])
6
torch.Size([100, 128, 22])
torch.Size([128])
7
torch.Size([100, 128, 22])
torch.Size([128])
8
torch.Size([100, 128, 22])
torch.Size([128])
9
torch.Size([100, 128, 22])
torch.Size([128])
10
torch.Size([100, 128, 22])
torch.Size([128])
11
torch.Size([100, 128, 22])
torch.Size([128])
12
torch.Size([100, 128, 22])
torch.Size([128])
13
torch.Size([100, 128, 22])
torch.Size([128])
14
torch.Size([100, 128, 22])
torch.Size([128])
15
torch.Size([100, 128, 22])
torch.Size([128])
16
torch.Size([100, 128, 22])
torch.Size([128])
17
torch.Size([100, 128, 22])
torch.Size([128])
18
torch.Size([100, 128, 22])
torch.Size([128])
19
torch.Size([100, 128, 22])
torch.Size([128])
20
torch.Size([100, 128, 22])
torch.Size([128])
21

### Use LSTM + FC to perform classification

In [9]:
class model(nn.Module):
    def __init__(self, input_size, hidden_size, class_num):
        super().__init__()
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1, dropout=0.5)
        self.fc = nn.Linear(hidden_size, class_num)

    def forward(self, x):
        _, (hn, cn) = self.rnn(x)
        out = self.fc(torch.squeeze(hn))
        return out

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')
model = model(input_size=22, hidden_size=100, class_num=4).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)

EEG_trainset = EEG_Dataset(X_new, y_new, person_train_valid, mode='train')
EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
EEG_testset = EEG_Dataset(X_new, y_new, person_train_valid, mode='test')
EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)

epoch = 20
for i in range (epoch):
    print ('epoch:{}'.format(i+1))
    running_loss = 0.0
    total = 0
    correct = 0
    for idx, batch in enumerate(EEG_trainloader):
        eeg_seq = batch['eeg_seq'].permute(2,0,1).to(device)
        label = batch['label'].to(device)
        optimizer.zero_grad()
        output= model(eeg_seq)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()                
        optimizer.step()
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == label).item()
        total += label.shape[0]
    print ('acc:{}'.format(correct/total))
    print ('loss:{}'.format(running_loss))


epoch:1
acc:0.26044129235618596
loss:165.42790460586548
epoch:2
acc:0.28887575518781194
loss:163.9574464559555
epoch:3
acc:0.30194378775939057
loss:163.21649825572968
epoch:4
acc:0.3202653007617547
loss:162.24136555194855
epoch:5
acc:0.3324796427633307
loss:161.41858565807343
epoch:6
acc:0.34581034935644867
loss:160.07163834571838
epoch:7
acc:0.3536905700026267
loss:158.99358248710632
epoch:8
acc:0.3675466246388232
loss:157.64956378936768
epoch:9
acc:0.3798923036511689
loss:155.6817011833191
epoch:10
acc:0.396046755975834
loss:154.0604157447815
epoch:11
acc:0.410428158655109
loss:152.18084239959717
epoch:12
acc:0.42796164959285526
loss:149.54713129997253
epoch:13
acc:0.4420147097452062
loss:147.0814243555069
epoch:14
acc:0.4583004990806409
loss:144.48752653598785
epoch:15
acc:0.4702521670606777
loss:142.18133568763733
epoch:16
acc:0.484896243761492
loss:139.26927208900452
epoch:17
acc:0.507551878119254
loss:135.7285737991333
epoch:18
acc:0.5197662201208301
loss:132.73919993638992
epoch

In [10]:
running_loss = 0.0
total = 0
correct = 0
for idx, batch in enumerate(EEG_testloader):
    eeg_seq = batch['eeg_seq'].permute(2,0,1).to(device)
    label = batch['label'].to(device)
    output= model(eeg_seq)
    pred = torch.argmax(output, dim=1)
    correct += torch.sum(pred == label).item()
    total += label.shape[0]
print ('acc:{}'.format(correct/total))
#print ('loss:{}'.format(running_loss))

acc:0.5098502758077226
