In [28]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader


In [29]:
import numpy as np
# Training/validation split: signals, labels and the per-trial "person" array
# (presumably a subject id per trial -- confirm against the dataset notes).
X_train_valid = np.load("./data/X_train_valid.npy")
y_train_valid = np.load("./data/y_train_valid.npy")
person_train_valid = np.load("./data/person_train_valid.npy")

# Held-out test split, stored in the same three-file layout.
X_test = np.load("./data/X_test.npy")
y_test = np.load("./data/y_test.npy")
person_test = np.load("./data/person_test.npy")


### Shape of data

In [30]:
# Report the shape of every loaded array, one line per array, in the same
# order and with the same text as before.
for shape_template, loaded_array in (
    ('Training/Valid data shape: {}', X_train_valid),
    ('Test data shape: {}', X_test),
    ('Training/Valid target shape: {}', y_train_valid),
    ('Test target shape: {}', y_test),
    ('Person train/valid shape: {}', person_train_valid),
    ('Person test shape: {}', person_test),
):
    print (shape_template.format(loaded_array.shape))


Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115, 1)
Person test shape: (443, 1)


### Data Augmentation

In [31]:
# -*- coding: utf-8 -*-
"""data_augmentation.py
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1mspvRcDFXus4jLFjUgUPtgrzm95cMxb7
"""

import numpy as np
"""
We will put functions to augment data in here.
Functions in this file:
  window_data
"""

def window_data(X, y, window_size, stride):
  '''
  Cut every trial into overlapping fixed-length windows (sliding-window
  data augmentation) and return the result in two layouts.

  Parameters:
    X:           3-d array of size (#trials x #electrodes x #time samples).
    y:           per-trial labels with at least #trials entries; only the
                 first #trials are used.
    window_size: number of time samples in each window.
    stride:      offset in samples between the starts of consecutive windows.

  The number of windows per trial is int((#time - window_size) / stride) and
  window k covers samples [k*stride, k*stride + window_size). Note that this
  count leaves out the last window that would end exactly on the final sample;
  it is kept this way so the output shapes stay the same as before.

  Returns (all float64):
    X_new1: windows stacked in a new axis, size
            (#trials x #electrodes x #windows x window_size).
    X_new2: windows turned into new trials, size
            (#trials*#windows x #electrodes x window_size); row
            i*#windows + k holds window k of trial i.
    y_new:  labels repeated to match X_new2, size (#trials*#windows).

  Raises:
    ValueError: if stride is 0, if window_size is negative or larger than the
                trial length allows, or if y has fewer entries than X has
                trials.

  Some code to visualize what's happening:
  #X_new_wind1, X_new_wind2, Y_new  = window_data(X_train_valid, y_train_valid, 200, 20)
  #print(X_new_wind1.shape)
  #print(X_new_wind2.shape)
  #print(Y_new.shape)
  '''
  X = np.asarray(X)
  n_trials, n_electrodes, n_samples = X.shape[0], X.shape[1], X.shape[2]

  if stride == 0:
    raise ValueError('stride must be non-zero')
  # Same truncating formula as before so callers get identical shapes.
  num_sub_trials = int((n_samples - window_size) / stride)
  if window_size < 0 or num_sub_trials < 0:
    raise ValueError(
        'window_size={} / stride={} give no valid windows for {} samples'.format(
            window_size, stride, n_samples))

  labels = np.asarray(y, dtype=np.float64)
  if labels.shape[0] < n_trials:
    raise ValueError('y has {} entries but X has {} trials'.format(
        labels.shape[0], n_trials))

  # sample_idx[k, t] is the time index of sample t inside window k; indexing
  # the time axis with it extracts all windows in one vectorised step instead
  # of looping over trials, electrodes and windows in Python.
  window_starts = np.arange(num_sub_trials) * stride
  sample_idx = window_starts[:, None] + np.arange(window_size)[None, :]
  X_new1 = X[:, :, sample_idx].astype(np.float64, copy=False)

  # Bring the window axis next to the trial axis, then merge the two so each
  # window becomes its own trial.
  X_new2 = X_new1.transpose(0, 2, 1, 3).reshape(
      n_trials * num_sub_trials, n_electrodes, window_size)

  # One copy of each trial label per window, in the same order as X_new2.
  y_new = np.repeat(labels[:n_trials], num_sub_trials)
  return X_new1, X_new2, y_new

# Build the training set from the *entire* train/valid array (no validation
# split is held out here). Only the "windows as new trials" output and its
# labels are kept; the 4-d stacked output is bound to `_` and not used.
# With window_size=200 and stride=10, window_data's count formula gives
# int((1000 - 200) / 10) = 80 windows for each 1000-sample trial.
_, X_train, y_train = window_data(X_train_valid, y_train_valid, window_size=200, stride=10)


### PyTorch Dataloader for EEG data





In [5]:
# dataset for train/val by splitting the train_valid
# NOTE(review): this variant is disabled. The whole class sits inside a bare
# triple-quoted string, so running the cell only evaluates (and echoes) that
# string and defines nothing; the train/test EEG_Dataset defined further down
# is the one actually used.
# NOTE(review): if this is ever revived, be aware that the shuffle happens
# inside __init__, so a 'train' instance and a separate validation instance
# each draw their own permutation and their index sets can overlap unless the
# RNG is seeded identically before both. Also, floor/ceil of 0.8*trial_num
# differ when that product is not an integer, which leaves one trial in
# neither split.
'''
class EEG_Dataset(Dataset):
    def __init__ (self, X, y, p, mode='train'):
        trial_num = X.shape[0]
        trial_idx = np.arange(trial_num)
        np.random.shuffle(trial_idx)
        train_idx = trial_idx[: int(np.floor(0.8*trial_num))]
        val_idx = trial_idx[int(np.ceil(0.8*trial_num)):]
        self.X = X
        self.y = y - 769
        self.p = p
        if mode == 'train':
            self.sample_list = train_idx
        else:
            self.sample_list = val_idx

    def __len__(self):
        return (len(self.sample_list))
    
    def __getitem__(self, idx):
        sample_idx = self.sample_list[idx]
        eeg_seq = torch.from_numpy(self.X[sample_idx,:,:]).float()
        label = torch.tensor(self.y[sample_idx]).long()
        #person_id = torch.from_numpy(self.p[sample_idx,:]).long()
        sample = {'eeg_seq': eeg_seq, 'label': label}

        return sample
'''

"\nclass EEG_Dataset(Dataset):\n    def __init__ (self, X, y, p, mode='train'):\n        trial_num = X.shape[0]\n        trial_idx = np.arange(trial_num)\n        np.random.shuffle(trial_idx)\n        train_idx = trial_idx[: int(np.floor(0.8*trial_num))]\n        val_idx = trial_idx[int(np.ceil(0.8*trial_num)):]\n        self.X = X\n        self.y = y - 769\n        self.p = p\n        if mode == 'train':\n            self.sample_list = train_idx\n        else:\n            self.sample_list = val_idx\n\n    def __len__(self):\n        return (len(self.sample_list))\n    \n    def __getitem__(self, idx):\n        sample_idx = self.sample_list[idx]\n        eeg_seq = torch.from_numpy(self.X[sample_idx,:,:]).float()\n        label = torch.tensor(self.y[sample_idx]).long()\n        #person_id = torch.from_numpy(self.p[sample_idx,:]).long()\n        sample = {'eeg_seq': eeg_seq, 'label': label}\n\n        return sample\n"

In [6]:
# dataset for train/test 

class EEG_Dataset(Dataset):
    """Map-style dataset exposing either the training or the test arrays.

    ``mode == 'train'`` selects ``X_train``/``y_train``; any other value
    selects ``X_test``/``y_test``. The chosen labels are shifted down by 769
    (presumably so the raw label codes start at 0 -- confirm against the data
    description). Only the selected pair is touched.
    """

    def __init__ (self, X_train, y_train, X_test, y_test, mode='train'):
        use_train_split = (mode == 'train')
        signals, targets = (X_train, y_train) if use_train_split else (X_test, y_test)
        self.X = signals
        self.y = targets - 769

    def __len__(self):
        """Number of samples, i.e. the size of the first axis of ``self.X``."""
        n_samples = self.X.shape[0]
        return n_samples

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict with a float32 'eeg_seq' tensor and
        an int64 'label' tensor."""
        return {
            'eeg_seq': torch.from_numpy(self.X[idx, :, :]).float(),
            'label': torch.tensor(self.y[idx]).long(),
        }

### Use LSTM + FC to perform classification

In [34]:
class model(nn.Module):
    """Single-layer LSTM followed by a linear classifier.

    The LSTM reads a sequence in nn.LSTM's default sequence-first layout,
    (seq_len, batch, input_size), and the final hidden state is mapped to
    ``class_num`` logits.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        class_num: number of output classes (logits).
    """

    def __init__(self, input_size, hidden_size, class_num):
        super().__init__()
        # No `dropout` argument: nn.LSTM applies dropout only between stacked
        # layers, so with num_layers=1 the former dropout=0.5 had no effect
        # and merely triggered a UserWarning at construction time.
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
        self.fc = nn.Linear(hidden_size, class_num)

    def forward(self, x):
        """Return logits of shape (batch, class_num) for a batched input."""
        _, (hn, _) = self.rnn(x)
        # hn is (num_layers, batch, hidden_size). Select the last layer
        # explicitly instead of torch.squeeze(hn), which would also drop the
        # batch axis for a batch of one (or the hidden axis when
        # hidden_size == 1) and produce wrongly shaped logits.
        out = self.fc(hn[-1])
        return out

# Pick the compute device. GPU index 4 is the original preference, but
# 'cuda:4' only exists on hosts exposing at least five GPUs; on smaller CUDA
# hosts fall back to the default CUDA device instead of crashing in .to().
if torch.cuda.is_available():
    device = torch.device('cuda:4' if torch.cuda.device_count() > 4 else 'cuda')
else:
    device = torch.device('cpu')

# NOTE: this rebinds the name `model` from the class to the instance; later
# cells rely on `model` being the trained instance.
# input_size=22 matches the second axis of the loaded data (22 channels).
model = model(input_size=22, hidden_size=100, class_num=4).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)

EEG_trainset = EEG_Dataset(X_train, y_train, X_test, y_test, mode='train')
EEG_trainloader = DataLoader(EEG_trainset, batch_size=256, shuffle=True)


epoch = 20
model.train()  # make the training mode explicit
for i in range (epoch):
    print ('epoch:{}'.format(i+1))
    running_loss = 0.0  # sum of per-batch mean losses (not averaged over batches)
    total = 0
    correct = 0
    for idx, batch in enumerate(EEG_trainloader):
        # (batch, channels, time) -> (time, batch, channels): the
        # sequence-first layout nn.LSTM expects by default.
        eeg_seq = batch['eeg_seq'].permute(2,0,1).to(device)
        label = batch['label'].to(device)
        optimizer.zero_grad()
        output= model(eeg_seq)
        loss = criterion(output, label)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        # Training accuracy is measured on the pre-update predictions.
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == label).item()
        total += label.shape[0]
    print ('acc:{}'.format(correct/total))
    print ('loss:{}'.format(running_loss))


epoch:1
acc:0.28326241134751773
loss:911.6234918832779
epoch:2
acc:0.31173758865248224
loss:902.1433691978455
epoch:3
acc:0.31880023640661936
loss:899.1044268608093
epoch:4
acc:0.334580378250591
loss:891.2641495466232
epoch:5
acc:0.35709219858156027
loss:879.686817407608
epoch:6
acc:0.37388297872340426
loss:864.8881139755249
epoch:7
acc:0.38479314420803784
loss:857.483899474144
epoch:8
acc:0.4008451536643026
loss:843.9414757490158
epoch:9
acc:0.4170626477541371
loss:826.4971395730972
epoch:10
acc:0.4371985815602837
loss:807.6768835783005
epoch:11
acc:0.4633451536643026
loss:780.1588814258575
epoch:12
acc:0.476725768321513
loss:761.663426399231
epoch:13
acc:0.5035460992907801
loss:731.0705375671387
epoch:14
acc:0.522127659574468
loss:709.492603123188
epoch:15
acc:0.5496808510638298
loss:677.0949718952179
epoch:16
acc:0.5568498817966903
loss:668.4036608934402
epoch:17
acc:0.5729255319148936
loss:648.2337754964828
epoch:18
acc:0.5849231678486998
loss:632.1437944173813
epoch:19
acc:0.60154

In [44]:
# Test-time evaluation: the network was trained on 200-sample windows, so each
# full-length test trial is scored by summing the logits of 10 randomly placed
# 200-sample crops and taking the arg-max of the sum.
EEG_testset = EEG_Dataset(X_train, y_train, X_test, y_test, mode='test')
EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)
running_loss = 0.0  # not accumulated in this cell; only the disabled print below reads it
total = 0
correct = 0
model.eval()
# Inference only: disable autograd so no graph is built for the forward passes.
with torch.no_grad():
    for idx, batch in enumerate(EEG_testloader):
        # (batch, channels, time) -> (time, batch, channels) for the LSTM.
        eeg_seq = batch['eeg_seq'].permute(2,0,1).to(device)
        label = batch['label'].to(device)
        # Crop starts are drawn from 0..799 (with replacement) and shared by
        # every trial in the batch; start + 200 never exceeds 1000 samples.
        random_idx = np.random.choice(800, 10)
        for i in range(len(random_idx)):
            eeg_subseq = eeg_seq[random_idx[i]:random_idx[i]+200,:,:]
            if i == 0:
                output = model(eeg_subseq)
            else:
                output += model(eeg_subseq)
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == label).item()
        total += label.shape[0]
print ('acc:{}'.format(correct/total))
#print ('loss:{}'.format(running_loss))

acc:0.3837471783295711
