In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [59]:
import numpy as np
# Train/validation split: data, targets and per-trial person array.
X_train_valid, y_train_valid, person_train_valid = (
    np.load(npy_path)
    for npy_path in (
        "./data/X_train_valid.npy",
        "./data/y_train_valid.npy",
        "./data/person_train_valid.npy",
    )
)

# Test split, stored with the same three-file layout.
X_test, y_test, person_test = (
    np.load(npy_path)
    for npy_path in (
        "./data/X_test.npy",
        "./data/y_test.npy",
        "./data/person_test.npy",
    )
)


### Shape of data

In [60]:
# Report the shape of every loaded array, one line per array, in a fixed order.
shape_report = (
    ('Training/Valid data shape: {}', X_train_valid),
    ('Test data shape: {}', X_test),
    ('Training/Valid target shape: {}', y_train_valid),
    ('Test target shape: {}', y_test),
    ('Person train/valid shape: {}', person_train_valid),
    ('Person test shape: {}', person_test),
)
for shape_caption, shaped_array in shape_report:
    print(shape_caption.format(shaped_array.shape))


Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115, 1)
Person test shape: (443, 1)


### Data Augmentation

In [61]:
# -*- coding: utf-8 -*-
"""data_augmentation.py
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1mspvRcDFXus4jLFjUgUPtgrzm95cMxb7
"""

import numpy as np
"""
We will put functions to augment data in here.
Functions in this file:
  window_data
"""

def window_data(X, y, window_size, stride):
  '''
  Cut every trial into overlapping, fixed-length windows along the time axis.

  Window k of a trial covers the samples [k*stride, k*stride + window_size).
  The number of windows per trial is int((#time samples - window_size) / stride).
  Note that with this count the last window that would still fit (the one
  starting at #windows*stride) is not produced: 1000 samples with
  window_size=200 and stride=100 give 8 windows, not 9. If the time axis is
  not longer than window_size, the outputs have zero windows.

  Args:
    X: 3-d array of size (#trials x #electrodes x #time samples).
    y: labels of size (#trials); y[i] is the label of trial i.
    window_size: number of time samples in each window.
    stride: offset, in samples, between the starts of consecutive windows.

  Returns:
    X_new1: the windows stacked in a new dimension, a float64 4-d array of
      size (#trials x #electrodes x #windows x #window_size).
    X_new2: the windows turned into new trials, a float64 array of size
      (#trials*#windows x #electrodes x #window_size). Row i*#windows + k
      holds window k of trial i.
    y_new: float64 vector of size (#trials*#windows) matching X_new2, i.e.
      every label repeated once per window.

  Raises:
    IndexError: if y holds fewer labels than X holds trials.

  Some code to visualize what's happening:
  #X_new_wind1, X_new_wind2, Y_new  = window_data(X_train_valid, y_train_valid, 200, 20)
  #print(X_new_wind1.shape)
  #print(X_new_wind2.shape)
  #print(Y_new.shape)
  '''
  X = np.asarray(X)
  num_trials, num_electrodes, num_samples = X.shape
  # Clamp at zero so data shorter than one window yields empty outputs
  # instead of a negative array dimension.
  num_sub_trials = max(int((num_samples - window_size) / stride), 0)

  y = np.asarray(y, dtype=np.float64)
  if y.shape[0] < num_trials:
    raise IndexError('y has fewer labels than X has trials')

  # sample_idx[k, t] is the position on the time axis of sample t of window k.
  window_starts = np.arange(num_sub_trials) * stride
  sample_idx = window_starts[:, None] + np.arange(window_size)[None, :]

  # Indexing the time axis with the 2-d index array gathers all windows at
  # once: (#trials, #electrodes, #windows, window_size). The result is a copy,
  # so it never aliases X.
  X_new1 = X[:, :, sample_idx].astype(np.float64, copy=False)

  # Fill a separately allocated array through a 4-d view so that X_new2 never
  # shares memory with X_new1, whatever the axis sizes are.
  X_new2 = np.empty([num_trials * num_sub_trials, num_electrodes, window_size])
  X_new2.reshape(num_trials, num_sub_trials, num_electrodes, window_size)[...] = (
      X_new1.transpose(0, 2, 1, 3))

  # Each trial contributes num_sub_trials consecutive rows to X_new2.
  y_new = np.repeat(y[:num_trials], num_sub_trials)
  return X_new1, X_new2, y_new
  
# Only the training split is windowed in this cell. X_test / y_test are
# overwritten by their windowed versions when they are windowed, and the next
# cell already does that; windowing them here as well would make the next cell
# operate on 200-sample windows, for which window_data(window_size=200)
# produces zero sub-trials and therefore an empty test set.
_, X_train, y_train = window_data(X_train_valid, y_train_valid, window_size=200, stride=100)

In [62]:
# Windowing hyper-parameters shared by the training and test splits.
WINDOW_SIZE = 200
STRIDE = 100

# The training windows are derived from X_train_valid, which is never
# overwritten, so this line can be re-run safely.
_, X_train, y_train = window_data(X_train_valid, y_train_valid, window_size=WINDOW_SIZE, stride=STRIDE)

# X_test / y_test are replaced by their windowed versions. Only window them
# while the time axis is still longer than one window: running window_data on
# already-windowed data (time axis == WINDOW_SIZE) would yield zero sub-trials
# and silently empty the test set.
if X_test.shape[2] > WINDOW_SIZE:
    _, X_test, y_test = window_data(X_test, y_test, window_size=WINDOW_SIZE, stride=STRIDE)

### PyTorch Dataloader for EEG data





In [63]:
# Disabled alternative dataset: train/val obtained by splitting train_valid 80/20.
# The whole class is kept inside a string literal, so nothing below is defined
# or executed; the cell merely evaluates (and displays) the string.
# NOTE(review): if this is ever re-enabled, every instance reshuffles the trial
# indices in its own __init__, so a 'train' instance and a 'val' instance are
# not guaranteed to be disjoint unless the NumPy RNG is seeded identically
# before each construction.
'''
class EEG_Dataset(Dataset):
    def __init__ (self, X, y, p, mode='train'):
        trial_num = X.shape[0]
        trial_idx = np.arange(trial_num)
        np.random.shuffle(trial_idx)
        train_idx = trial_idx[: int(np.floor(0.8*trial_num))]
        val_idx = trial_idx[int(np.ceil(0.8*trial_num)):]
        self.X = X
        self.y = y - 769
        self.p = p
        if mode == 'train':
            self.sample_list = train_idx
        else:
            self.sample_list = val_idx

    def __len__(self):
        return (len(self.sample_list))
    
    def __getitem__(self, idx):
        sample_idx = self.sample_list[idx]
        eeg_seq = torch.from_numpy(self.X[sample_idx,:,:]).float()
        label = torch.tensor(self.y[sample_idx]).long()
        #person_id = torch.from_numpy(self.p[sample_idx,:]).long()
        sample = {'eeg_seq': eeg_seq, 'label': label}

        return sample
'''

"\nclass EEG_Dataset(Dataset):\n    def __init__ (self, X, y, p, mode='train'):\n        trial_num = X.shape[0]\n        trial_idx = np.arange(trial_num)\n        np.random.shuffle(trial_idx)\n        train_idx = trial_idx[: int(np.floor(0.8*trial_num))]\n        val_idx = trial_idx[int(np.ceil(0.8*trial_num)):]\n        self.X = X\n        self.y = y - 769\n        self.p = p\n        if mode == 'train':\n            self.sample_list = train_idx\n        else:\n            self.sample_list = val_idx\n\n    def __len__(self):\n        return (len(self.sample_list))\n    \n    def __getitem__(self, idx):\n        sample_idx = self.sample_list[idx]\n        eeg_seq = torch.from_numpy(self.X[sample_idx,:,:]).float()\n        label = torch.tensor(self.y[sample_idx]).long()\n        #person_id = torch.from_numpy(self.p[sample_idx,:]).long()\n        sample = {'eeg_seq': eeg_seq, 'label': label}\n\n        return sample\n"

In [64]:
# dataset for train/test 

class EEG_Dataset(Dataset):
    """Map-style dataset serving either the training or the test split.

    Both splits are passed to the constructor; ``mode`` selects which one the
    instance exposes. Each item is a dict with the keys ``'eeg_seq'`` (float32
    tensor holding ``X[idx]``) and ``'label'`` (int64 tensor holding the
    shifted label).

    Args:
        X_train: training data, a NumPy array indexed as ``X[idx, :, :]``.
        y_train: training labels, one per row of ``X_train``.
        X_test: test data, same layout as ``X_train``.
        y_test: test labels, one per row of ``X_test``.
        mode: ``'train'`` selects the training split; any other value selects
            the test split.
        label_offset: value subtracted from every raw label to obtain the class
            index. Defaults to 769 (presumably the raw labels are 769..772 for
            the four classes -- confirm against the data description).
    """

    def __init__ (self, X_train, y_train, X_test, y_test, mode='train', label_offset=769):
        if mode == 'train':
            self.X = X_train
            self.y = y_train - label_offset
        else:
            self.X = X_test
            self.y = y_test - label_offset

    def __len__(self):
        """Return the number of samples in the selected split."""
        return (self.X.shape[0])
    
    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{'eeg_seq': float tensor, 'label': long tensor}``."""
        eeg_seq = torch.from_numpy(self.X[idx,:,:]).float()
        # CrossEntropyLoss expects integer class indices, hence the cast to long.
        label = torch.tensor(self.y[idx]).long()
        sample = {'eeg_seq': eeg_seq, 'label': label}

        return sample

### Use LSTM + FC to perform classification

In [66]:
class model(nn.Module):
    """Single-layer LSTM followed by a linear classifier on the final hidden state.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        class_num: number of output classes (size of the logit vector).
    """

    def __init__(self, input_size, hidden_size, class_num):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers, so a dropout
        # value has no effect with num_layers=1 (PyTorch merely warns about
        # it); it is therefore not passed.
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
        self.fc = nn.Linear(hidden_size, class_num)

    def forward(self, x):
        """Return class logits for ``x``.

        ``x`` uses the nn.LSTM default layout (seq_len, batch, input_size);
        the result has shape (batch, class_num).
        """
        _, (hn, _) = self.rnn(x)
        # hn is (num_layers, batch, hidden_size). Index the last layer instead
        # of squeezing every size-1 axis, which would also drop the batch axis
        # for a batch of one sample and break the loss / argmax(dim=1).
        out = self.fc(hn[-1])
        return out

# Pick the compute device: GPU index 4 when the machine has that many GPUs,
# otherwise the first GPU, otherwise the CPU. Asking for 'cuda:4'
# unconditionally fails on machines with fewer than five GPUs.
if torch.cuda.is_available():
    gpu_index = 4 if torch.cuda.device_count() > 4 else 0
    device = torch.device('cuda:{}'.format(gpu_index))
else:
    device = torch.device('cpu')

# NOTE: this rebinds the name `model` from the class to the instance, so the
# class definition has to be executed again before this line can be re-run.
model = model(input_size=22, hidden_size=100, class_num=4).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)

EEG_trainset = EEG_Dataset(X_train, y_train, X_test, y_test, mode='train')
EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)


epoch = 20
model.train()  # make the training mode explicit
for i in range (epoch):
    print ('epoch:{}'.format(i+1))
    running_loss = 0.0  # sum (not mean) of the per-batch losses of this epoch
    total = 0
    correct = 0
    for batch in EEG_trainloader:
        # Batches arrive as (batch, channels, time); the LSTM expects
        # (time, batch, channels).
        eeg_seq = batch['eeg_seq'].permute(2,0,1).to(device)
        label = batch['label'].to(device)
        optimizer.zero_grad()
        output = model(eeg_seq)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Training accuracy is accumulated from the same forward pass.
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == label).item()
        total += label.shape[0]
    print ('acc:{}'.format(correct/total))
    print ('loss:{}'.format(running_loss))


epoch:1
acc:0.2564420803782506
loss:184.8035808801651
epoch:2
acc:0.28481087470449173
loss:183.35879015922546
epoch:3
acc:0.3037825059101655
loss:182.4354122877121
epoch:4
acc:0.31619385342789597
loss:181.39106845855713
epoch:5
acc:0.32505910165484636
loss:180.26608061790466
epoch:6
acc:0.33965721040189123
loss:178.95599675178528
epoch:7
acc:0.35390070921985817
loss:177.92072427272797
epoch:8
acc:0.366548463356974
loss:176.27342224121094
epoch:9
acc:0.3769503546099291
loss:174.62058985233307
epoch:10
acc:0.38894799054373524
loss:173.2044813632965
epoch:11
acc:0.4050236406619385
loss:170.52932906150818
epoch:12
acc:0.42080378250591016
loss:168.2230350971222
epoch:13
acc:0.43504728132387704
loss:166.0195014476776
epoch:14
acc:0.4475177304964539
loss:163.531343460083
epoch:15
acc:0.45839243498817966
loss:160.90161192417145
epoch:16
acc:0.47712765957446807
loss:157.79084074497223
epoch:17
acc:0.49326241134751775
loss:154.91331493854523
epoch:18
acc:0.508274231678487
loss:151.5716318488121


In [67]:
# Evaluate the trained model on the test split and print its accuracy.
EEG_testset = EEG_Dataset(X_train, y_train, X_test, y_test, mode='test')
EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)
if len(EEG_testset) == 0:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise ValueError('Test set is empty; check how X_test / y_test were prepared')

total = 0
correct = 0
model.eval()  # inference mode for layers that behave differently in training
with torch.no_grad():  # no gradients needed: saves memory and time
    for batch in EEG_testloader:
        # Batches arrive as (batch, channels, time); the LSTM expects
        # (time, batch, channels).
        eeg_seq = batch['eeg_seq'].permute(2,0,1).to(device)
        label = batch['label'].to(device)
        output = model(eeg_seq)
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == label).item()
        total += label.shape[0]
print ('acc:{}'.format(correct/total))

acc:0.2945823927765237
