<a href="https://colab.research.google.com/github/joshuaghannan/ECEC247_Project/blob/master/Updated_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Setup

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import time
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import scipy.signal as sig
import pywt
from sklearn.decomposition import FastICA

### Set up the Device

In [2]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    # device = torch.device("cuda:1") # For Yiming 
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU not available, CPU used


### If Using Colab

In [3]:
########################################################

# If running with Google Colab

from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [4]:
########################################################

# If running with Google Colab
# Create a folder "C247" and then store the project datasets within that folder
# Check that your datasets are setup correctly

!ls "/content/gdrive/My Drive/C247" # File path

cwt_into_window_tests	__pycache__	     X_train_ICA10.npy
Eden_tests1.ipynb	X_test_filtered.npy  X_train_ICA15.npy
Eden_tests2.ipynb	X_test_ICA10.npy     X_train_ICA5.npy
EEG_loading.ipynb	X_test_ICA15.npy     X_train_ICA.npy
FinalProject		X_test_ICA5.npy      X_train_valid.npy
person_test.npy		X_test_ICA.npy	     y_test.npy
person_train_valid.npy	X_test.npy	     y_train_valid.npy


### Load the Datasets

In [5]:
# X_test = np.load("X_test.npy")
# y_test = np.load("y_test.npy")
# person_train_valid = np.load("person_train_valid.npy")
# X_train_valid = np.load("X_train_valid.npy")
# y_train_valid = np.load("y_train_valid.npy")
# person_test = np.load("person_test.npy")

# Change if your directory is different

# dataset_path = './data/' # Yiming Path
dataset_path = "/content/gdrive/My Drive/C247/" 

X_test = np.load(dataset_path + "X_test.npy")
y_test = np.load(dataset_path + "y_test.npy")
person_train_valid = np.load(dataset_path + "person_train_valid.npy")
X_train_valid = np.load(dataset_path + "X_train_valid.npy")
y_train_valid = np.load(dataset_path + "y_train_valid.npy")
person_test = np.load(dataset_path + "person_test.npy")
print ('Training/Valid data shape: {}'.format(X_train_valid.shape))
print ('Test data shape: {}'.format(X_test.shape))
print ('Training/Valid target shape: {}'.format(y_train_valid.shape))
print ('Test target shape: {}'.format(y_test.shape))
print ('Person train/valid shape: {}'.format(person_train_valid.shape))
print ('Person test shape: {}'.format(person_test.shape))

Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115, 1)
Person test shape: (443, 1)


# Data Manipulation

### K-Fold

In [0]:
# some major changes here for the Train_Val_Data function
def Train_Val_Data(X_train_valid, y_train_val):
    '''
    split the train_valid into k folds (we fix k = 5 here)
    return: list of index of train data and val data of k folds
    train_fold[i], val_fold[i] is the index for training and validation in the i-th fold 

    '''
    fold_idx = []
    train_fold = []
    val_fold = []
    train_val_num = X_train_valid.shape[0]
    fold_num = int(train_val_num / 5)
    perm = np.random.permutation(train_val_num)
    for k in range(5):
        fold_idx.append(np.arange(k*fold_num, (k+1)*fold_num, 1))
    for k in range(5):
        val_fold.append(fold_idx[k])
        count = 0
        for i in range(5):
            if i != k:
                if count == 0:
                    train_idx = fold_idx[i]
                else:
                    train_idx = np.concatenate((train_idx, fold_idx[i]))
                count += 1
        train_fold.append(train_idx)

    return train_fold, val_fold

### Customized Dataset

In [0]:
class EEG_Dataset(Dataset):
    '''
    use use fold_idx to instantiate different train val datasets for k-fold cross validation

    '''
    def __init__ (self, X_train=None, y_train=None, p_train=None, X_val=None, y_val=None, p_val=None, X_test=None, y_test=None, p_test=None, mode='train'):
        if mode == 'train':
            self.X = X_train
            self.y = y_train- 769
            self.p = p_train
            
        elif mode == 'val':
            self.X = X_val
            self.y = y_val- 769
            self.p = p_val

        elif mode == 'test':
            self.X = X_test
            self.y = y_test - 769        
            self.p = p_test

    def __len__(self):
        return (self.X.shape[0])
    
    def __getitem__(self, idx):
        '''
        X: (augmented) time sequence 
        y: class label
        p: person id

        '''
        X = torch.from_numpy(self.X[idx,:,:]).float()
        y = torch.tensor(self.y[idx]).long()
        p = torch.tensor(self.p[idx]).long()
        #p = torch.from_numpy(self.p[idx,:]).long()     
        sample = {'X': X, 'y': y, 'p':p}

        return sample

## Data Augmentation Functions

###Center and Whiten Data
Scales and shifts data to have zero mean and variance 1

In [0]:
from sklearn import preprocessing
def scale_data(X):
  #Takes 3-dim X and outputs scaled and shifted X_new with zero mean and var 1
  X_scaled = np.empty_like(X)
  for i in range(X.shape[1]):
    X_scaled[:,i,:] = preprocessing.scale(X[:,i,:])
  return X_scaled

### 1. Window Data

In [0]:
def window_data(X, y, p, window_size, stride):
  '''
  X (a 3-d tensor) of size (#trials, #electrodes, #time series)
  y (#trials,): label 
  p (#trials, 1): person id

  X_new1: The first output stacks the windowed data in a new dimension, resulting 
    in a 4-d tensor of size (#trials x #electrodes x #windows x #window_size).
  X_new2: The second option makes the windows into new trails, resulting in a new
    X tensor of size (#trials*#windows x #electrodes x #window_size). To account 
    for the larger number of trials, we also need to augment the y data.
  y_new: The augmented y vector of size (#trials*#windows) to match X_new2.
  p_new: The augmented p vector of size (#trials*#windows) to match X_new2
 
  '''
  num_sub_trials = int((X.shape[2]-window_size)/stride)
  X_new1 = np.empty([X.shape[0],X.shape[1],num_sub_trials,window_size])
  X_new2 = np.empty([X.shape[0]*num_sub_trials,X.shape[1],window_size])
  y_new = np.empty([X.shape[0]*num_sub_trials])
  p_new = np.empty([X.shape[0]*num_sub_trials])
  for i in range(X.shape[0]):
    for j in range(X.shape[1]):
      for k in range(num_sub_trials):
        X_new1[i,j,k:k+window_size]    = X[i,j,k*stride:k*stride+window_size]
        X_new2[i*num_sub_trials+k,j,:] = X[i,j,k*stride:k*stride+window_size]
        y_new[i*num_sub_trials+k] = y[i]
        p_new[i*num_sub_trials+k] = p[i]
  N, C, NT, T = X_new1.shape
  X_new1 = (X_new1.reshape(N, C*NT, T))
  return X_new1, X_new2, y_new, p_new

### 2. STFT

In [0]:
# Function that computes the short-time fourier transform of the data and returns the spectrogram
def stft_data(X, window, stride):
    '''
    Inputs:
    X - input data, last dimension is one which transform will be taken across.
    window - size of sliding window to take transform across
    stride - stride of sliding window across time-series

    Returns:
    X_STFT - Output data, same shape as input with last dimension replaced with two new dimensions, F x T.
            where F = window//2 + 1 is the frequency axis
            and T = (input_length - window)//stride + 1, similar to the formula for aconvolutional filter.
    t - the corresponding times for the time axis, T
    f - the corresponding frequencies on the frequency axis, F.

    reshape X_STFT (N, C, F, T) to (N, C*F, T) to fit the input of rnn

    Note that a smaller window means only higher frequencies may be found, but give finer time resolution.
    Conversely, a large window gives better frequency resolution, but poor time resolution.

    '''
    noverlap = window-stride
    #print(noverlap)
    if noverlap < 0 :
        print('Stride results in skipped data!')
        return
    f, t, X_STFT = sig.spectrogram(X,nperseg=window,noverlap=noverlap,fs=250, return_onesided=True)
    N, C, F, T = X_STFT.shape
    X_STFT = X_STFT.reshape(N, C*F, T)
    return X_STFT

### 3. CWT

In [0]:
def cwt_data(X, num_levels, top_scale=3):
    '''
    Takes in data, computes CWT using the mexican hat or ricker wavelet using scipy
    Also takes in the top scale parameter.  I use logspace, so scale goes from 1 -> 2^top_scale with num_levels steps.
    Appends to the data a new dimension, of size 'num_levels'
    New dimension corresponds to wavelet content at num_levels different scalings (linear)
    also returns the central frequencies that the scalings correspond to
    input data is N x C X T
    output data is N x C x T x F
    note: CWT is fairly slow to compute

    # EXAMPLE USAGE
    test, freqs = cwt_data(X_train_valid[0:5,:,:],num_levels=75,top_scale=4)
    '''
    scales = np.logspace(start=0,stop=top_scale,num=num_levels)
    out = np.empty((X.shape[0],X.shape[1],X.shape[2],num_levels))
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            coef = sig.cwt(X[i,j,:],sig.ricker,scales)
            out[i,j,:] = coef.T
    freqs = pywt.scale2frequency('mexh',scales)*250
    N, C, T, F = out.shape
    X_CWT = np.transpose(out, (0,1,3,2)).reshape(N, C*F, T)
    return X_CWT

In [0]:
def cwt_data2(X, y, p, num_levels, top_scale=3):
    '''
    Takes in data, computes CWT using the mexican hat or ricker wavelet using scipy
    Also takes in the top scale parameter.  I use logspace, so scale goes from 1 -> 2^top_scale with num_levels steps.
    Appends to the data a new dimension, of size 'num_levels'
    New dimension corresponds to wavelet content at num_levels different scalings (linear)
    also returns the central frequencies that the scalings correspond to
    input data is N x C X T
    output data is N x C x T x F
    note: CWT is fairly slow to compute

    # EXAMPLE USAGE
    test, freqs = cwt_data(X_train_valid[0:5,:,:],num_levels=75,top_scale=4)
    '''
    scales = np.logspace(start=0,stop=top_scale,num=num_levels)
    out = np.empty((X.shape[0],X.shape[1],X.shape[2],num_levels))
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            coef = sig.cwt(X[i,j,:],sig.ricker,scales)
            out[i,j,:] = coef.T
    freqs = pywt.scale2frequency('mexh',scales)*250
    N, C, T, F = out.shape
    X_cwt = np.transpose(out, (0,3,1,2)).reshape(N*F, C, T)
    y_cwt = np.empty([X.shape[0]*F])
    p_cwt = np.empty([X.shape[0]*F])
    for i in range(X.shape[0]):
      for k in range(F):
        y_cwt[i*F+k] = y[i]
        p_cwt[i*F+k] = p[i]
    return X_cwt, y_cwt, p_cwt, F

### 4. Independent Component Analysis (ICA)

In [0]:
# FUNCTION TO COMPUTE THE ICA OF DATA
def ica_data(X, n_components):
  """
  ICA is sensitive to low-frequency drifts and therefore requires the data to 
  be high-pass filtered prior to fitting. Typically, a cutoff frequency of 1 Hz 
  is recommended.
  """
  #filter data
  bp_filter = sig.butter(4, [30,50], 'bandpass', fs=250, output='sos')
  X_filtered = np.empty((X_train_valid.shape))
  out = np.empty((X.shape[0], n_components, X.shape[-1]))
  X_ica = FastICA(n_components=n_components, algorithm='deflation', whiten=True, max_iter=500, tol=0.001)
  for i in range(X.shape[0]):
    X_filtered[i,:,:] = sig.sosfilt(bp_filter, X_train_valid[i,:,:])
    tstart = time.time()
    out[i,:,:] = X_ica.fit_transform(X[i,:,:].T).T
    tstop = time.time()
    total_time = tstop-tstart
    print('Done processing data sample {}, time: {:<3.2f}'.format(i, total_time))  # Reconstruct signals
  return out

In [0]:
n_components = 22
hp_filter = sig.butter(10, 1, 'hp', fs=250, output='sos')
X_filtered = np.empty((X_train_valid.shape))

for i in range(X_filtered.shape[0]):
    X_filtered[i,:,:] = sig.sosfilt(hp_filter, X_train_valid[i,:,:])


## Define data augmentation wrapper

In [0]:
def Aug_Data(X, y, p, aug_type=None, window_size=200, window_stride=20, stft_size=None, stft_stride=None, cwt_level=None, cwt_scale=None, ica_num=None):
    if aug_type == None:
        X_aug, y_aug, p_aug = X, y, p
    elif aug_type == "window":
        _, X_aug, y_aug, p_aug = window_data(X, y, p, window_size, window_stride)
    elif aug_type == "stft":
        X_aug = stft_data(X, stft_size, stft_stride)
        y_aug, p_aug = y, p
    elif aug_type == 'cwt':
        X_aug = cwt_data(X, cwt_level, cwt_scale)
        y_aug, p_aug = y, p
    elif aug_type == 'cwt2':
        X_aug, y_aug, p_aug = cwt_data2(X, y, p, cwt_level, cwt_scale)
    elif aug_type == 'ica':
        X_aug = ica_data(X, ica_num)
        y_aug, p_aug = y, p
    
    return X_aug, y_aug, p_aug

# Architectures

### Define Basic LSTM

In [0]:
class LSTMnet(nn.Module):
    '''
    Create Basic LSTM:
    2 layers

    TODO: make number of layers, dropout, activation function, regularization all params
    see ex: https://blog.floydhub.com/gru-with-pytorch/
    '''

    def __init__(self, input_size, hidden_size, output_dim, dropout):
        super(LSTMnet, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_dim = output_dim
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=2, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_dim)
    
    def forward(self, x, h=None):
        if type(h) == type(None):
            out, hn = self.rnn(x)
        else:
            out, hn = self.rnn(x, h.detach())
        out = self.fc(out[-1, :, :])
        return out

### Define Basic GRU

In [0]:
class GRUnet(nn.Module):
    '''
    Create Basic GRU:
    2 layers

    TODO: make number of layers, dropout, activation function, regularization all params
    see ex: https://blog.floydhub.com/gru-with-pytorch/
    '''

    def __init__(self, input_size, hidden_size, output_dim, dropout):
        super(GRUnet, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_dim = output_dim
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=2, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_dim)
    
    def forward(self, x, h=None):
        if type(h) == type(None):
            out, hn = self.rnn(x)
        else:
            out, hn = self.rnn(x, h.detach())
        out = self.fc(out[-1, :, :])
        return out

# RNN Initialization

In [0]:
def InitRNN(rnn_type="LSTM", input_size=22, hidden_size=50, output_dim=4, dropout=0.5, lr=1e-3):
    '''
    Function to initialize RNN
    
    input: RNN type(LSTM, GRU), and other params if neccessary (regularization, acitvation, dropout, num layers, etc.)

    output: model, criterion, optimizer

    TODO: Eventually should also take in params such as dropout, number of layers, and activation function(s), etc.
    '''

    if rnn_type=="LSTM":
        model = LSTMnet(input_size=input_size, hidden_size=hidden_size, output_dim=output_dim, dropout=dropout).to(device)

    elif rnn_type=="GRU":
        model = GRUnet(input_size=input_size, hidden_size=hidden_size, output_dim=output_dim, dropout=dropout).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    return model, criterion, optimizer


### K-Fold Training and Cross Validation

In [0]:
def TrainRNN(trainloader, valloader, num_epochs=20, verbose=True, aug_type=None):
    val_acc_list = []
    for ep in range(num_epochs):
        tstart = time.time()
        running_loss = 0.0
        correct, total = 0, 0
        for idx, batch in enumerate(EEG_trainloader):
            optimizer.zero_grad()
            X = batch['X'].permute(2, 0, 1).to(device)
            y = batch['y'].to(device)
            output = model(X)
            loss = criterion(output, y)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            pred = torch.argmax(output, dim=1)
            correct += torch.sum(pred == y).item()
            total += y.shape[0]
        train_acc = correct / total
        train_loss = running_loss
        '''
        The validation need to be customized according to the data augmenation type
        for stft and cwt: they didn't increase the number of trials, we can directly pass the augmented data to the model
        for window: it increase the number of trials, we need to do a voting for different subsequences in one trial
        
        '''
        if aug_type == 'window':
            correct, total = 0, 0
            for idx, batch in enumerate(EEG_valloader):
                X = batch['X'].permute(2, 0, 1).to(device)
                y = batch['y'].to(device)
                vote_idx = np.random.choice(1000-window_size, vote_num)
                vote_pred = np.zeros(y.shape[0])
                for i in range(len(vote_idx)):
                    X_sub = X[vote_idx[i]:vote_idx[i]+window_size,:,:]
                    output = model(X_sub)
                    pred = torch.argmax(output, dim=1)
                    if i == 0:
                        vote_matrix = np.asarray(pred.cpu().view(-1, 1))
                    else:
                        vote_matrix = np.hstack((vote_matrix, np.asarray(pred.cpu().view(-1,1))))
                    for row in range(y.shape[0]):
                        vote_pred[row] = np.bincount(vote_matrix[row, :]).argmax()
                vote_pred = torch.from_numpy(vote_pred).long()
                correct += torch.sum(vote_pred == y.cpu()).item()
                total += y.shape[0]
            val_acc = correct / total 
        elif aug_type == 'cwt2':
            correct, total = 0, 0
            for idx, batch in enumerate(EEG_valloader):
                X = batch['X'].permute(2, 0, 1).to(device)
                y = batch['y'].to(device)
                vote_idx = np.random.choice(1000-window_size, vote_num)
                vote_pred = np.zeros(y.shape[0])
                for i in range(len(vote_idx)):
                    X_sub = X[vote_idx[i]:vote_idx[i]+window_size,:,:]
                    output = model(X_sub)
                    pred = torch.argmax(output, dim=1)
                    if i == 0:
                        vote_matrix = np.asarray(pred.cpu().view(-1, 1))
                    else:
                        vote_matrix = np.hstack((vote_matrix, np.asarray(pred.cpu().view(-1,1))))
                    for row in range(y.shape[0]):
                        vote_pred[row] = np.bincount(vote_matrix[row, :]).argmax()
                vote_pred = torch.from_numpy(vote_pred).long()
                correct += torch.sum(vote_pred == y.cpu()).item()
                total += y.shape[0]
            val_acc = correct / total        
        else:
            correct, total = 0, 0
            for idx, batch in enumerate(EEG_valloader):
                X = batch['X'].permute(2, 0, 1).to(device)
                y = batch['y'].to(device)
                output = model(X)                    
                pred = torch.argmax(output, dim=1)
                correct += torch.sum(pred == y.cpu()).item()
                total += y.shape[0]
            val_acc = correct / total
        tend = time.time()
        if verbose:
            print('epoch: {:<3d}    time: {:<3.2f}    loss: {:<3.3f}    train acc: {:<1.3f}    val acc: {:<1.3f}'.format(ep+1, tend - tstart, train_loss, train_acc, val_acc))
        val_acc_list.append(val_acc)
    best_val_acc = max(val_acc_list)
    return best_val_acc

# Pipeline

## 2. Initialize the model

In [0]:
# indicate hyperparameters here
model, criterion, optimizer = InitRNN(rnn_type='LSTM')


# Experiments

##(small) windowed augmentation
Done with small number of data points below

Testing Accuracy: 0.2596

#### Split the data to train and validation

In [0]:
train_fold, val_fold = Train_Val_Data(X_train_valid, y_train_valid)
X_train_valid[train_fold[0]].shape

(1692, 22, 1000)

#### Run the thing

In [0]:
aug_type = 'window'
window_size = 80
vote_num = 8
best_val_acc = 0.0
model, criterion, optimizer = InitRNN(rnn_type='LSTM')
for k in range(1):
    # indicate hyperparameters here
    print ('fold {}'.format(k+1))
    X_train, y_train, p_train = X_train_valid[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
    X_val, y_val, p_val = X_train_valid[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
    X_train, y_train, p_train = Aug_Data(X_train[0:500,:,:], y_train[0:500], p_train[0:500], aug_type=aug_type, window_size=window_size, window_stride=vote_num)
    if aug_type != 'window':
        X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type, window_size=window_size, window_stride=vote_num)
    EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
    EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
    EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
    EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
    best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type) / 5
print ('average best validation accuracy of 5 folds is :{}'.format(best_val_acc))

fold 1
epoch: 1      time: 66.28    loss: 622.894    train acc: 0.267    val acc: 0.293
epoch: 2      time: 67.62    loss: 619.429    train acc: 0.292    val acc: 0.300
epoch: 3      time: 68.17    loss: 616.077    train acc: 0.306    val acc: 0.286
epoch: 4      time: 66.82    loss: 610.361    train acc: 0.324    val acc: 0.310
epoch: 5      time: 67.86    loss: 603.700    train acc: 0.346    val acc: 0.352
epoch: 6      time: 67.16    loss: 589.628    train acc: 0.375    val acc: 0.324
epoch: 7      time: 68.12    loss: 579.751    train acc: 0.392    val acc: 0.364
epoch: 8      time: 67.76    loss: 560.618    train acc: 0.427    val acc: 0.322
average best validation accuracy of 5 folds is :0.07281323877068559


In [0]:
X_test, y_test, p_test = X_test, y_test, person_test
if aug_type == 'window':
    EEG_testset = EEG_Dataset(X_train, y_train, p_train, X_val, y_val, p_val, X_test, y_test, p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        vote_idx = np.random.choice(1000-window_size, vote_num)
        vote_pred = np.zeros(y.shape[0])
        for i in range(len(vote_idx)):
            X_sub = X[vote_idx[i]:vote_idx[i]+200,:,:]
            output = model(X_sub)
            pred = torch.argmax(output, dim=1)
            if i == 0:
                vote_matrix = np.asarray(pred.cpu().view(-1, 1))
            else:
                vote_matrix = np.hstack((vote_matrix, np.asarray(pred.cpu().view(-1,1))))
            for row in range(y.shape[0]):
                vote_pred[row] = np.bincount(vote_matrix[row, :]).argmax()
        vote_pred = torch.from_numpy(vote_pred).long()
        correct += torch.sum(vote_pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total 
else:
    X_test, y_test, p_test = Aug_Data(X_test, y_test, p_test)
    EEG_testset = EEG_Dataset(X_test=X_test, y_test=y_test, p_test=p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)    
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        output = model(X)                    
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total
print ('Testing Accuracy: {:.4f}'.format(test_acc))


Testing Accuracy: 0.2596


## (small) windowed augmentation w/ whitening and centering
Done with small number of data points below. 

We see whitening and centering helps with overfitting

Testing Accuracy: 0.2460

#### Preprocess data

In [0]:
X_scaled = scale_data(X_train_valid)
train_fold, val_fold = Train_Val_Data(X_train_valid, y_train_valid)
X_train_valid[train_fold[0]].shape

(1692, 22, 1000)

#### Run the thing

In [0]:
aug_type = "window"
window_size = 80
vote_num = 8
best_val_acc = 0.0
for k in range(1):
    # indicate hyperparameters here
    model, criterion, optimizer = InitRNN(rnn_type='LSTM')
    print ('fold {}'.format(k+1))
    X_train, y_train, p_train = X_scaled[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
    X_val, y_val, p_val = X_scaled[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
    X_train, y_train, p_train = Aug_Data(X_train[0:500,:,:], y_train[0:500], p_train[0:500], aug_type=aug_type, window_size=window_size, window_stride=vote_num)
    if aug_type != 'window':
        X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type)
    EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
    EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
    EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
    EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
    best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
print ('best validation accuracy is :{}'.format(best_val_acc))

fold 1
epoch: 1      time: 66.87    loss: 622.249    train acc: 0.270    val acc: 0.258
epoch: 2      time: 68.54    loss: 618.756    train acc: 0.286    val acc: 0.296
epoch: 3      time: 67.25    loss: 616.951    train acc: 0.294    val acc: 0.241
epoch: 4      time: 67.12    loss: 620.118    train acc: 0.280    val acc: 0.314
epoch: 5      time: 66.77    loss: 617.386    train acc: 0.292    val acc: 0.298
epoch: 6      time: 67.11    loss: 619.829    train acc: 0.284    val acc: 0.307
epoch: 7      time: 66.42    loss: 616.222    train acc: 0.302    val acc: 0.324
epoch: 8      time: 67.78    loss: 610.287    train acc: 0.315    val acc: 0.322
average best validation accuracy of 5 folds is :0.06477541371158393


In [0]:
X_test, y_test, p_test = X_test, y_test, person_test
if aug_type == 'window':
    EEG_testset = EEG_Dataset(X_train, y_train, p_train, X_val, y_val, p_val, X_test, y_test, p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        vote_idx = np.random.choice(1000-window_size, vote_num)
        vote_pred = np.zeros(y.shape[0])
        for i in range(len(vote_idx)):
            X_sub = X[vote_idx[i]:vote_idx[i]+200,:,:]
            output = model(X_sub)
            pred = torch.argmax(output, dim=1)
            if i == 0:
                vote_matrix = np.asarray(pred.cpu().view(-1, 1))
            else:
                vote_matrix = np.hstack((vote_matrix, np.asarray(pred.cpu().view(-1,1))))
            for row in range(y.shape[0]):
                vote_pred[row] = np.bincount(vote_matrix[row, :]).argmax()
        vote_pred = torch.from_numpy(vote_pred).long()
        correct += torch.sum(vote_pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total 
else:
    X_test, y_test, p_test = Aug_Data(X_test, y_test, p_test)
    EEG_testset = EEG_Dataset(X_test=X_test, y_test=y_test, p_test=p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)    
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        output = model(X)                    
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total
print ('Testing Accuracy: {:.4f}'.format(test_acc))


Testing Accuracy: 0.2460


## Test windowed 2

### Preprocess data

In [0]:
window_size = 300
stride = 100
X_wind, _, _, _ = window_data(X_train_valid, y_train_valid, person_train_valid, window_size, stride)
train_fold, val_fold = Train_Val_Data(X_train_valid, y_train_valid)
X_wind[val_fold[0]].shape

(423, 154, 300)

### Run the thing

In [0]:
aug_type = None
best_val_acc = 0.0
for k in range(1):
    # indicate hyperparameters here
    model, criterion, optimizer = InitRNN(rnn_type='LSTM', input_size=154)
    print ('fold {}'.format(k+1))
    X_train, y_train, p_train = X_wind[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
    X_val, y_val, p_val = X_wind[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
    X_train, y_train, p_train = Aug_Data(X_train, y_train, p_train, aug_type=aug_type)
    if aug_type != 'window':
        X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type)
    EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
    EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
    EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
    EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
    best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
print ('best validation accuracy is :{}'.format(best_val_acc))

fold 1
epoch: 1      time: 36.27    loss: 19.425    train acc: 0.264    val acc: 0.265
epoch: 2      time: 36.57    loss: 19.191    train acc: 0.343    val acc: 0.267
epoch: 3      time: 36.91    loss: 19.019    train acc: 0.363    val acc: 0.288
epoch: 4      time: 38.18    loss: 18.782    train acc: 0.392    val acc: 0.246
epoch: 5      time: 40.53    loss: 18.413    train acc: 0.410    val acc: 0.265
epoch: 6      time: 38.94    loss: 17.822    train acc: 0.439    val acc: 0.286
epoch: 7      time: 17.40    loss: 17.239    train acc: 0.444    val acc: 0.284
epoch: 8      time: 11.74    loss: 16.613    train acc: 0.500    val acc: 0.255
best validation accuracy is :0.28841607565011823


## finding ideal window size for windowed augmentation

In [0]:
aug_type = "window"
#window_size = 50
windows = [500, 300, 100]
vote_num = 50
stride = 10
best_val_acc = 0.0
k = 0
for window_size in windows:
    model, criterion, optimizer = InitRNN(rnn_type='LSTM')
    # indicate hyperparameters here
    print ('window_size {}'.format(window_size))
    X_train, y_train, p_train = X_train_valid[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
    X_val, y_val, p_val = X_train_valid[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
    X_train, y_train, p_train = Aug_Data(X_train, y_train, p_train, aug_type=aug_type, window_size=window_size, window_stride=stride)
    if aug_type != 'window':
        X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type)
    EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
    EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
    EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
    EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
    best_val_acc = TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
#print ('best validation is :{}'.format(best_val_acc))

window_size 500


KeyboardInterrupt: ignored

##CWT - test for top_scale and num_levels

#### Num Levels

In [0]:
aug_type = "cwt"
levels = [20,25,30]
#num_levels = 5
top_scale = 1
best_val_acc = 0.0
k = 0
    
for num_levels in levels:
    print('num_levels: {}'.format(num_levels))
    # indicate hyperparameters here
    model, criterion, optimizer = InitRNN(rnn_type='LSTM', input_size = 22*num_levels)
    X_train, y_train, p_train = X_train_valid[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
    X_val, y_val, p_val = X_train_valid[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
    X_train, y_train, p_train = Aug_Data(X_train, y_train, p_train, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    if aug_type != 'window':
        X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
    EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
    EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
    EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
    best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type) / 5
print ('average best validation accuracy of 5 folds is :{}'.format(best_val_acc))

num_levels: 20
epoch: 1      time: 78.19    loss: 19.425    train acc: 0.246    val acc: 0.262
epoch: 2      time: 78.30    loss: 19.266    train acc: 0.315    val acc: 0.241
epoch: 3      time: 78.08    loss: 19.191    train acc: 0.324    val acc: 0.253
epoch: 4      time: 78.44    loss: 19.128    train acc: 0.326    val acc: 0.272
epoch: 5      time: 79.58    loss: 19.046    train acc: 0.340    val acc: 0.274
epoch: 6      time: 81.94    loss: 18.939    train acc: 0.335    val acc: 0.281
epoch: 7      time: 84.93    loss: 18.885    train acc: 0.353    val acc: 0.236
epoch: 8      time: 87.99    loss: 18.785    train acc: 0.362    val acc: 0.262
num_levels: 25
epoch: 1      time: 88.33    loss: 19.466    train acc: 0.245    val acc: 0.253
epoch: 2      time: 85.95    loss: 19.325    train acc: 0.281    val acc: 0.253
epoch: 3      time: 85.36    loss: 19.266    train acc: 0.315    val acc: 0.248
epoch: 4      time: 86.31    loss: 19.180    train acc: 0.303    val acc: 0.265
epoch: 5  

#### Top Scale

In [0]:
aug_type = "cwt"
#levels = [3,5,10,15]
scales = [0.3, 0.5, 0.8, 1, 1.2]
num_levels = 15
#top_scale = 3
window_size = 200
vote_num = 20
best_val_acc = 0.0
k = 0
    
for top_scale in scales:
    print('top_scale: {}'.format(top_scale))
    # indicate hyperparameters here
    model, criterion, optimizer = InitRNN(rnn_type='LSTM', input_size = 22*num_levels)
    print ('fold {}'.format(k+1))
    X_train, y_train, p_train = X_train_valid[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
    X_val, y_val, p_val = X_train_valid[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
    X_train, y_train, p_train = Aug_Data(X_train, y_train, p_train, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    if aug_type != 'window':
        X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
    EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
    EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
    EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
    best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
print ('average best train accuracy is :{}'.format(best_val_acc))

top_scale: 0.3
fold 1
epoch: 1      time: 71.97    loss: 19.462    train acc: 0.251    val acc: 0.296
epoch: 2      time: 70.87    loss: 19.338    train acc: 0.287    val acc: 0.260
epoch: 3      time: 71.07    loss: 19.235    train acc: 0.302    val acc: 0.258
epoch: 4      time: 71.22    loss: 19.200    train acc: 0.327    val acc: 0.253
epoch: 5      time: 71.00    loss: 19.122    train acc: 0.335    val acc: 0.220
epoch: 6      time: 72.94    loss: 19.014    train acc: 0.342    val acc: 0.227
epoch: 7      time: 77.08    loss: 18.915    train acc: 0.346    val acc: 0.232
epoch: 8      time: 83.10    loss: 18.854    train acc: 0.346    val acc: 0.274
epoch: 9      time: 89.41    loss: 18.665    train acc: 0.355    val acc: 0.270
epoch: 10     time: 97.63    loss: 18.499    train acc: 0.376    val acc: 0.279
epoch: 11     time: 103.71    loss: 18.301    train acc: 0.399    val acc: 0.272
epoch: 12     time: 114.68    loss: 17.848    train acc: 0.418    val acc: 0.267
epoch: 13     ti

## CWT augmentation

Testing Accuracy: 0.2483

#### Split the data to train and validation

In [0]:
train_fold, val_fold = Train_Val_Data(X_train_valid, y_train_valid)
X_train_valid[train_fold[0]].shape

(1692, 22, 1000)

###Run the thing

In [0]:
aug_type = "cwt"
num_levels = 20
top_scale = 1
best_val_acc = 0.0
k = 0

# indicate hyperparameters here
print('running cwt with num_levels: {}  and top_scale: {}'.format(num_levels, top_scale))
model, criterion, optimizer = InitRNN(rnn_type='LSTM', input_size = 22*num_levels)
X_train, y_train, p_train = X_train_valid[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
X_val, y_val, p_val = X_train_valid[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
X_train, y_train, p_train = Aug_Data(X_train, y_train, p_train, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
if aug_type != 'window':
    X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
print ('best validation accuracy is :{}'.format(best_val_acc))

running cwt with num_levels: 20  and top_scale: 1
epoch: 1      time: 77.80    loss: 19.493    train acc: 0.238    val acc: 0.239
epoch: 2      time: 77.93    loss: 19.359    train acc: 0.298    val acc: 0.281
epoch: 3      time: 77.65    loss: 19.243    train acc: 0.313    val acc: 0.267
epoch: 4      time: 79.17    loss: 19.186    train acc: 0.313    val acc: 0.279
epoch: 5      time: 79.83    loss: 19.047    train acc: 0.324    val acc: 0.253
epoch: 6      time: 82.67    loss: 18.973    train acc: 0.341    val acc: 0.262
epoch: 7      time: 89.95    loss: 18.874    train acc: 0.353    val acc: 0.270
epoch: 8      time: 101.65    loss: 18.743    train acc: 0.358    val acc: 0.286
best validation accuracy is :0.2860520094562648


###Test it

In [0]:
X_test, y_test, p_test = X_test, y_test, person_test
if aug_type == 'window':
    EEG_testset = EEG_Dataset(X_train, y_train, p_train, X_val, y_val, p_val, X_test, y_test, p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        vote_idx = np.random.choice(1000-window_size, vote_num)
        vote_pred = np.zeros(y.shape[0])
        for i in range(len(vote_idx)):
            X_sub = X[vote_idx[i]:vote_idx[i]+200,:,:]
            output = model(X_sub)
            pred = torch.argmax(output, dim=1)
            if i == 0:
                vote_matrix = np.asarray(pred.cpu().view(-1, 1))
            else:
                vote_matrix = np.hstack((vote_matrix, np.asarray(pred.cpu().view(-1,1))))
            for row in range(y.shape[0]):
                vote_pred[row] = np.bincount(vote_matrix[row, :]).argmax()
        vote_pred = torch.from_numpy(vote_pred).long()
        correct += torch.sum(vote_pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total 
else:
    X_test, y_test, p_test = Aug_Data(X_test, y_test, p_test, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    EEG_testset = EEG_Dataset(X_test=X_test, y_test=y_test, p_test=p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)    
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        output = model(X)                    
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total
print ('Testing Accuracy: {:.4f}'.format(test_acc))

Testing Accuracy: 0.2483


## CWT augmentation type 2

Testing Accuracy: 0.2483

#### Split the data to train and validation

In [0]:
train_fold, val_fold = Train_Val_Data(X_train_valid, y_train_valid)
X_train_valid[train_fold[0]].shape

(1692, 22, 1000)

###Run the thing

In [0]:
aug_type = "cwt2"
num_levels = 20
top_scale = 1
best_val_acc = 0.0
k = 0

# indicate hyperparameters here
model, criterion, optimizer = InitRNN(rnn_type='LSTM', input_size = 22)
X_train, y_train, p_train = X_train_valid[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
X_val, y_val, p_val = X_train_valid[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]

In [0]:
X_train, y_train, p_train, window_size = Aug_Data(X_train, y_train, p_train, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)

In [0]:
if aug_type != 'window':
    X_val, y_val, p_val, F = Aug_Data(X_val, y_val, p_val, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)

In [0]:
X_train.shape[0]/X_train_valid.shape[0]

(33840, 22, 1000)

In [0]:
vote_num = 100
print('running cwt2 with num_levels: {}  and top_scale: {}'.format(num_levels, top_scale))
EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
print ('best validation accuracy is :{}'.format(best_val_acc))

running cwt2 with num_levels: 20  and top_scale: 1
epoch: 1      time: 1325.99    loss: 363.393    train acc: 0.302    val acc: 0.266
epoch: 2      time: 1572.79    loss: 351.962    train acc: 0.357    val acc: 0.253
epoch: 3      time: 1660.67    loss: 338.832    train acc: 0.402    val acc: 0.288
epoch: 4      time: 1885.56    loss: 323.116    train acc: 0.444    val acc: 0.279
epoch: 5      time: 2168.13    loss: 306.893    train acc: 0.492    val acc: 0.284
epoch: 6      time: 2300.44    loss: 290.499    train acc: 0.524    val acc: 0.292


KeyboardInterrupt: ignored

###Test it

In [0]:
X_test, y_test, p_test = X_test, y_test, person_test
if aug_type == 'window':
    EEG_testset = EEG_Dataset(X_train, y_train, p_train, X_val, y_val, p_val, X_test, y_test, p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        vote_idx = np.random.choice(1000-window_size, vote_num)
        vote_pred = np.zeros(y.shape[0])
        for i in range(len(vote_idx)):
            X_sub = X[vote_idx[i]:vote_idx[i]+200,:,:]
            output = model(X_sub)
            pred = torch.argmax(output, dim=1)
            if i == 0:
                vote_matrix = np.asarray(pred.cpu().view(-1, 1))
            else:
                vote_matrix = np.hstack((vote_matrix, np.asarray(pred.cpu().view(-1,1))))
            for row in range(y.shape[0]):
                vote_pred[row] = np.bincount(vote_matrix[row, :]).argmax()
        vote_pred = torch.from_numpy(vote_pred).long()
        correct += torch.sum(vote_pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total 
else:
    X_test, y_test, p_test = Aug_Data(X_test, y_test, p_test, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    EEG_testset = EEG_Dataset(X_test=X_test, y_test=y_test, p_test=p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)    
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        output = model(X)                    
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total
print ('Testing Accuracy: {:.4f}'.format(test_acc))

Testing Accuracy: 0.2483


## CWT augmentation followed by windowing



####Preprocess data

In [0]:
num_levels = 20
top_scale = 1
X_cwt = cwt_data(X_train_valid, num_levels, top_scale=top_scale)

KeyboardInterrupt: ignored

In [0]:
train_fold, val_fold = Train_Val_Data(X_cwt, y_train_valid)
X_cwt[train_fold[0]].shape

(1692, 440, 1000)

###Run the thing

In [0]:
aug_type = 'window'
best_val_acc = 0.0
window_size = 200
stride = 100
vote_num = 50
k = 0
    
for k in range(1):
    # indicate hyperparameters here
    model, criterion, optimizer = InitRNN(rnn_type='LSTM', input_size = 22*num_levels)
    print ('fold {}'.format(k+1))
    X_train, y_train, p_train = X_cwt[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
    X_val, y_val, p_val = X_cwt[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
    X_train, y_train, p_train = Aug_Data(X_train, y_train, p_train, aug_type=aug_type, window_size=window_size, window_stride=stride)
    if aug_type != 'window':
        X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
    EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
    EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
    EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
    best_val_acc += TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
print ('best validation is :{}'.format(best_val_acc))

fold 1


###Test it

In [0]:
X_test, y_test, p_test = X_test, y_test, person_test
X_test = cwt_data(X_test, num_levels, top_scale=top_scale)

if aug_type == 'window':
    EEG_testset = EEG_Dataset(X_train, y_train, p_train, X_val, y_val, p_val, X_test, y_test, p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        vote_idx = np.random.choice(1000-window_size, vote_num)
        vote_pred = np.zeros(y.shape[0])
        for i in range(len(vote_idx)):
            X_sub = X[vote_idx[i]:vote_idx[i]+200,:,:]
            output = model(X_sub)
            pred = torch.argmax(output, dim=1)
            if i == 0:
                vote_matrix = np.asarray(pred.cpu().view(-1, 1))
            else:
                vote_matrix = np.hstack((vote_matrix, np.asarray(pred.cpu().view(-1,1))))
            for row in range(y.shape[0]):
                vote_pred[row] = np.bincount(vote_matrix[row, :]).argmax()
        vote_pred = torch.from_numpy(vote_pred).long()
        correct += torch.sum(vote_pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total 
else:
    X_test, y_test, p_test = Aug_Data(X_test, y_test, p_test, aug_type=aug_type, cwt_level=num_levels, cwt_scale=top_scale)
    EEG_testset = EEG_Dataset(X_test=X_test, y_test=y_test, p_test=p_test, mode='test')
    EEG_testloader = DataLoader(EEG_testset, batch_size=128, shuffle=False)    
    correct, total = 0, 0
    for idx, batch in enumerate(EEG_testloader):
        X = batch['X'].permute(2, 0, 1).to(device)
        y = batch['y'].to(device)
        output = model(X)                    
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == y.cpu()).item()
        total += y.shape[0]
    test_acc = correct / total
print ('Testing Accuracy: {:.4f}'.format(test_acc))

Testing Accuracy: 0.2483


#ICA

###Preprocess data

In [0]:
X_scaled = scale_data(X_train_valid)
n_components = 10
X_ica5 = ica_data(X_scaled, n_components)

Done processing data sample 0, time: 1.20
Done processing data sample 1, time: 6.38
Done processing data sample 2, time: 0.81
Done processing data sample 3, time: 7.95
Done processing data sample 4, time: 6.29
Done processing data sample 5, time: 6.55
Done processing data sample 6, time: 1.03
Done processing data sample 7, time: 2.25
Done processing data sample 8, time: 9.25
Done processing data sample 9, time: 9.93
Done processing data sample 10, time: 6.65
Done processing data sample 11, time: 7.95
Done processing data sample 12, time: 6.88
Done processing data sample 13, time: 1.07
Done processing data sample 14, time: 3.88
Done processing data sample 15, time: 6.22
Done processing data sample 16, time: 1.72
Done processing data sample 17, time: 0.94
Done processing data sample 18, time: 2.81
Done processing data sample 19, time: 1.01
Done processing data sample 20, time: 2.34
Done processing data sample 21, time: 6.42
Done processing data sample 22, time: 10.87
Done processing data

In [0]:
np.save(dataset_path + "X_train_ICA10.npy", X_ica5)

In [0]:
X_test_scaled = scale_data(X_test)
n_components = 10
X_test_ica5 = ica_data(X_test_scaled, n_components)

Done processing data sample 0, time: 8.78
Done processing data sample 1, time: 14.56
Done processing data sample 2, time: 6.71
Done processing data sample 3, time: 6.54
Done processing data sample 4, time: 8.37
Done processing data sample 5, time: 2.59
Done processing data sample 6, time: 7.93
Done processing data sample 7, time: 1.46
Done processing data sample 8, time: 0.72
Done processing data sample 9, time: 6.90
Done processing data sample 10, time: 5.28
Done processing data sample 11, time: 1.34
Done processing data sample 12, time: 6.99
Done processing data sample 13, time: 12.34
Done processing data sample 14, time: 8.39
Done processing data sample 15, time: 1.27
Done processing data sample 16, time: 17.48
Done processing data sample 17, time: 3.50
Done processing data sample 18, time: 6.57
Done processing data sample 19, time: 1.87
Done processing data sample 20, time: 13.65
Done processing data sample 21, time: 4.64
Done processing data sample 22, time: 18.11
Done processing 

In [0]:
np.save(dataset_path + "X_test_ICA10.npy", X_test_ica5)

####Load the data

In [0]:
X_train_ICA = np.load(dataset_path + "X_train_ICA.npy")
X_test_ICA  = np.load(dataset_path + "X_test_ICA.npy")

In [0]:
train_fold, val_fold = Train_Val_Data(X_train_ICA, y_train_valid)
X_train_ICA[train_fold[0]].shape

(1692, 21, 1000)

###Run the thing

In [0]:
aug_type = 'window'
window_size = 200
stride = 50
vote_num = 50
best_val_acc = 0.0
k = 1
    
# indicate hyperparameters here
model, criterion, optimizer = InitRNN(rnn_type='LSTM', input_size = X_train_ICA5.shape[1])
X_train, y_train, p_train = X_train_ICA5[train_fold[k]], y_train_valid[train_fold[k]], person_train_valid[train_fold[k]]
X_val, y_val, p_val = X_train_ICA5[val_fold[k]], y_train_valid[val_fold[k]], person_train_valid[val_fold[k]]
X_train, y_train, p_train = Aug_Data(X_train, y_train, p_train, aug_type=aug_type, window_size=window_size, window_stride=stride)
if aug_type != 'window':
    X_val, y_val, p_val = Aug_Data(X_val, y_val, p_val, aug_type=aug_type, ica_num=ica_components)
EEG_trainset = EEG_Dataset(X_train=X_train, y_train=y_train, p_train=p_train, mode='train')
EEG_trainloader = DataLoader(EEG_trainset, batch_size=128, shuffle=True)
EEG_valset = EEG_Dataset(X_val=X_val, y_val=y_val, p_val=p_val, mode='val')
EEG_valloader = DataLoader(EEG_valset, batch_size=128, shuffle=False)
best_val_acc = TrainRNN(EEG_trainloader, EEG_valloader, aug_type=aug_type)
print ('final validation accuracy is :{}'.format(best_val_acc))

epoch: 1      time: 481.17    loss: 293.865    train acc: 0.252    val acc: 0.239
epoch: 2      time: 472.12    loss: 293.871    train acc: 0.252    val acc: 0.258
epoch: 3      time: 462.46    loss: 293.858    train acc: 0.254    val acc: 0.267
epoch: 4      time: 452.20    loss: 293.836    train acc: 0.255    val acc: 0.258
epoch: 5      time: 448.28    loss: 293.838    train acc: 0.256    val acc: 0.243
epoch: 6      time: 442.14    loss: 293.850    train acc: 0.255    val acc: 0.258
epoch: 7      time: 442.55    loss: 293.831    train acc: 0.255    val acc: 0.258
epoch: 8      time: 443.44    loss: 293.814    train acc: 0.258    val acc: 0.246
epoch: 9      time: 381.59    loss: 293.822    train acc: 0.254    val acc: 0.258
epoch: 10     time: 336.35    loss: 293.811    train acc: 0.258    val acc: 0.258


KeyboardInterrupt: ignored