In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import spacy
import numpy as np
import pandas as pd
# import sklearn
import scipy as sp

import random
import math
import time

In [2]:
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

#### now let's build a loader for our data.

In [3]:
from aopy import datareader, datafilter
from torch.utils.data.sampler import SequentialSampler, BatchSampler, SubsetRandomSampler
from torch.utils.data import TensorDataset, random_split

In [4]:
# load data
data_file_full_path = '/Volumes/Samsung_T5/aoLab/Data/WirelessData/Goose_Multiscale_M1/180325/001/rec001.LM1_ECOG_3.clfp.dat'
data_in, data_param, data_mask = datareader.load_ecog_clfp_data(data_file_name=data_file_full_path)
srate_in= data_param['srate']
num_ch = data_param['num_ch']

Loading data file:


In [5]:
# re(down)sample data
# srate = 250 # not much signal above 100Hz
# ds_factor = np.intc(np.floor(srate_in/srate)) # decimate does not allow for floats as ds_fac arguments
# data_in = sp.signal.decimate(data_in,ds_factor,axis=-1)

In [6]:
from torch.utils.data import Dataset, DataLoader

class EcogDataset(Dataset):
    def __init__(self, data_in, block_len):
        self.data = data_in
        self.block_len = int(block_len)

    def __len__(self):
        return self.data.shape[0] // self.block_len

    def __getitem__(self, index):
        idx = int(index)
        return self.data[idx:(idx + self.block_len),:] # each call returns a sequence of length `block_len`

In [7]:
# create dataframe
srate = srate_in
# data_in = np.double(data_in[:,:120*srate])
seq_len = 11 # use ten time points to predict the next time point
# dataset = pd.DataFrame(data_in.transpose(),dtype=np.double) # may be unnecessary for now, but df will probably help combine files in the future.
# datareader.load_ecog_clfp_data.get_args()
data_tensor = torch.from_numpy(data_in.transpose())
print(data_tensor.dtype)
dataset = EcogDataset(data_tensor,seq_len) ## make my own Dataset class

torch.float32


In [8]:
train_frac = 0.8
valid_frac = 0.1
test_frac = 0.1
block_time = 20
block_size = block_time*srate
BATCH_SIZE = 3

data_size = np.shape(data_in)[-1]
n_block = np.floor(data_size/block_size)
idx_all = np.arange(data_size)
train_split = int(np.floor(train_frac*data_size))
valid_split = int(np.floor(valid_frac*data_size))
test_split = int(np.floor(test_frac*data_size))
# rework this using
train_idx = idx_all[0:train_split:seq_len]
valid_idx = idx_all[train_split:train_split+valid_split:seq_len] 
test_idx  = idx_all[train_split+valid_split:-1:seq_len]
# print(train_idx, valid_idx)

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
test_sampler = SubsetRandomSampler(test_idx)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                           sampler=train_sampler,
                                           drop_last=True) # this can be avoided using some padding sequence classes, I think
valid_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                           sampler=valid_sampler,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                          sampler=test_sampler,
                                          drop_last=True)

In [9]:
# if your DataLoader is working correctly, this should be `torch.Size([<seq_len>, <num_ch>])
dataset.__getitem__(next(iter(train_sampler))).shape

torch.Size([11, 62])

#### back to our data (potenially)

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [11]:
# BATCH_SIZE = 128
# train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
#     (train_data, valid_data, test_data),
#     batch_size = BATCH_SIZE,
#     device = device)

In [12]:
class Encoder_GRU(nn.Module):
    def __init__(self, input_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        # gated recurrent layer, dropout layer
        self.gru_layer = nn.GRU(input_dim, hid_dim, n_layers, dropout=dropout, batch_first=True)
        # note: batch_first only permutes dimension order in input and output tensors. It does not affect hidden state.
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_data):
        # input_data: [batch_size x seq_len x input_dim]
        # h0: [n_layers x batch_size x hid_dim]
        batch_size = input_data.size(0)
#         hidden = torch.randn(self.n_layers, batch_size, self.hid_dim) # initialize hidden layer value
        output, hidden = self.gru_layer(input_data) # hidden initialized as zero tensor
            
        # output = [batch_size x seq_len x hid_dim]
        # hidden = [n layers * n directions, batch size, hid dim]

        return output, hidden

In [13]:
class Decoder_GRU(nn.Module):
    def __init__(self, output_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.rnn = nn.GRU(hid_dim, hid_dim, n_layers, dropout=dropout, batch_first=True)
        self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input_data, hidden):
        # input = [batch_size, seq_len, hid_dim]
        # hidden = [n layers * n directions, batch size, hid dim]
        # cell = [n layers * n directions, batch size, hid dim]
        
        # hidden = [n layers, batch size, hid dim]
        
#         input_data = input_data.unsqueeze(0) # not sure if this this is needed for not-embedded inputs
        if len(input_data.size()) != 3 or len(hidden.size()) != 3:
            breakpoint()
        output, hidden = self.rnn(input_data, hidden)
        
        #output = [seq len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #"seq len and n directions will always be 1 in the decoder, therefore:" <- figure out how to change this
        #output = [batch_size, 1, hid dim]
        #hidden = [n layers, batch size, hid dim]
        
        prediction = self.fc_out(output)
        
        return prediction, output, hidden # predicted ECoG signal, decoder states, last decoder state

In [20]:
class Seq2Seq_GRU(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.device = device # what is this?
        
        assert encoder.hid_dim == decoder.hid_dim, \
            "Encoder, decoder embedding dimensions (hidden state) must be equal."
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder, decoder layer number must be equal."
        
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        #src = [src len, batch size]
        #trg = [trg len, batch size]
        #teacher_forcing_ratio: prob. to use teacher forcing
        #e.g. if 0.75, ground-truth imports are used 75% of the time
        
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        trg_dim = self.decoder.output_dim
        
        #tensor to store decoder outputs
        outputs = torch.zeros(trg_len, batch_size, trg_dim).to(self.device)
        
        enc_state, hidden = self.encoder(src)
        
        input = trg[:,0,:] # this was grabbing indexing into the first batch element, not the first sequence element.
        
        for t in range(1,trg_len): # ignore that first data point
            pred, output, hidden = self.decoder(input,hidden)
            
            outputs[t] = pred
            
            teacher_force = random.random() < teacher_forcing_ratio
            
            top1 = output.argmax(1)
            
            input = trg[:,t,:] if teacher_force else top1
        
        return outputs

In [21]:
INPUT_SEQ_LEN = 10 
OUTPUT_SEQ_LEN = 1 # predict one output state from 10 inputs prior
INPUT_DIM = num_ch
OUTPUT_DIM = num_ch
HID_DIM = num_ch
N_ENC_LAYERS = 1 
N_DEC_LAYERS = 1
ENC_DROPOUT = np.float32(0.5)
DEC_DROPOUT = np.float32(0.5)

enc = Encoder_GRU(INPUT_DIM, HID_DIM, N_ENC_LAYERS, ENC_DROPOUT)
dec = Decoder_GRU(OUTPUT_DIM, HID_DIM, N_DEC_LAYERS, DEC_DROPOUT)

model = Seq2Seq_GRU(enc, dec, device).to(device)

In [22]:
# example of enc/dec function
# let's pass the first pop off the dataset to the encoder and look at the outputs
enc_out, hid_enc = enc.forward(train_loader.__iter__()._next_data())
# out: [h1, h2, ..., h{seq_len}]
# hid: h{seq_len}
print(enc_out.size(),hid_enc.size())
est, dec_out, hid_dec = dec.forward(enc_out,hid_enc)
print(est.size(),dec_out.size(),hid_dec.size())

torch.Size([3, 11, 62]) torch.Size([1, 3, 62])
torch.Size([3, 11, 62]) torch.Size([3, 11, 62]) torch.Size([1, 3, 62])


In [23]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

Seq2Seq_GRU(
  (encoder): Encoder_GRU(
    (gru_layer): GRU(62, 62, batch_first=True, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder_GRU(
    (rnn): GRU(62, 62, batch_first=True, dropout=0.5)
    (fc_out): Linear(in_features=62, out_features=62, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [24]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 50,778 trainable parameters


In [25]:
optimizer = optim.Adam(model.parameters())

In [26]:
# PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

criterion = nn.MSELoss()

In [27]:
def train(model, iterator, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
#         import pdb; pdb.set_trace()
        src = batch[:,:-1,:]
        trg = batch[:,-1,:].unsqueeze(1) # otherwise it would automatically cut this out.
        
        optimizer.zero_grad()
        
        output = model(src, trg)
        
        #### RESUME DEBUGGING HERE - 2020.03.05.1800
        breakpoint()
        ####
        
        #trg = [trg len, batch size]
        #output = [trg len, batch size, output dim]
        
        output_dim = output.shape[-1]
        
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)
        
        #trg = [(trg len - 1) * batch size]
        #output = [(trg len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [28]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):
            
            src = batch
            trg = batch

            output = model(src, trg, 0) #turn off teacher forcing

            #trg = [trg len, batch size]
            #output = [trg len, batch size, output dim]

            output_dim = output.shape[-1]
            
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)

            #trg = [(trg len - 1) * batch size]
            #output = [(trg len - 1) * batch size, output dim]

            loss = criterion(output, trg)
            
            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [29]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [30]:
print(model)
print(train_loader)
print(test_loader)

Seq2Seq_GRU(
  (encoder): Encoder_GRU(
    (gru_layer): GRU(62, 62, batch_first=True, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder_GRU(
    (rnn): GRU(62, 62, batch_first=True, dropout=0.5)
    (fc_out): Linear(in_features=62, out_features=62, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)
<torch.utils.data.dataloader.DataLoader object at 0x1a24d19550>
<torch.utils.data.dataloader.DataLoader object at 0x1a24d196d0>


In [31]:
N_EPOCHS = 10
CLIP = 1

best_test_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_loader, optimizer, criterion, CLIP)
    test_loss = evaluate(model.double(), test_loader, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

> <ipython-input-27-776a09230dcb>(20)train()
-> output_dim = output.shape[-1]


(Pdb)  output.size()


torch.Size([1, 3, 62])


(Pdb)  step


> <ipython-input-27-776a09230dcb>(22)train()
-> output = output[1:].view(-1, output_dim)


(Pdb)  step


> <ipython-input-27-776a09230dcb>(23)train()
-> trg = trg[1:].view(-1)


(Pdb)  trg


tensor([[[-3.5712e+00,  1.7371e+00, -3.7617e+01, -1.2642e+01, -1.9826e+00,
           1.5354e+01,  8.2303e+00,  4.7060e+01,  4.7313e+00, -8.0275e+00,
          -8.5970e+00,  6.8349e+00,  7.0171e+01, -6.4290e+00,  5.0932e+01,
           1.0116e+02,  5.5229e+01,  5.3648e+01,  6.8373e+01,  4.9456e+01,
           9.6087e+01,  1.1446e+02,  1.2144e+02,  1.5584e+02,  1.4867e+02,
           4.6781e+01,  3.5955e+01,  2.5189e+02, -1.4065e+01,  1.5370e+02,
           2.8549e+02,  2.8922e+02,  2.9503e+02,  2.5611e+02,  2.8940e+02,
           2.1902e+02,  2.3649e+02,  9.4047e+01,  3.8417e+01,  3.7832e+01,
           2.0239e+02,  8.7640e+01,  1.3835e+02,  5.1044e+01,  4.9968e+00,
           3.3520e+01,  1.5415e+02,  1.1280e+02,  1.4710e+02,  3.8994e+01,
           4.8086e+01,  6.3107e+01,  1.5108e+02,  1.2302e+02,  1.1168e+02,
           5.5282e+01,  5.5433e+01, -5.8299e+00,  2.6748e+01,  4.4082e+01,
          -1.5590e+01,  3.8389e+01]],

        [[-7.0955e+03, -7.4616e+03, -7.3266e+03, -7.5778e+03,

(Pdb)  step


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
> <ipython-input-27-776a09230dcb>(23)train()
-> trg = trg[1:].view(-1)


(Pdb)  trg


tensor([[[-3.5712e+00,  1.7371e+00, -3.7617e+01, -1.2642e+01, -1.9826e+00,
           1.5354e+01,  8.2303e+00,  4.7060e+01,  4.7313e+00, -8.0275e+00,
          -8.5970e+00,  6.8349e+00,  7.0171e+01, -6.4290e+00,  5.0932e+01,
           1.0116e+02,  5.5229e+01,  5.3648e+01,  6.8373e+01,  4.9456e+01,
           9.6087e+01,  1.1446e+02,  1.2144e+02,  1.5584e+02,  1.4867e+02,
           4.6781e+01,  3.5955e+01,  2.5189e+02, -1.4065e+01,  1.5370e+02,
           2.8549e+02,  2.8922e+02,  2.9503e+02,  2.5611e+02,  2.8940e+02,
           2.1902e+02,  2.3649e+02,  9.4047e+01,  3.8417e+01,  3.7832e+01,
           2.0239e+02,  8.7640e+01,  1.3835e+02,  5.1044e+01,  4.9968e+00,
           3.3520e+01,  1.5415e+02,  1.1280e+02,  1.4710e+02,  3.8994e+01,
           4.8086e+01,  6.3107e+01,  1.5108e+02,  1.2302e+02,  1.1168e+02,
           5.5282e+01,  5.5433e+01, -5.8299e+00,  2.6748e+01,  4.4082e+01,
          -1.5590e+01,  3.8389e+01]],

        [[-7.0955e+03, -7.4616e+03, -7.3266e+03, -7.5778e+03,

(Pdb)  step


--Call--
> /Users/mickey/anaconda3/envs/ecog_is2s/lib/python3.7/site-packages/torch/utils/data/sampler.py(203)__iter__()
-> yield batch


(Pdb)  ste


*** NameError: name 'ste' is not defined


(Pdb)  step


GeneratorExit
> /Users/mickey/anaconda3/envs/ecog_is2s/lib/python3.7/site-packages/torch/utils/data/sampler.py(198)__iter__()
-> def __iter__(self):


(Pdb)  step


--Call--
> /Users/mickey/anaconda3/envs/ecog_is2s/lib/python3.7/site-packages/torch/utils/data/sampler.py(124)<genexpr>()
-> return (self.indices[i] for i in torch.randperm(len(self.indices)))


(Pdb)  exit


Exception ignored in: <generator object SubsetRandomSampler.__iter__.<locals>.<genexpr> at 0x1a98278150>
Traceback (most recent call last):
  File "/Users/mickey/anaconda3/envs/ecog_is2s/lib/python3.7/site-packages/torch/utils/data/sampler.py", line 124, in <genexpr>
    return (self.indices[i] for i in torch.randperm(len(self.indices)))
  File "/Users/mickey/anaconda3/envs/ecog_is2s/lib/python3.7/bdb.py", line 90, in trace_dispatch
    return self.dispatch_call(frame, arg)
  File "/Users/mickey/anaconda3/envs/ecog_is2s/lib/python3.7/bdb.py", line 135, in dispatch_call
    if self.quitting: raise BdbQuit
bdb.BdbQuit: 


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [None]:
model.load_state_dict(torch.load('tut1-model.pt'))

test_loss = evaluate(model, test_iterator, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

In [None]:
if len(torch.Tensor(np.arange(10)).size()) != 3:
    breakpoint()