In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import spacy
import numpy as np
import pandas as pd
# import sklearn
import scipy as sp

import random
import math
import time

In [2]:
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

#### now let's build a loader for our data.

In [3]:
from aopy import datareader, datafilter
from torch.utils.data.sampler import SequentialSampler, BatchSampler, SubsetRandomSampler
from torch.utils.data import TensorDataset, random_split

In [5]:
# load data
data_file_full_path = '/Volumes/Samsung_T5/aoLab/Data/WirelessData/Goose_Multiscale_M1/180325/001/rec001.LM1_ECOG_3.clfp.dat'
data_in, data_param, data_mask = datareader.load_ecog_clfp_data(data_file_name=data_file_full_path)
srate_in= data_param['srate']
num_ch = data_param['num_ch']

Loading data file:


In [6]:
# # plot data segment for a nice tidy figure
# import matplotlib.pyplot as pp
# t_plot = np.arange(data_in.shape[1])/srate_in
# t_plot.shape

# t_start = 70
# t_end = 80
# plot_idx = range(srate_in*t_start,srate_in*t_end)
# n_chan_plot = 10
# ch_idx = range(n_chan_plot)

# fig, ax = pp.subplots(figsize=(3,8))
# ax.plot(data_in[0:n_chan_plot*6:6,plot_idx].transpose() + 1500*np.arange(n_chan_plot),t_plot[plot_idx])
# ax.set_ylim((t_start,t_end))
# fig.patch.set_visible(False)
# ax.set_xlabel('ECoG Data')
# ax.axis('off')

# with open("ECoG_trace.png", 'wb') as outfile:
#     fig.canvas.print_png(outfile)

In [8]:
# re(down)sample data
# srate = 250 # not much signal above 100Hz
# ds_factor = np.intc(np.floor(srate_in/srate)) # decimate does not allow for floats as ds_fac arguments
# data_in = sp.signal.decimate(data_in,ds_factor,axis=-1)

In [9]:
from torch.utils.data import Dataset, DataLoader

class EcogDataset(Dataset):
    def __init__(self, data_in, block_len):
        self.data = data_in
        self.block_len = int(block_len)

    def __len__(self):
        return self.data.shape[0] // self.block_len

    def __getitem__(self, index):
        idx = int(index)
        return self.data[idx:(idx + self.block_len),:] # each call returns a sequence of length `block_len`

In [96]:
# create dataframe
srate = srate_in
# data_in = np.double(data_in[:,:120*srate])
enc_len = 10
dec_len = 1
seq_len = enc_len+dec_len # use ten time points to predict the next time point
# dataset = pd.DataFrame(data_in.transpose(),dtype=np.double) # may be unnecessary for now, but df will probably help combine files in the future.
# datareader.load_ecog_clfp_data.get_args()
data_tensor = torch.from_numpy(data_in.transpose())
print(data_tensor.dtype)
dataset = EcogDataset(data_tensor,seq_len) ## make my own Dataset class

torch.float32


In [11]:
train_frac = 0.8
valid_frac = 0.1
test_frac = 0.1
block_time = 20
block_size = block_time*srate
BATCH_SIZE = 3

data_size = np.shape(data_in)[-1]
n_block = np.floor(data_size/block_size)
idx_all = np.arange(data_size)
train_split = int(np.floor(train_frac*data_size))
valid_split = int(np.floor(valid_frac*data_size))
test_split = int(np.floor(test_frac*data_size))
# rework this using
train_idx = idx_all[0:train_split:seq_len]
valid_idx = idx_all[train_split:train_split+valid_split:seq_len] 
test_idx  = idx_all[train_split+valid_split:-1:seq_len]
# print(train_idx, valid_idx)

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
test_sampler = SubsetRandomSampler(test_idx)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                           sampler=train_sampler,
                                           drop_last=True) # this can be avoided using some padding sequence classes, I think
valid_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                           sampler=valid_sampler,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                          sampler=test_sampler,
                                          drop_last=True)

In [12]:
# if your DataLoader is working correctly, this should be `torch.Size([<seq_len>, <num_ch>])
dataset.__getitem__(next(iter(train_sampler))).shape

torch.Size([11, 62])

#### back to our data (potenially)

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [14]:
# BATCH_SIZE = 128
# train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
#     (train_data, valid_data, test_data),
#     batch_size = BATCH_SIZE,
#     device = device)

In [15]:
class Encoder_GRU(nn.Module):
    def __init__(self, input_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        # gated recurrent layer, dropout layer
        self.gru = nn.GRU(input_dim, hid_dim, n_layers, dropout=dropout, batch_first=True)
        # note: batch_first only permutes dimension order in input and output tensors. It does not affect hidden state.
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_data):
        # input_data: [batch_size x seq_len x input_dim]
        # h0: [n_layers x batch_size x hid_dim]
        batch_size = input_data.size(0)
#         hidden = torch.randn(self.n_layers, batch_size, self.hid_dim) # initialize hidden layer value
        output, hidden = self.gru_layer(input_data) # hidden initialized as zero tensor
            
        # output = [batch_size x seq_len x hid_dim]
        # hidden = [n layers * n directions, batch size, hid dim]

        return output, hidden

In [16]:
class Decoder_GRU(nn.Module):
    def __init__(self, output_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.rnn = nn.GRU(hid_dim, hid_dim, n_layers, dropout=dropout, batch_first=True)
        self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input_data, hidden):
        # input = [batch_size, seq_len, hid_dim]
        # hidden = [n layers * n directions, batch size, hid dim]
        # cell = [n layers * n directions, batch size, hid dim]
        
        # hidden = [n layers, batch size, hid dim]
        
#         input_data = input_data.unsqueeze(0) # not sure if this this is needed for not-embedded inputs
        if len(input_data.size()) != 3 or len(hidden.size()) != 3:
            breakpoint()
        output, hidden = self.rnn(input_data, hidden)
        
        #output = [seq len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #"seq len and n directions will always be 1 in the decoder, therefore:" <- figure out how to change this
        #output = [batch_size, 1, hid dim]
        #hidden = [n layers, batch size, hid dim]
        
        prediction = self.fc_out(output)
        
        return prediction, output, hidden # predicted ECoG signal, decoder states, last decoder state

In [90]:
class Seq2Seq_GRU(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.device = device # what is this?
        
        assert encoder.hid_dim == decoder.hid_dim, \
            "Encoder, decoder embedding dimensions (hidden state) must be equal."
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder, decoder layer number must be equal."
        
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        #src = [src len, batch size]
        #trg = [trg len, batch size]
        #teacher_forcing_ratio: prob. to use teacher forcing
        #e.g. if 0.75, ground-truth imports are used 75% of the time
        
        batch_size = trg.shape[0]
        
        src_len = src.shape[1]
        src_dim = src.shape[2]
        
        trg_len = trg.shape[1]
        trg_dim = self.decoder.output_dim
        
        #tensor to store decoder outputs
        outputs = torch.zeros(batch_size, trg_len, trg_dim).to(self.device)
        
        enc_state, hidden = self.encoder(src)
        
        output = src[:,-1,:].unsqueeze(1) # start the decoder with the actual output
        
        for t in range(trg_len): # ignore that first data point
            pred, output, hidden = self.decoder(output,hidden)
            
            outputs[:,t,:] = pred.squeeze()
            
            teacher_force = random.random() < teacher_forcing_ratio
            
            input = trg[:,t,:] if teacher_force else output
        
        return outputs

In [91]:
INPUT_SEQ_LEN = 10 
OUTPUT_SEQ_LEN = 1 # predict one output state from 10 inputs prior
INPUT_DIM = num_ch
OUTPUT_DIM = num_ch
HID_DIM = num_ch
N_ENC_LAYERS = 1 
N_DEC_LAYERS = 1
ENC_DROPOUT = np.float32(0.5)
DEC_DROPOUT = np.float32(0.5)

enc = Encoder_GRU(INPUT_DIM, HID_DIM, N_ENC_LAYERS, ENC_DROPOUT)
dec = Decoder_GRU(OUTPUT_DIM, HID_DIM, N_DEC_LAYERS, DEC_DROPOUT)

model = Seq2Seq_GRU(enc, dec, device).to(device)

In [92]:
# example of enc/dec function
# let's pass the first pop off the dataset to the encoder and look at the outputs
enc_out, hid_enc = enc.forward(train_loader.__iter__()._next_data())
# out: [h1, h2, ..., h{seq_len}]
# hid: h{seq_len}
print(enc_out.size(),hid_enc.size())
est, dec_out, hid_dec = dec.forward(enc_out,hid_enc)
print(est,dec_out.size(),hid_dec.size())

torch.Size([3, 11, 62]) torch.Size([1, 3, 62])
tensor([[[ 1.9925e-01,  1.5957e-02, -2.0756e-01,  ...,  2.5550e-01,
          -1.7293e-01, -6.5301e-02],
         [ 1.8119e-01, -1.4908e-01, -5.6814e-03,  ...,  6.5629e-02,
          -6.8411e-02, -3.6459e-02],
         [ 1.9039e-01, -2.2826e-01,  7.1456e-02,  ..., -4.8206e-02,
          -3.5384e-02,  1.8990e-03],
         ...,
         [ 1.4772e-01, -1.1799e-01,  1.6614e-01,  ..., -1.2116e-01,
          -7.9860e-02,  1.6674e-01],
         [ 1.2995e-01, -8.4423e-02,  1.6582e-01,  ..., -1.0815e-01,
          -6.9709e-02,  1.6483e-01],
         [ 1.0737e-01, -6.0524e-02,  1.6267e-01,  ..., -9.2503e-02,
          -6.6244e-02,  1.5733e-01]],

        [[-1.0720e-01,  6.1206e-01, -9.4757e-02,  ..., -1.6421e-01,
           1.1136e-01,  2.6572e-01],
         [-6.4889e-02,  2.8313e-01, -4.4704e-02,  ..., -2.4483e-01,
           1.2277e-01,  1.9054e-01],
         [-3.4246e-02,  1.0895e-01,  6.2070e-03,  ..., -2.4907e-01,
           1.4052e-01,  1.742

In [93]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

Seq2Seq_GRU(
  (encoder): Encoder_GRU(
    (gru_layer): GRU(62, 62, batch_first=True, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder_GRU(
    (rnn): GRU(62, 62, batch_first=True, dropout=0.5)
    (fc_out): Linear(in_features=62, out_features=62, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [101]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 50,778 trainable parameters


In [102]:
optimizer = optim.Adam(model.parameters())

In [103]:
# PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

criterion = nn.MSELoss()

In [110]:
# test the forward pass
# get first data pull
# dataset.__getitem__(next(iter(train_sampler)))
data_batch = next(iter(train_loader))
src = data_batch[:,:enc_len,:]
trg = data_batch[:,enc_len:,:]
print(src.size(),trg.size())
test_out = model(src,trg)
print(test_out.size()) # it actually works!
criterion(test_out,trg)

torch.Size([3, 10, 62]) torch.Size([3, 1, 62])
torch.Size([3, 1, 62])


tensor(25138.0586, grad_fn=<MseLossBackward>)

In [111]:
def train(model, iterator, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
#         import pdb; pdb.set_trace()
        src = batch[:,:-1,:]
        trg = batch[:,-1,:].unsqueeze(1) # otherwise it would automatically cut this out.
        
        optimizer.zero_grad()
        
        output = model(src, trg)
        
        #trg = [batch size, trg len, output dim]
        #output = [batch size, trg len, output dim]
        
        output_dim = output.shape[-1]
        
#         output = output[1:].view(-1, output_dim)
#         trg = trg[1:].view(-1)
        
        #trg = [(trg len - 1) * batch size]
        #output = [(trg len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [116]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):
            
            src = batch[:,:-1,:]
            trg = batch[:,-1,:]

            output = model(src, trg, 0) #turn off teacher forcing

            #trg = [trg len, batch size]
            #output = [trg len, batch size, output dim]

            output_dim = output.shape[-1]
            
#             output = output[1:].view(-1, output_dim)
#             trg = trg[1:].view(-1)

            #trg = [(trg len - 1) * batch size]
            #output = [(trg len - 1) * batch size, output dim]

            loss = criterion(output, trg)
            
            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [117]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [118]:
print(model)
print(train_loader)
print(test_loader)

Seq2Seq_GRU(
  (encoder): Encoder_GRU(
    (gru_layer): GRU(62, 62, batch_first=True, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder_GRU(
    (rnn): GRU(62, 62, batch_first=True, dropout=0.5)
    (fc_out): Linear(in_features=62, out_features=62, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)
<torch.utils.data.dataloader.DataLoader object at 0x1a2334f4d0>
<torch.utils.data.dataloader.DataLoader object at 0x1a2334f850>


In [None]:
N_EPOCHS = 10
CLIP = 1

best_test_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_loader, optimizer, criterion, CLIP)
    test_loss = evaluate(model.double(), test_loader, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

In [None]:
model.load_state_dict(torch.load('tut1-model.pt'))

test_loss = evaluate(model, test_iterator, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

In [None]:
if len(torch.Tensor(np.arange(10)).size()) != 3:
    breakpoint()