## Test 1 - New data format with seq_len incorporated into dataset code


In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
# Import comet_ml in the top of your file
from comet_ml import Experiment
##Needs to be imported before sklearn
from sklearn.model_selection import train_test_split
import sklearn

import torch
import torch.utils.data
from torch.utils.data import Dataset, TensorDataset, DataLoader #as DataLoaderBase
from torch import nn, optim, sigmoid
from torch.nn import functional as F
from torchvision import datasets, transforms
from torch.nn import modules
from torch.nn.modules import loss

from torch.autograd import Variable

#from torchaudio import transforms
#from torchaudio import Datasets

import os
import sys
import shutil
from glob import glob
import datetime
import re
from tqdm import tqdm
import time

import json
from pprint import pprint
#from torchviz import make_dot, make_dot_from_trace


In [2]:
torch.__version__

'0.4.1'

In [3]:
# Check device type


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:

def data_creator(T = 20, L = 10000, N = 1000):
    """
    Simple sine wave creator. Used to build a large dataset of sine waves.
    
    
    """
    np.random.seed(2)



    x = np.empty((N, L), 'int64')
    x[:] = np.array(range(L)) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1)
    data = np.sin(x / 1.0 / T).astype('float64')
    data = sklearn.preprocessing.normalize(data)

    torch.save(data, open('traindata.pt', 'wb'))

In [5]:

data_creator()

In [6]:
class EncoderRNN(nn.Module):
    """
    Code adapted from ----
    
    """
    def __init__(self, input_size, hidden_size, num_layers, isCuda):
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.isCuda = isCuda
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.relu = nn.ReLU()
        
        #initialize weights
        nn.init.xavier_uniform_(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.xavier_uniform_(self.lstm.weight_hh_l0, gain=np.sqrt(2))

    def forward(self, input):
        tt = torch.cuda if self.isCuda else torch
        h0 = Variable(tt.FloatTensor(self.num_layers, input.size(0), self.hidden_size))
        c0 = Variable(tt.FloatTensor(self.num_layers, input.size(0), self.hidden_size))
        encoded_input, hidden = self.lstm(input, (h0, c0))
        encoded_input = self.relu(encoded_input)
        return encoded_input

class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers, isCuda):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        
        self.isCuda = isCuda
        self.lstm = nn.LSTM(hidden_size, output_size, num_layers, batch_first=True)
        #self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
        #initialize weights
        nn.init.xavier_uniform_(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.xavier_uniform_(self.lstm.weight_hh_l0, gain=np.sqrt(2))
        
    def forward(self, encoded_input):
        tt = torch.cuda if self.isCuda else torch
        h0 = Variable(tt.FloatTensor(self.num_layers, encoded_input.size(0), self.output_size))
        c0 = Variable(tt.FloatTensor(self.num_layers, encoded_input.size(0), self.output_size))
        decoded_output, hidden = self.lstm(encoded_input, (h0, c0))
        #decoded_output = self.sigmoid(decoded_output)
        return decoded_output

class LSTMAE(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, isCuda):
        super(LSTMAE, self).__init__()
        self.encoder = EncoderRNN(input_size, hidden_size, num_layers, isCuda)
        self.decoder = DecoderRNN(hidden_size, input_size, num_layers, isCuda)
        
    def forward(self, input):
        encoded_input = self.encoder(input)
        decoded_output = self.decoder(encoded_input)
        return decoded_output

In [7]:
class sineDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, seq_len, overlap):
        """
        Args:

        """
        self.data = torch.load('traindata.pt')
        self.label = []
        self.seq_len = seq_len
        self.overlap = overlap
        vals = len(self.data)
        for val in range(vals):
            self.label.append(val)
        if torch.cuda.is_available():
            self.data, self.label = map(torch.cuda.FloatTensor, (self.data, self.label))
        else:
            self.data, self.label = map(torch.FloatTensor, (self.data, self.label))

        self.data = self.window_mask()
        self.data = torch.as_tensor(self.data)
        self.data.permute(2,1,0 )
        print("shape = ", self.data.shape)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]
    
    def window_mask(self):
        datavals = []
        results = []
        self.data= torch.tensor(self.data)
        start_val = 0
        occurences = int(len(self.data)/(self.seq_len*self.overlap))
        
        for rows in range(len(self.data)):

            start_val = 0
            for i in range(occurences-1):
                
                    value = self.data[rows,start_val:(start_val+self.seq_len)]
                    
                    start_val += int(self.seq_len*(1 - self.overlap))
                    datavals.append(value.unsqueeze(0))

            var = torch.cat(datavals)        
            results.append(var.unsqueeze(0))
            
            datavals = []
        result = torch.cat(results)
        result = result.permute(2,1,0)
        print(result.shape)
        print(result)
        return result
    
        

In [8]:
def get_data(train_dataset, test_dataset, batch_size):
    return (DataLoader(train_dataset, batch_size = batch_size),
    DataLoader(test_dataset, batch_size = batch_size * 2))

In [27]:
#hyperparameters

with open('config_file.json') as f:
    data = json.load(f)

pprint(data)

hyperparameters = data["hyperparams"]
optimiser = data["optimizer"]
trainer = data["trainer"]
loss_name = data["loss"]

{'arch': 'seabedmodel',
 'cuda': True,
 'data_loader': {'batch_size': 32,
                 'data_dir': 'datasets/',
                 'shuffle': True,
                 'type': 'ImageDataLoader'},
 'gpu': 0,
 'hyperparams': {'batch_size': 32,
                 'hidden_dim': 400,
                 'input_size': 1000,
                 'latent': 5,
                 'num_layers': 3,
                 'overlap': 0.5,
                 'seq_len': 200},
 'loss': 'BCEWithLogitsLoss',
 'loss_args': {'reduction': 'elementwise_mean'},
 'metrics': ['my_metric', 'my_metric2'],
 'model': {},
 'name': 'Autoencoder',
 'optimizer': {'lr': 1e-05, 'weight_decay': 0},
 'optimizer_type': 'Adam',
 'trainer': {'epochs': 1000,
             'monitor': 'val_loss',
             'monitor_mode': 'min',
             'save_dir': 'saved/',
             'save_freq': 1,
             'verbosity': 2},
 'validation': {'shuffle': True, 'test_split': 0.2, 'validation_split': 0.1},
 'visualization': {'log_dir': 'saved/runs', 'tens

In [28]:
 
full_dataset = sineDataset( hyperparameters["seq_len"], hyperparameters["overlap"])
full_dataset
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
train_dl, test_dl = get_data(train_dataset, test_dataset, hyperparameters["batch_size"])
print(train_dl)

torch.Size([200, 9, 1000])
tensor([[[ 1.5308e-03, -5.5096e-03, -3.3845e-03,  ..., -1.2602e-02,
           4.0674e-03, -1.3179e-02],
         [ 1.3922e-02, -1.4059e-02,  1.2211e-02,  ..., -9.7256e-03,
           1.4149e-02, -8.6514e-03],
         [ 6.3675e-03, -2.4664e-03,  1.0312e-02,  ...,  7.0849e-03,
           3.9594e-03,  8.2705e-03],
         ...,
         [ 1.4133e-02, -1.3725e-02,  1.3049e-02,  ..., -8.2814e-03,
           1.4017e-02, -7.0951e-03],
         [ 4.6392e-03, -6.0085e-04,  8.9397e-03,  ...,  8.6423e-03,
           2.1268e-03,  9.7156e-03],
         [-1.1501e-02,  1.3384e-02, -7.9771e-03,  ...,  1.3184e-02,
          -1.2810e-02,  1.2607e-02]],

        [[ 8.2591e-04, -4.8514e-03, -4.0667e-03,  ..., -1.2266e-02,
           3.3851e-03, -1.2906e-02],
         [ 1.3779e-02, -1.4121e-02,  1.1839e-02,  ..., -1.0227e-02,
           1.4134e-02, -9.1996e-03],
         [ 6.9910e-03, -3.1596e-03,  1.0783e-02,  ...,  6.4644e-03,
           4.6333e-03,  7.6870e-03],
         ...

In [35]:

def get_model():
    
    #model = EncoderRNN(input_dim= input_dim, hidden_dim= hidden_dim).to(device)
    model = LSTMAE(input_size = hyperparameters["input_size"], hidden_size = hyperparameters["hidden_dim"], num_layers = hyperparameters["num_layers"], isCuda = False)

    
    return model, optim.Adam(model.parameters(), lr = optimiser["lr"])

In [36]:
def loss_function(output, x): 
    #loss_fn = getattr(loss, loss_name)
    #print(loss_fn)
    loss_fn = F.binary_cross_entropy_with_logits
    return loss_fn(output, x)


In [37]:
def loss_batch(model, loss_function, xb, opt=None): 
    output = model(xb) 
    
    loss = loss_function(output, xb)
    print(loss)
    if opt is not None:
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.25)
        opt.step()
        model.zero_grad()

    return loss.item(), len(xb)


In [38]:
def fit(epochs, model, loss_func, opt, train_dl, test_dl):
    print(epochs)
    for epoch in tqdm(range(epochs)):
        
        model.train()
        
        for xb in train_dl:
            
            print(xb.shape)
            #xb = xb.reshape(1,batch_size,input_size).to(device)  
           
            if torch.cuda.is_available():
                xb = Variable(xb)
            else:
                xb = Variable(xb)
            

            loss_batch(model, loss_func, xb, opt)
            model.eval()
            with torch.no_grad():
                losses,nums = zip(*[loss_batch(model, loss_func, xb)
                                    for xb in test_dl])
            val_loss = np.sum(np.multiply(losses,nums)) / np.sum(nums)
        print(epoch, val_loss)
        

In [42]:

model, opt = get_model()
#x = torch.randn(1, 10000)
#make_dot(model(x), params=dict(model.named_parameters()))

       
    

In [43]:
fit(trainer["epochs"], model, loss_function, opt, train_dl, test_dl)

1000



  0%|                                                                                         | 0/1000 [00:00<?, ?it/s]

torch.Size([32, 9, 1000])
tensor(nan, grad_fn=<MeanBackward1>)
tensor(nan)
torch.Size([32, 9, 1000])
tensor(nan, grad_fn=<MeanBackward1>)





KeyboardInterrupt: 