In [None]:
import numpy as np
import pandas as pd 
from glob import glob
from os import path
import matplotlib.pyplot as plt
from torch.utils.data.dataset import Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from torch.autograd import Variable
import torch.utils.data as utils
from time import sleep
import tensorflow as tf

In [None]:
# Experiment configuration shared by the PyTorch and Keras cells below.
seed = 0                   # fixed seed so data generation and weight init repeat across runs
N = 1200                   # total number of generated sequences (train + test)
batch_size = 32            # sequences per mini-batch
seq_len = 20               # time steps in every full sequence
num_features = 1           # values per time step
window_size = seq_len//2   # length of each subsequence fed to the models
step_size = 1              # stride between subsequences (not read by the visible cells)

# Seed the global NumPy and PyTorch generators so a Restart & Run All
# reproduces the same dataset and the same initial PyTorch weights.
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
def generate_data(N, seq_len):
    """Build the toy "remember the first time step" dataset and split it 80/20.

    The dataset follows the tutorial at
    http://philipperemy.github.io/keras-stateful-lstm/. Every sequence is all
    zeros except for index 0, which is either 0 or 1, and the label equals that
    first value. A model fed only later windows of a sequence can therefore
    predict the label only by using the hidden state carried over from the
    window that contained index 0.

    Exactly ``N // 2`` sequences are positive. The positive rows are drawn
    without replacement, so repeated indices cannot shrink the positive class.
    The rows are placed at random, so the train and test parts are each only
    approximately balanced.

    Args:
        N: total number of sequences to generate.
        seq_len: number of time steps per sequence.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` as float32 arrays. The ``X``
        arrays have shape ``(n, seq_len)`` and the ``y`` arrays ``(n, 1)``;
        the first ``int(N * 0.8)`` rows form the training part.
    """
    N_train = int(N*0.8)
    X = np.zeros((N, seq_len), dtype=np.float32)
    y = np.zeros((N, 1), dtype=np.float32)

    # The first N // 2 entries of a random permutation are distinct row
    # indices, which np.random.randint (sampling with replacement) does not
    # guarantee.
    positive_rows = np.random.permutation(N)[:N // 2]

    X[positive_rows, 0] = 1.0
    y[positive_rows, 0] = 1.0

    X_train, y_train = X[:N_train], y[:N_train]
    X_test, y_test = X[N_train:], y[N_train:]

    return X_train, y_train, X_test, y_test
    

In [None]:
# def generate_synthetic_test_data(N, seq_len):
#     N_train = int(N*0.8)
#     X = np.reshape(np.arange(0, N*seq_len, dtype=np.float32), (N, seq_len))
#     y = np.reshape(np.arange(0,N, dtype=np.float32), (N,1))
    
    
#     X_train, y_train = X[:N_train], y[:N_train]
#     X_test, y_test = X[N_train:], y[N_train:]
    
#     return X_train, y_train, X_test, y_test

In [None]:
# Materialise the train/test arrays once; the dataset cells below read them.
(X_train, y_train, X_test, y_test) = generate_data(N=N, seq_len=seq_len)

In [None]:
class SequenceDataset(Dataset):
    """Map-style dataset that pairs row ``i`` of ``X`` with row ``i`` of ``y``."""

    def __init__(self, X, y):
        """Store the arrays and record the sample count from ``X.shape[0]``."""
        self.X, self.y = X, y
        self.N = X.shape[0]

    def __len__(self):
        """Return the number of samples."""
        return self.N

    def __getitem__(self, index):
        """Return ``(sequence, label)`` for ``index``.

        The selected row of ``X`` gets a trailing axis of size 1 appended;
        the label is returned exactly as stored.
        """
        sequence = np.expand_dims(self.X[index], axis=-1)
        label = self.y[index]
        return sequence, label

In [None]:
def get_subsequences(x, window_size, step_size=1):
    """Yield sliding windows taken along axis 1 of a batch of sequences.

    Args:
        x: batch to slice; must expose ``shape`` and support ``x[:, a:b]``.
            The callers in this notebook pass data shaped
            ``(batch_size, seq_len, features)``.
        window_size: number of time steps in every yielded window.
        step_size: offset between the starts of consecutive windows. The
            default of 1 yields every possible window.

    Yields:
        ``x[:, start:start + window_size]`` for ``start = 0, step_size, ...``
        while the window still fits inside the sequence.

    Raises:
        ValueError: if ``step_size`` is smaller than 1.
        AssertionError: if ``window_size`` exceeds the sequence length.

    Because this is a generator, the checks run when iteration starts, not
    when the function is called.
    """
    if step_size < 1:
        raise ValueError("step_size must be a positive integer")
    seq_len = x.shape[1]
    assert window_size <= seq_len
    last_start = seq_len - window_size
    for start in range(0, last_start + 1, step_size):
        yield x[:, start:start + window_size]

In [None]:
class Net(nn.Module):
    """LSTM binary classifier that carries its recurrent state between calls.

    The model is an ``nn.LSTM`` followed by a linear layer and a sigmoid applied
    to the output of the last time step. The final LSTM state of each forward
    pass is stored in ``self.hidden`` and used as the initial state of the next
    call, so consecutive windows of one long sequence can be fed one at a time.
    Call ``init_hidden()`` to start from a zero state.

    Args:
        num_features: size of the feature dimension of the input.
        num_hidden: LSTM hidden size.
        num_lstm_layers: number of stacked LSTM layers.
        batch_size: batch dimension used when ``init_hidden`` allocates state.
    """

    def __init__(self, num_features, num_hidden, num_lstm_layers, batch_size):
        super(Net, self).__init__()
        self.num_hidden = num_hidden
        self.num_lstm_layers = num_lstm_layers
        self.batch_size = batch_size
        # Use the caller-supplied feature count rather than a hard-coded 1.
        self.lstm1 = nn.LSTM(input_size=num_features, hidden_size=num_hidden,
                             num_layers=num_lstm_layers, batch_first=True)
        self.fc1 = nn.Linear(num_hidden, 1)
        self.sigmoid = nn.Sigmoid()
        # Empty until init_hidden() or the first forward pass fills it.
        self.hidden = list()

    def init_hidden(self):
        """Reset the carried (h, c) state to zeros.

        The tensors are created with the dtype and device of the model
        parameters so the state stays compatible if the module is moved.
        """
        reference = self.fc1.weight
        state_shape = (self.num_lstm_layers, self.batch_size, self.num_hidden)
        self.hidden = (reference.new_zeros(state_shape),
                       reference.new_zeros(state_shape))

    def forward(self, batch_data):
        """Run one window through the LSTM and return per-sample probabilities.

        Args:
            batch_data: tensor laid out batch-first, as configured on the LSTM.

        Returns:
            Tensor with one sigmoid output per batch row.
        """
        # Detach the carried state so gradients stop at the window boundary.
        # An empty state (init_hidden never called) becomes None, which makes
        # nn.LSTM start from zeros.
        carried = tuple(state.detach() for state in self.hidden) or None
        lstm_out, self.hidden = self.lstm1(batch_data, carried)
        y_pred = self.sigmoid(self.fc1(lstm_out[:, -1]))

        return y_pred

In [None]:
# Data pipelines. drop_last=True keeps every batch at exactly batch_size rows,
# matching the fixed batch dimension that Net.init_hidden() allocates.
train_ds = SequenceDataset(X_train, y_train)
test_ds = SequenceDataset(X_test, y_test)
train_dataloader = utils.DataLoader(dataset=train_ds, batch_size=batch_size, drop_last=True)
test_dataloader = utils.DataLoader(dataset=test_ds, batch_size=batch_size, drop_last=True)

# Training hyper-parameters.
num_epochs = 15
learning_rate = 1e-3

# Model, loss (binary cross-entropy on the sigmoid output) and optimiser.
net = Net(num_features=1, num_hidden=64, num_lstm_layers=1, batch_size=batch_size)
loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)

In [None]:
# Train and evaluate the PyTorch model. Every batch of full sequences is cut
# into windows; the hidden state is reset once per batch and then carried from
# window to window inside Net.
for epoch in range(num_epochs):
    # ---- training pass ----
    net.train()
    tr_acc = 0
    tr_loss = 0
    total = 0
    for batch_data, batch_labels in train_dataloader:
        net.init_hidden()

        for subsequence in get_subsequences(batch_data, window_size):
            optimizer.zero_grad()

            y_pred = net(subsequence)
            loss = loss_fn(y_pred, batch_labels)
            loss.backward()
            optimizer.step()

            # loss is the mean over the batch, so weight it by the number of
            # samples before accumulating; dividing by `total` below then
            # gives a true per-sample average.
            num_samples = batch_labels.size(0)
            tr_loss += loss.item() * num_samples
            total += num_samples

            y_pred = (y_pred > 0.5).float()
            tr_acc += (y_pred == batch_labels).sum().item()

    # max(..., 1) avoids a ZeroDivisionError when the loader yields no batch.
    tr_loss = tr_loss/max(total, 1)
    tr_acc = tr_acc/max(total, 1)

    # ---- evaluation pass ----
    net.eval()
    test_loss = 0
    test_acc = 0
    total = 0
    with torch.no_grad():  # no parameter updates here, so skip graph building
        for batch_data, batch_labels in test_dataloader:
            net.init_hidden()
            for subsequence in get_subsequences(batch_data, window_size):
                # Evaluate on the window, exactly as in training (feeding the
                # whole batch_data here would bypass the windowing).
                y_pred = net(subsequence)
                loss = loss_fn(y_pred, batch_labels)

                num_samples = batch_labels.size(0)
                test_loss += loss.item() * num_samples
                total += num_samples

                y_pred = (y_pred > 0.5).float()
                test_acc += (y_pred == batch_labels).sum().item()

    test_loss = test_loss/max(total, 1)
    test_acc = test_acc/max(total, 1)
    print("Epoch ", epoch)
    print("Training loss: ", tr_loss, ", Training accuracy: ", tr_acc)
    print("Test loss: ", test_loss, ", Test accuracy: ", test_acc)




In [None]:
# Keras counterpart of the PyTorch model: a stateful LSTM followed by a single
# sigmoid unit. The batch input shape is fixed because the layer is stateful.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(
        units=64,
        stateful=True,
        batch_input_shape=(batch_size, window_size, num_features),
    ),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
# Rebuild the loaders so this cell does not depend on loader state left behind
# by the PyTorch cells.
train_ds = SequenceDataset(X_train, y_train)
train_dataloader = utils.DataLoader(train_ds, batch_size=batch_size, drop_last=True)

test_ds = SequenceDataset(X_test, y_test)
test_dataloader = utils.DataLoader(test_ds, batch_size=batch_size, drop_last=True)


# Train and evaluate the stateful Keras model window by window. The recurrent
# state is reset once per batch of full sequences and carried across windows.
for epoch in range(num_epochs):
    train_loss = []
    train_acc = []

    for batch_data, batch_label in train_dataloader:
        # The DataLoader yields torch tensors; convert once per batch so Keras
        # receives plain NumPy arrays instead of relying on implicit conversion.
        batch_data, batch_label = batch_data.numpy(), batch_label.numpy()
        model.reset_states()
        for subsequence in get_subsequences(batch_data, window_size):
            loss, acc = model.train_on_batch(subsequence, batch_label)

            train_loss.append(loss)
            train_acc.append(acc)

    test_loss = []
    test_acc = []
    for batch_data, batch_label in test_dataloader:
        batch_data, batch_label = batch_data.numpy(), batch_label.numpy()
        model.reset_states()
        for subsequence in get_subsequences(batch_data, window_size):
            loss, acc = model.test_on_batch(subsequence, batch_label)

            test_loss.append(loss)
            test_acc.append(acc)
    print('Epoch {}'.format(epoch))
    print('Training loss {}, Training accuracy : {}'.format(np.mean(train_loss), np.mean(train_acc)))
    print('Test loss {}, Test accuracy {}'.format(np.mean(test_loss), np.mean(test_acc)))

        