<a href="https://colab.research.google.com/github/jpatra72/Advanced_ML/blob/main/LSTM_Playground.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Source Links to Playground Activity
*   *Outputs of LSTM: https://stackoverflow.com/questions/48302810/whats-the-difference-between-hidden-and-output-in-pytorch-lstm*

*   *How to update LSTM during training: https://machinelearningmastery.com/update-lstm-networks-training-time-series-forecasting*

*   *How to build LSTM (tf): https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767#.ozeai0fo8*

* *BPTT in LSTM: https://stats.stackexchange.com/questions/219914/rnns-when-to-apply-bptt-and-or-update-weights*

* *Preparing univariate time series data for LSTMs: https://machinelearningmastery.com/prepare-univariate-time-series-data-long-short-term-memory-networks/*

* *Keras LSTM diagram to understand Batch: https://github.com/MohammadFneish7/Keras_LSTM_Diagram*

* *Stateless vs Stateful and Subsequencing: http://philipperemy.github.io/keras-stateful-lstm/*

* *PyTorch vs TensorFlow - Stateless vs Stateful: https://discuss.pytorch.org/t/confusion-regarding-pytorch-lstms-compared-to-keras-stateful-lstm/44502/5*

* *Stateful w/ Subsequencing:  https://gist.github.com/spacegoing/7935e5c2f0c8fa2f0719d2e729e794e8#file-test_stateful_lstm-py-L22*

* *Pytorch forward Implementation: https://towardsdatascience.com/whats-happening-in-my-lstm-layer-dd8110ecc52f*




---
---



In [None]:
import torch.nn as nn
import torch
from torchsummary import summary

torch.manual_seed(1)

# One sequence of 5 time steps. Every step is a (1, 3) tensor: a batch of 1
# sample carrying 3 input features.
inputs = [torch.randn(1, 3) for _ in range(5)]

# Initial (h_0, c_0) pair. Each tensor is shaped
# (num_layers * num_directions, batch, hidden_size) = (1, 1, 3):
# one unidirectional layer, a batch of 1 (it must match the input's batch)
# and 3 hidden units (it must match the hidden size passed to nn.LSTM below).
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))

lstm = nn.LSTM(3, 3)  # input_size=3, hidden_size=3
# summary(lstm, input_size=(1,3))

# Feed the sequence one time step at a time, carrying (h, c) from step to step.
for step in inputs:
    # Reshape the (1, 3) step to (seq_len=1, batch=1, input_size=3).
    out, hidden = lstm(step.view(1, 1, -1), hidden)
    print('out:', out)
    print('hidden:', hidden, '\n')


In [None]:
# Stacked (multi-layer) LSTM, fed one time step at a time.
# `num_layers` drives both the layer count and the state shape, so editing it
# here is enough to experiment with deeper stacks.
num_layers = 2
inputs = [torch.randn(1, 3) for _ in range(5)]  # 5 steps, each (batch=1, features=3)
# (h_0, c_0): one slice per layer -> (num_layers, batch=1, hidden_size=3).
hidden = (torch.randn(num_layers, 1, 3),
          torch.randn(num_layers, 1, 3))
lstm = nn.LSTM(input_size=3, hidden_size=3, num_layers=num_layers)
for i in inputs:
    # Step through the sequence one element at a time.
    # After each step, `hidden` holds the (h, c) state of every layer, while
    # `out` only exposes the top layer's hidden state for this step.
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print('out:', out)
    print('hidden:', hidden, '\n')

In [None]:
# Push a whole 50-step, single-feature sequence through the LSTM in one call
# and inspect the shape of the per-step outputs.
torch.manual_seed(0)
lstm = nn.LSTM(input_size=1, hidden_size=20, num_layers=1)
x = torch.rand(50, 1, 1)  # (seq_len=50, batch=1, input_size=1)
# No (h_0, c_0) is passed here, so the layer uses its default initial state.
output, (hn, cn) = lstm(x)
output.shape

In [None]:
lstm = nn.LSTM(3, 3)  # input_size=3, hidden_size=3
inputs = [torch.randn(1, 3) for _ in range(5)]  # one sequence, 5 time steps

# --- Variant 1: one time step per call ---------------------------------------
# Start from a random (h_0, c_0). After every call `hidden` is the state that
# has to be handed to the next call.
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
for step in inputs:
    out, hidden = lstm(step.view(1, 1, -1), hidden)

# --- Variant 2: the whole sequence in a single call --------------------------
# The LSTM returns two things:
#   * `out`    - the hidden state at every time step of the sequence;
#   * `hidden` - only the most recent (h, c) pair.
# The last slice of `out` therefore equals hidden[0] (compare the printout).
# `out` is what you read predictions from; `hidden` is what you pass back in
# later to continue the sequence (and to backpropagate through it).
#
# Concatenate the 5 (1, 3) steps and insert the batch axis -> (5, 1, 3).
inputs = torch.cat(inputs).view(len(inputs), 1, -1)

# Draw a fresh random state so variant 2 does not continue from variant 1.
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[ 0.3653,  0.0123, -0.4226]],

        [[ 0.1415,  0.1479, -0.2528]],

        [[ 0.4234,  0.0467, -0.1540]],

        [[ 0.5676, -0.1238,  0.0710]],

        [[ 0.7421, -0.0026,  0.2334]]], grad_fn=<StackBackward0>)
(tensor([[[ 0.7421, -0.0026,  0.2334]]], grad_fn=<StackBackward0>), tensor([[[ 1.3536, -0.0061,  0.3241]]], grad_fn=<StackBackward0>))


In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
import numpy as np
from numpy.random import choice


def prepare_sequences(x_train, window_length):
  """Cut every sequence into all of its overlapping sliding windows.

  The number of windows is derived from each sequence's own length, so the
  function does not rely on any module-level constant.

  Args:
    x_train: iterable of 1-D sequences, e.g. an array of shape
      (n_sequences, seq_len).
    window_length: number of consecutive time steps in each window.

  Returns:
    np.ndarray holding the windows in order: all windows of the first
    sequence (start offsets 0, 1, 2, ...), then all windows of the second
    sequence, and so on. For equal-length sequences the shape is
    (n_sequences * (seq_len - window_length + 1), window_length). If no
    window fits, the result is an empty array.
  """
  windows = []
  for sequence in x_train:
    # A sequence of length L has L - window_length + 1 window start offsets.
    n_windows = len(sequence) - window_length + 1
    windows.extend(
        sequence[start:start + window_length] for start in range(n_windows))
  return np.array(windows)


def get_sequential_batch(bX_train, bY_train, N_train, batch_size):
  """Yield mini-batches window by window, keeping sequences aligned.

  For each group of `batch_size` sequences, the generator yields that group's
  first windows, then its second windows, and so on, before moving to the next
  group. Consecutive batches therefore continue the same sequences, which is
  the ordering a stateful recurrent layer needs.

  The window length and the number of windows per sequence are read from the
  array itself rather than from module-level constants.

  Args:
    bX_train: array of windows whose last axis is the window length and whose
      rows are grouped per sequence (the layout `prepare_sequences` returns).
    bY_train: one label per sequence, indexable along its first axis.
    N_train: number of sequences represented in `bX_train`.
    batch_size: sequences per batch. Trailing sequences that do not fill a
      complete batch are skipped.

  Yields:
    Tuples `(bX, bY, t)`: `bX` has shape (batch_size, window_length, 1), `bY`
    has a trailing axis of size 1 added, and `t` is the window index within
    the sequence.
  """
  window_length = bX_train.shape[-1]
  per_window_elems = N_train * window_length
  n_windows = bX_train.size // per_window_elems if per_window_elems else 0
  bX_train = bX_train.reshape(N_train, n_windows, window_length)

  # Only iterate over sequences that form complete batches.
  n_usable = N_train - N_train % batch_size
  for i in range(0, n_usable, batch_size):
    for t in range(n_windows):
      bX = bX_train[i:i + batch_size, t, :]
      bY = bY_train[i:i + batch_size]
      yield bX[..., np.newaxis], bY[..., np.newaxis], t


## hyper parameters
debug = True
N, T = 1200, 20  # number of sequences, time steps per sequence
N_train = 1000
N_test = N - N_train
window_length = 10
batch_size = 32
epochs = 4
# Original author's observation: stateful=True gives test acc 1.0,
# stateful=False gives test acc 0.5.
stateful = False

## create train / test dataset
# Every sequence is all zeros, except that a randomly chosen half of them
# carry a 1 at t=0. The label is that very first value, so a window that does
# not start at t=0 contains no information about the label on its own.
data = np.zeros((N, T))
one_indexes = choice(a=N, size=N // 2, replace=False)
data[one_indexes, 0] = 1
X_train, X_test = data[:N_train], data[N_train:]
Y_train, Y_test = X_train[:, 0], X_test[:, 0]

## create model
# A 3-unit LSTM that reads fixed-size batches of
# (batch_size, window_length, 1) and emits only its final output, followed by
# a single sigmoid unit for the binary label.
model = Sequential([
    LSTM(
        3,
        batch_input_shape=(batch_size, window_length, 1),
        return_sequences=False,
        stateful=stateful),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

## training loop
# The windows depend only on X_train and window_length, so build them once
# rather than once per epoch.
bX_train = prepare_sequences(X_train, window_length)
for e in range(epochs):
  # A generator is exhausted after one pass, so create a new one every epoch.
  x_train_batch_gen = get_sequential_batch(bX_train, Y_train, N_train,
                                           batch_size)
  # Per-epoch bookkeeping. These must exist before the first batch is
  # processed; without them the loop fails with a NameError.
  tr_loss = []
  tr_acc = []
  t_dataset = []  # debug: sanity-check values collected below
  counter = 0     # windows seen so far for the current group of sequences
  for bX, bY, t in x_train_batch_gen:
    print(bX.shape, t)
    loss, acc = model.train_on_batch(bX, bY)
    tr_loss.append(loss)
    tr_acc.append(acc)
    counter += 1

    if counter == 1 and debug:
      # On the first window of a group: count the rows whose label equals the
      # window's first time step, plus 1 if the batch sums agree.
      t_dataset.append(
          sum(bY[:, 0] == bX[:, 0, :].reshape(-1)) + int(bX.sum() == bY.sum()))

    # reset states
    # Every window of the current group of sequences has been fed, so clear
    # the recurrent state before the next, unrelated group starts.
    if counter == T - window_length + 1:
      model.reset_states()
      counter = 0
  print(np.mean(tr_acc))
  # debug
  if debug:
    print(np.mean(t_dataset))



In [None]:
import torch
from torch import nn
from torch.autograd import Variable


class SimpleLSTM(nn.Module):
  """A single/multi-layer, uni/bi-directional LSTM followed by one linear head.

  The LSTM is built with `batch_first=True`, so `forward` takes inputs shaped
  (batch, window_size, n_features). The linear layer is applied to the LSTM
  output at time step `window_size - 1`.

  Args:
    n_features: size of each time step's feature vector.
    window_size: number of time steps per input; selects which step is read.
    output_size: size of the prediction produced by the linear layer.
    h_size: LSTM hidden size.
    n_layers: number of stacked LSTM layers.
    bidirectional: whether the LSTM runs in both directions.
    device: device on which the initial hidden/cell states are created.

  NOTE(review): a new *random* initial state is drawn on every forward pass,
  so two calls with the same input give different outputs. This is kept
  as-is; confirm whether zeros would be preferable outside the playground.
  """

  def __init__(self, n_features, window_size,
               output_size, h_size, n_layers=1,
               bidirectional=False, device=torch.device('cpu')):
    super().__init__()
    self.n_features = n_features
    self.window_size = window_size
    self.output_size = output_size
    self.h_size = h_size
    self.n_layers = n_layers
    self.directions = 2 if bidirectional else 1
    self.device = device

    # our layer of interest
    self.lstm = nn.LSTM(input_size=n_features, hidden_size=h_size,
                        num_layers=n_layers, bidirectional=bidirectional,
                        batch_first=True)
    # Last (h, c) pair returned by the LSTM; set on every forward pass.
    self.hidden = None

    # A bidirectional LSTM concatenates both directions in its output.
    self.linear = nn.Linear(self.h_size * self.directions, self.output_size)

  def init_hidden(self, batch_size):
    """Return a new random (hidden_state, cell_state) pair on `self.device`.

    Both tensors have shape (n_layers * directions, batch_size, h_size).
    """
    state_shape = (self.n_layers * self.directions, batch_size, self.h_size)
    # Allocate directly on the target device; the old autograd `Variable`
    # wrapper is not needed because plain tensors already work with autograd.
    hidden_state = torch.randn(state_shape, device=self.device)
    cell_state = torch.randn(state_shape, device=self.device)
    return (hidden_state, cell_state)

  def forward(self, input):
    """Run the LSTM on `input` and predict from step `window_size - 1`.

    Args:
      input: tensor shaped (batch, window_size, n_features).

    Returns:
      Tensor shaped (batch, 1, output_size); `index_select` keeps the time
      axis with length 1.
    """
    batch_size = input.size(0)
    self.hidden = self.init_hidden(batch_size)
    lstm_output, self.hidden = self.lstm(input, self.hidden)
    print("lstm_output:", lstm_output.shape)
    print("hidden:", len(self.hidden), self.hidden[0].shape)
    # Build the index on the same device as the LSTM output; an index that is
    # always on the CPU makes index_select fail when the model runs elsewhere.
    last_step = torch.tensor([self.window_size - 1], device=lstm_output.device)
    last_hidden_states = torch.index_select(lstm_output, 1, index=last_step)
    predictions = self.linear(last_hidden_states)
    return predictions


# Smoke test: push a random batch through the model and print the output shape.
model = SimpleLSTM(n_features=23, window_size=6, output_size=1, h_size=256)

data = torch.rand((100, 6, 23))  # (batch, window_size, n_features)

# Call the module itself rather than `.forward()`, so that nn.Module's call
# machinery (e.g. registered hooks) is not bypassed.
print(model(data).shape)

<generator object get_sequential_batch at 0x7f906977bed0>

In [17]:
import torch
import torch.nn as nn
import torch

# Minimal map-style dataset over two aligned tensors
class TensorDataset(torch.utils.data.Dataset):
    """Pair element `idx` of `TensorX` with element `idx` of `TensorY`."""

    def __init__(self, TensorX, TensorY):
        self.TensorX, self.TensorY = TensorX, TensorY

    def __len__(self):
        # The dataset length is the size of the inputs' leading dimension.
        n_samples = self.TensorX.shape[0]
        return n_samples

    def __getitem__(self, idx):
        sample, target = self.TensorX[idx], self.TensorY[idx]
        return sample, target

# Model = Stateful LSTM + linear head
class LSTM(nn.Module):
    """LSTM + linear layer that treats a batch as one continuous sequence.

    `forward` chains all rows of the batch end to end into a single long
    sequence (batch of 1), so the state reached at the end of one row is the
    starting state of the next row. The incoming state is detached, so
    gradients do not flow back into earlier calls.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.lstm = torch.nn.LSTM(input_size=input_size,
                                  hidden_size=hidden_size,
                                  batch_first=True)
        self.linear = torch.nn.Linear(in_features=hidden_size,
                                      out_features=output_size)

    def forward(self, x, hn, cn):
        """Return (predictions with a trailing axis of 1, (hn, cn))."""
        n_rows, n_steps, n_feats = x.shape[:3]

        # Stateful: lay the rows end to end -> (1, n_rows * n_steps, n_feats).
        chained = x.view(1, n_rows * n_steps, n_feats)
        carried_state = (hn.detach(), cn.detach())
        chained_out, (hn, cn) = self.lstm(chained, carried_state)

        # Fold the long sequence back into the original (rows, steps) layout.
        out = chained_out.view(n_rows, n_steps, chained_out.shape[2])
        print("output pre linear layer: ", out, out.shape)

        # Only the final time step of every row feeds the linear layer.
        final_steps = out[:, -1, :]
        print(final_steps, final_steps.shape)
        out = self.linear(final_steps)
        print("output post linear layer: ", out, out.shape)
        return out.unsqueeze(-1), (hn, cn)

# --- hyper parameters ---
N_epochs = 1
hidden_size = 2
features = 1
learning_rate = 0.001
batch_size = 2
output_size = 1

# --- model, optimizer, loss ---
model = LSTM(input_size=features, hidden_size=hidden_size,
             output_size=output_size)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss()

# --- dataset ---
# Four windows of three consecutive numbers; the target is the next number.
# Shapes: x -> (4, 3, 1), y -> (4, 1, 1). Imagine original_batch_size=2.
x = torch.tensor([
    [1.0, 2.0, 3.0],
    [4.0, 5.0, 6.0],
    [7.0, 8.0, 9.0],
    [10.0, 11.0, 12.0],
]).unsqueeze(-1)
y = torch.tensor([[4.], [7.], [10.], [13.]]).unsqueeze(-1)
dataset = TensorDataset(x, y)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
print("head from dataloader: ", next(iter(dataloader))[1].shape)

# --- training ---
for epoch in range(N_epochs):
    # Each epoch starts from a zero state with batch=1, because the model
    # chains the whole batch into one long sequence.
    # Shape: [num_layers*num_directions, batch, hidden_size]
    hn = torch.zeros(1, 1, hidden_size)
    cn = torch.zeros(1, 1, hidden_size)
    for x, y in dataloader:
        optimizer.zero_grad()
        # The returned state is fed to the next batch, which keeps the LSTM
        # stateful across batches within an epoch.
        out, (hn, cn) = model(x, hn, cn)
        loss = criterion(out, y)
        loss.backward()   # backpropagate
        optimizer.step()  # Adam update

head from dataloader:  torch.Size([2, 1, 1])
output pre linear layer:  tensor([[[-0.1933, -0.0227],
         [-0.4396,  0.1259],
         [-0.5998,  0.4198]],

        [[-0.6966,  0.6801],
         [-0.7582,  0.7975],
         [-0.7991,  0.8407]]], grad_fn=<ViewBackward0>) torch.Size([2, 3, 2])
tensor([[-0.5998,  0.4198],
        [-0.7991,  0.8407]], grad_fn=<SliceBackward0>) torch.Size([2, 2])
output post linear layer:  tensor([[0.5430],
        [0.6204]], grad_fn=<AddmmBackward0>) torch.Size([2, 1])
output pre linear layer:  tensor([[[-0.8269,  0.8639],
         [-0.8490,  0.8801],
         [-0.8667,  0.8940]],

        [[-0.8813,  0.9063],
         [-0.8936,  0.9172],
         [-0.9041,  0.9268]]], grad_fn=<ViewBackward0>) torch.Size([2, 3, 2])
tensor([[-0.8667,  0.8940],
        [-0.9041,  0.9268]], grad_fn=<SliceBackward0>) torch.Size([2, 2])
output post linear layer:  tensor([[0.6257],
        [0.6281]], grad_fn=<AddmmBackward0>) torch.Size([2, 1])
