In [2]:
# Imports
import numpy as np
import sys

import torch
import torch.nn as nn
import torch.nn.functional as F


from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset, TensorDataset

from torchsummary import summary
import torchvision as tv
import torchvision.transforms as T
import copy
import math
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline

# Have the inline matplotlib backend emit figures as SVG.
backend_inline.set_matplotlib_formats("svg")


#### PyTorch device-specific configuration ###

# Prefer a CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# # Alternative backend: DirectML (AMD GPU)
# import torch_directml
#
# device = torch_directml.device()

# Make `device` the default for tensors created from here on.
torch.set_default_device(device)


In [3]:
# Explore the LSTM layer type: hyperparameters first
input_size = 9  # features per time step of the input (number of data channels)
hidden_size = 16  # units in the hidden state
num_layers = 2  # vertically stacked LSTM layers (only the final layer produces the output)

# Build the LSTM, naming each constructor argument explicitly
lstm = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

lstm


LSTM(9, 16, num_layers=2)

In [4]:
# IPython introspection: `??` displays the source of nn.LSTM (interactive aid; not needed for the analysis)
??nn.LSTM


Init signature: nn.LSTM(*args, **kwargs)
Source:        
class LSTM(RNNBase):
    r"""__init__(input_size,hidden_size,num_layers=1,bias=True,batch_first=False,dropout=0.0,bidirectional=False,proj_size=0,device=None,dtype=None)

    Apply a multi-layer long short-term memory (LSTM) RNN to an input sequence.
    For each element in the input sequence, each layer computes the following
    function:

    .. math::
        \begin{array}{ll} \\
            i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
            f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
            g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} 

In [5]:
# dimensions of the demo data
seqlength = 5
batchsize = 2

# random input, laid out as (sequence, batch, features)
X = torch.rand((seqlength, batchsize, input_size))

# initial hidden and cell states start at zero, one slice per stacked layer
H = torch.zeros((num_layers, batchsize, hidden_size))
C = torch.zeros_like(H)

# the LSTM expects its initial state as a single (hidden, cell) tuple
hiddeninputs = H, C

# forward pass: y is the output sequence, h is the returned (hidden, cell) tuple
y, h = lstm(X, hiddeninputs)

# report the size of every tensor involved
print(f" Input shape: {list(X.shape)}")
print(f"Hidden shape: {list(h[0].shape)}")
print(f"  Cell shape: {list(h[1].shape)}")
print(f"Output shape: {list(y.shape)}")


 Input shape: [5, 2, 9]
Hidden shape: [2, 2, 16]
  Cell shape: [2, 2, 16]
Output shape: [5, 2, 16]


In [6]:
# List each weight matrix of the LSTM together with its shape
# (the leading dimension is 4 * hidden_size: the four gates are stacked)
for name, param in lstm.named_parameters():
    if "weight" not in name:
        continue  # only the weight tensors are reported
    print(f"{name} has size {list(param.shape)}")


weight_ih_l0 has size [64, 9]
weight_hh_l0 has size [64, 16]
weight_ih_l1 has size [64, 16]
weight_hh_l1 has size [64, 16]
