<a href="https://colab.research.google.com/github/qmeng222/CNN/blob/main/RNN/GRU_and_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np

# 1. LSTM:

In [2]:
# parameters
input_size  =  9 # number of data channels
hidden_size = 16 # number of units in the hidden state
num_layers  =  2 # number of vertical stacks of hidden layers

# create an LSTM instance
lstm = nn.LSTM(input_size,hidden_size,num_layers)
lstm

LSTM(9, 16, num_layers=2)

In [3]:
# set data parameters
seqlength = 5
batchsize = 2

# create some random data
X = torch.rand(seqlength,batchsize,input_size)

# create initial hidden states (typically initialized as zeros)
H = torch.zeros(num_layers,batchsize,hidden_size) # hidden state
C = torch.zeros(num_layers,batchsize,hidden_size) # cell state

# (hidden,cell) tuple
hiddeninputs = (H,C)

# pass through the model & check sizes
y,h = lstm(X,hiddeninputs) # 3 inputs: current input, previous hidden state, and previous cell state
print(f' Input shape: {list(X.shape)}')
print(f'Hidden shape: {list(h[0].shape)}') # h is a tuple of (hidden,cell)
print(f'  Cell shape: {list(h[1].shape)}')
print(f'Output shape: {list(y.shape)}') # final output

 Input shape: [5, 2, 9]
Hidden shape: [2, 2, 16]
  Cell shape: [2, 2, 16]
Output shape: [5, 2, 16]


In [8]:
# check out the learned parameters and their sizes
for p in lstm.named_parameters():
  print(p)
  if 'weight' in p[0]:
    print(f'{p[0]} has size {list(p[1].shape)}')
  print("-------------------------------")

('weight_ih_l0', Parameter containing:
tensor([[ 0.1542, -0.0360,  0.1344, -0.2426,  0.0506, -0.1397,  0.1742,  0.1688,
          0.0201],
        [ 0.2138, -0.0117,  0.1152,  0.1323, -0.1030, -0.2132,  0.0399, -0.0536,
          0.0445],
        [ 0.0215,  0.0241,  0.1133, -0.1527,  0.2128,  0.1725,  0.2045, -0.0369,
          0.1946],
        [ 0.0055, -0.0824, -0.1713, -0.2223,  0.0827,  0.2189, -0.1868,  0.2150,
         -0.2453],
        [-0.2213, -0.0312,  0.1743, -0.0336,  0.0115,  0.2174,  0.2426, -0.0560,
         -0.2279],
        [-0.0202, -0.1292, -0.1979, -0.0239,  0.2074, -0.1665, -0.0763,  0.0373,
          0.0610],
        [ 0.0877, -0.0642,  0.1015,  0.1242, -0.1975, -0.0943, -0.2011,  0.1507,
          0.2238],
        [ 0.0851, -0.1968,  0.0987, -0.1499, -0.2237, -0.1059,  0.0944, -0.2003,
         -0.0227],
        [ 0.2017,  0.2082,  0.1940, -0.2413,  0.0638, -0.0010, -0.1681,  0.0767,
          0.0797],
        [ 0.2176, -0.1465, -0.0302, -0.1297, -0.1041, -0.1107

👆 64 = 16 x 4

16: number of units in the hidden layer

 4: number of gates/weights matrices (w_ii, w_if, w_ig, w_io)

## Create a class for the DL model:

In [9]:
class LSTMnet(nn.Module):
  def __init__(self,input_size,num_hidden,num_layers):
    super().__init__()

    # store parameters
    self.input_size = input_size
    self.num_hidden = num_hidden
    self.num_layers = num_layers

    # RNN Layer (LSTM is a type of RNN)
    self.lstm = nn.LSTM(input_size,num_hidden,num_layers)

    # linear layer for output
    self.out = nn.Linear(num_hidden,1)

  def forward(self,x):

    print(f'Input: {list(x.shape)}')

    # run through the RNN layer
    y,hidden = self.lstm(x)
    print(f'RNN-out: {list(y.shape)}')
    print(f'RNN-hidden: {list(hidden[0].shape)}')
    print(f'RNN-cell: {list(hidden[1].shape)}')

    # pass the RNN output through the linear output layer
    o = self.out(y)
    print(f'Output: {list(o.shape)}')

    return o,hidden

In [10]:
# create an instance of the model and inspect
net = LSTMnet(input_size,hidden_size,num_layers)
print(net), print(' ')

# and check out all learnable parameters
for p in net.named_parameters():
  print(f'{p[0]:>20} has size {list(p[1].shape)}') # right-aligned within a 20-character wide space

LSTMnet(
  (lstm): LSTM(9, 16, num_layers=2)
  (out): Linear(in_features=16, out_features=1, bias=True)
)
 
   lstm.weight_ih_l0 has size [64, 9]
   lstm.weight_hh_l0 has size [64, 16]
     lstm.bias_ih_l0 has size [64]
     lstm.bias_hh_l0 has size [64]
   lstm.weight_ih_l1 has size [64, 16]
   lstm.weight_hh_l1 has size [64, 16]
     lstm.bias_ih_l1 has size [64]
     lstm.bias_hh_l1 has size [64]
          out.weight has size [1, 16]
            out.bias has size [1]


In [11]:
# test the model with some random data:

X = torch.rand(seqlength,batchsize,input_size)
y = torch.rand(seqlength,batchsize,1)
yHat,h = net(X)

lossfun = nn.MSELoss()
lossfun(yHat,y)

Input: [5, 2, 9]
RNN-out: [5, 2, 16]
RNN-hidden: [2, 2, 16]
RNN-cell: [2, 2, 16]
Output: [5, 2, 1]


tensor(0.2231, grad_fn=<MseLossBackward0>)

# GRU:

In [12]:
# create a GRU instance
gru = nn.GRU(input_size,hidden_size,num_layers)
gru

GRU(9, 16, num_layers=2)

In [13]:
# create some random data & initiate a hidden state
X = torch.rand(seqlength,batchsize,input_size)
H = torch.zeros(num_layers,batchsize,hidden_size)

# pass data through the model and show the output sizes
y,h = gru(X,H) # unlike LSTM, there's no cell states in GRU!
print(f' Input shape: {list(X.shape)}')
print(f'Hidden shape: {list(h.shape)}')
print(f'Output shape: {list(y.shape)}')

 Input shape: [5, 2, 9]
Hidden shape: [2, 2, 16]
Output shape: [5, 2, 16]


In [23]:
# check out the learned parameters and their sizes
for p in gru.named_parameters():
  # print(p)
  print(f'{p[0]:>15} has size {list(p[1].shape)}') # right-aligned within a 15-character wide space
  ## 👆if the text is shorter than the specified width (15 characters in this case), it will be padded with spaces on the left to reach the width
  # print("-------------------------------")

   weight_ih_l0 has size [48, 9]
   weight_hh_l0 has size [48, 16]
     bias_ih_l0 has size [48]
     bias_hh_l0 has size [48]
   weight_ih_l1 has size [48, 16]
   weight_hh_l1 has size [48, 16]
     bias_ih_l1 has size [48]
     bias_hh_l1 has size [48]


👆 48 = 16 x 3

16: number of units in the hidden layer

3: number of gates/weights matrices (w_ir, w_iz, w_in)