<a href="https://colab.research.google.com/github/qmeng222/CNN/blob/main/RNN/RNN_intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# hyperparameters for the demo RNN
input_size = 9       # channels (features) per time step
hidden_size = 16     # width of the hidden state
num_layers = 1       # how many recurrent layers are stacked
actfun = 'tanh'      # nonlinearity applied inside the recurrent unit
bias = True          # whether the layer includes bias terms

# build the RNN; num_layers=1, nonlinearity='tanh' and bias=True are the
# library defaults and are spelled out here only for clarity
rnn = nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    nonlinearity=actfun,
    bias=bias,
)
print(rnn) # module summary

RNN(9, 16)


# Create some fake data & push it through the RNN instance:

In [6]:
# dimensions of the synthetic input
seqlength = 5
batchsize = 2

# uniform-random input laid out as (sequence, batch, channels)
X = torch.rand(seqlength, batchsize, input_size)

# explicit all-zero initial hidden state laid out as (layers, batch, hidden units)
hidden = torch.zeros(num_layers, batchsize, hidden_size)

# forward pass through the RNN, keeping both returned tensors
y, h = rnn(X, hidden)

# report the three shapes, one per line
print(
    *(f'{label} shape: {list(t.shape)}'
      for label, t in ((' Input', X), ('Hidden', h), ('Output', y))),
    sep='\n',
)

 Input shape: [5, 2, 9]
Hidden shape: [1, 2, 16]
Output shape: [5, 2, 16]


In [7]:
# scratch/demo: is omitting the initial hidden state the same as passing zeros?

# run once with the explicit zero state and once without any state argument
y, h1 = rnn(X, hidden)
y, h2 = rnn(X)

# show both final hidden states, each followed by a blank line
print(h1, '\n')
print(h2, '\n')

# an all-zero difference means the two runs agree (meaning default=zeros)
print(h1 - h2)

tensor([[[-0.2809,  0.2855, -0.1793,  0.3754,  0.0681,  0.4695, -0.0436,
           0.3177,  0.8018,  0.1217,  0.1087,  0.1841,  0.3456,  0.0418,
           0.0753, -0.1284],
         [-0.2278,  0.3388, -0.0153,  0.3632,  0.1878,  0.4778,  0.1053,
          -0.0159,  0.7996, -0.0548,  0.0606,  0.0789,  0.3153,  0.3863,
           0.2329, -0.2757]]], grad_fn=<StackBackward0>) 

tensor([[[-0.2809,  0.2855, -0.1793,  0.3754,  0.0681,  0.4695, -0.0436,
           0.3177,  0.8018,  0.1217,  0.1087,  0.1841,  0.3456,  0.0418,
           0.0753, -0.1284],
         [-0.2278,  0.3388, -0.0153,  0.3632,  0.1878,  0.4778,  0.1053,
          -0.0159,  0.7996, -0.0548,  0.0606,  0.0789,  0.3153,  0.3863,
           0.2329, -0.2757]]], grad_fn=<StackBackward0>) 

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<SubBackward0>)


In [9]:
# walk the RNN's learnable parameters and report the size of each weight
for name, param in rnn.named_parameters():
  print((name, param)) # the (name, parameter) pair itself
  if 'weight' in name:
    print(f'{name} has size {list(param.shape)}')
  print("-------------------------------------")

('weight_ih_l0', Parameter containing:
tensor([[ 8.7304e-02,  1.5094e-01,  1.5851e-01,  4.6990e-02, -4.7582e-02,
         -2.1358e-01, -1.5857e-01, -5.0121e-02,  2.3272e-01],
        [-2.0136e-01,  1.6671e-01, -1.8179e-01,  2.0318e-02,  2.0876e-01,
          2.5309e-02,  1.4542e-01, -1.6530e-01,  1.4229e-01],
        [-1.1852e-01,  8.8165e-02,  1.6479e-01, -4.4847e-02,  1.9075e-01,
          1.0064e-01, -1.9595e-01,  8.7732e-02,  1.3616e-02],
        [ 1.2927e-01,  1.4370e-01, -7.8339e-02, -8.3005e-02,  1.5582e-01,
         -2.1558e-01,  2.3072e-01, -1.7258e-01, -1.1067e-01],
        [-1.5122e-01,  2.4495e-01, -9.7879e-03,  3.3051e-05,  1.8997e-01,
         -1.0735e-01,  1.1544e-01, -1.5215e-01,  9.5780e-02],
        [-6.0916e-02,  1.8608e-01, -8.3411e-02, -7.3063e-02,  2.0731e-01,
          1.9774e-01,  2.3049e-01, -2.1326e-01, -8.2132e-02],
        [-8.1426e-02, -1.0060e-01, -1.4740e-01,  1.5681e-01,  5.1082e-02,
          1.4020e-01, -1.1975e-01, -1.1302e-01,  1.3248e-01],
        [

# Create a class for the DL model:

In [10]:
class RNNnet(nn.Module):
  """An nn.RNN layer followed by a linear readout that emits one value per time step.

  Args:
    input_size: number of data channels per time step.
    num_hidden: number of units in the RNN hidden state.
    num_layers: number of stacked RNN layers.
  """

  def __init__(self,input_size,num_hidden,num_layers):
    super().__init__()

    # keep the sizes so forward() can build a matching hidden state
    self.input_size = input_size
    self.num_hidden = num_hidden
    self.num_layers = num_layers

    # RNN Layer
    self.rnn = nn.RNN(input_size,num_hidden,num_layers)

    # linear layer for output
    self.out = nn.Linear(num_hidden,1) # num_hidden is the output of the RNN layer

  def forward(self,x):
    """Run `x` through the RNN and the linear readout, printing each shape.

    Args:
      x: input tensor laid out as (sequence, batch, input_size), the layout
        nn.RNN uses when batch_first is left at its default.

    Returns:
      A tuple `(o, hidden)`: `o` is the readout applied to the RNN output,
      shaped (sequence, batch, 1); `hidden` is the hidden state returned by
      the RNN, shaped (num_layers, batch, num_hidden).
    """

    print(f'Input: {list(x.shape)}')

    # initialize the hidden state with zeros; the batch size, dtype and device
    # come from the input itself so the model does not depend on a global
    # `batchsize` and works for any batch size
    hidden = torch.zeros(self.num_layers,x.size(1),self.num_hidden,
                         dtype=x.dtype,device=x.device)
    print(f'Hidden: {list(hidden.shape)}')

    # push through the RNN layer
    y,hidden = self.rnn(x,hidden)
    print(f'RNN-out: {list(y.shape)}')
    print(f'RNN-hidden: {list(hidden.shape)}')

    # pass the RNN output through the linear output layer
    o = self.out(y)
    print(f'Output: {list(o.shape)}')

    return o,hidden

In [11]:
# create an instance of the model and inspect
net = RNNnet(input_size,hidden_size,num_layers)
print(net, "\n")

# check out sizes of all learnable parameters
for p in net.named_parameters():
  print(f'{p[0]} has size {list(p[1].shape)}')

RNNnet(
  (rnn): RNN(9, 16)
  (out): Linear(in_features=16, out_features=1, bias=True)
) 

rnn.weight_ih_l0 has size [16, 9]
rnn.weight_hh_l0 has size [16, 16]
rnn.bias_ih_l0 has size [16]
rnn.bias_hh_l0 has size [16]
out.weight has size [1, 16]
out.bias has size [1]


In [12]:
# test the model with some data:

# loss criterion to try out (mean squared error)
lossfun = nn.MSELoss()

# random inputs, plus random targets holding one value per time step and sequence
X = torch.rand(seqlength, batchsize, input_size)
y = torch.rand(seqlength, batchsize, 1)

# forward pass: keep the model output and the returned hidden state
yHat, h = net(X)

# evaluate the loss; as the cell's last expression, its value is displayed
lossfun(yHat, y)

Input: [5, 2, 9]
Hidden: [1, 2, 16]
RNN-out: [5, 2, 16]
RNN-hidden: [1, 2, 16]
Output: [5, 2, 1]


tensor(0.1055, grad_fn=<MseLossBackward0>)