In [1]:
# Imports
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F


from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset, TensorDataset

from torchsummary import summary
import torchvision as tv
import torchvision.transforms as T
import copy

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline

# Tell the inline matplotlib backend to use the "svg" figure format.
backend_inline.set_matplotlib_formats("svg")


#### PyTorch device-specific configuration ###

# Use the CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Alternative for AMD GPUs through DirectML (left disabled):
# import torch_directml
#
# device = torch_directml.device()

# Make the selected device the default one for newly created tensors.
torch.set_default_device(device)


In [2]:
# Layer hyper-parameters
input_size = 9    # number of input features per sequence element
hidden_size = 16  # number of units in the hidden state
num_layers = 3    # number of vertically stacked recurrent layers (note: only the final layer gives the output)

actfun = 'tanh'
bias = True

# Build the RNN instance, passing every setting by keyword
rnn = nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    nonlinearity=actfun,
    bias=bias,
)
print(rnn)


RNN(9, 16, num_layers=3)


In [3]:
# Inspect the nn.RNN class: the IPython `??` prefix displays its init signature and source.
??nn.RNN


[0;31mInit signature:[0m [0mnn[0m[0;34m.[0m[0mRNN[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;32mclass[0m  [0mRNN[0m[0;34m([0m[0mRNNBase[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34mr"""__init__(input_size,hidden_size,num_layers=1,nonlinearity='tanh',bias=True,batch_first=False,dropout=0.0,bidirectional=False,device=None,dtype=None)[0m
[0;34m[0m
[0;34m    Apply a multi-layer Elman RNN with :math:`\tanh` or :math:`\text{ReLU}`[0m
[0;34m    non-linearity to an input sequence. For each element in the input sequence,[0m
[0;34m    each layer computes the following function:[0m
[0;34m[0m
[0;34m    .. math::[0m
[0;34m        h_t = \tanh(x_t W_{ih}^T + b_{ih} + h_{t-1}W_{hh}^T + b_{hh})[0m
[0;34m[0m
[0;34m    where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is[0m
[0;34m    the input at time `t`, and :math:`h_{(t-1)}` is the hidd

In [4]:
# Dimensions of the toy data
seqlength = 5
batchsize = 2

# Random input laid out as (sequence, batch, features)
X = torch.rand((seqlength, batchsize, input_size))

# Initial hidden state with one slice per layer (typically initialised to zeros)
hidden = torch.zeros((num_layers, batchsize, hidden_size))

# Push the data through the RNN, then report the sizes involved
y, h = rnn(X, hx=hidden)

print(f'Input shape: {list(X.shape)}')
print(f'hidden shape: {list(h.shape)}')
print(f'Output shape: {list(y.shape)}')


Input shape: [5, 2, 9]
hidden shape: [3, 2, 16]
Output shape: [5, 2, 16]


In [5]:
## When no hidden state is passed, the RNN starts from all zeros:
# First run: explicit zero-valued initial hidden state
y, h1 = rnn(X, hidden)
print(h1)
print("\n\n")

# Second run: let the layer fall back to its default initial state
y, h2 = rnn(X)
print(h2)
print("\n\n")

# Both final hidden states match, so the default initial state is zeros
print(h1 - h2)


tensor([[[ 0.0553,  0.4692, -0.2992, -0.0185, -0.2966,  0.0512, -0.1573,
          -0.2652, -0.3762, -0.8168,  0.2851,  0.0541,  0.4387,  0.3182,
          -0.1089, -0.0278],
         [-0.0906,  0.4395, -0.2964,  0.0081,  0.1130,  0.4554, -0.0110,
          -0.3426, -0.0050, -0.6435,  0.2563,  0.1115,  0.3900,  0.3407,
          -0.1093,  0.1370]],

        [[ 0.2462, -0.2205, -0.5350, -0.1092, -0.3098,  0.2747,  0.1645,
           0.1644, -0.3411,  0.2375,  0.1095, -0.3378,  0.3213,  0.1506,
          -0.5266, -0.1016],
         [ 0.1683, -0.2819, -0.4678,  0.0744, -0.2742,  0.3269,  0.3241,
          -0.1487, -0.2897,  0.2091,  0.3661, -0.3137,  0.3389,  0.1493,
          -0.4381, -0.1070]],

        [[ 0.1359, -0.8017, -0.1283,  0.0902,  0.0916, -0.3205,  0.2123,
          -0.1447,  0.0589,  0.1687, -0.4721, -0.1765, -0.4288,  0.5419,
           0.0691,  0.0387],
         [ 0.1634, -0.7681, -0.1030, -0.0162,  0.0726, -0.2013,  0.2197,
          -0.2613,  0.1382,  0.1162, -0.3885, -0

In [6]:
# List the RNN's learnable weight tensors together with their sizes
for name, param in rnn.named_parameters():
    if "weight" not in name:
        continue  # only parameters with "weight" in their name are reported
    print(f"{name} has size {list(param.shape)}")


weight_ih_l0 has size [16, 9]
weight_hh_l0 has size [16, 16]
weight_ih_l1 has size [16, 16]
weight_hh_l1 has size [16, 16]
weight_ih_l2 has size [16, 16]
weight_hh_l2 has size [16, 16]


In [7]:
# Let's create a DL model class
class RNNnet(nn.Module):
    """Multi-layer RNN followed by a linear layer that emits one value per sequence element.

    Args:
        input_size: number of features in each input element.
        num_hidden: number of units in the RNN hidden state.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, num_hidden, num_layers):
        super().__init__()

        # Store parameters
        self.input_size = input_size
        self.num_hidden = num_hidden
        self.num_layers = num_layers

        # RNN layer
        self.rnn = nn.RNN(self.input_size, self.num_hidden, self.num_layers)

        # Linear layer mapping each hidden vector to a single output value
        self.out = nn.Linear(num_hidden, 1)

    def forward(self, x):
        """Run a batch of sequences through the RNN and the linear output layer.

        The shape of every intermediate tensor is printed for inspection.

        Args:
            x: input tensor of shape (seq_len, batch, input_size), the layout
                nn.RNN expects when ``batch_first`` is left at its default.

        Returns:
            Tuple ``(o, hidden)``: ``o`` has shape (seq_len, batch, 1) and
            ``hidden`` is the final hidden state of shape
            (num_layers, batch, num_hidden).
        """
        print(f'Input: {list(x.shape)}')

        # Initialize the hidden state for the first input. Its batch size,
        # device and dtype are taken from the input itself, so the model does
        # not depend on a notebook-level `batchsize` variable and accepts
        # batches of any size.
        hidden = torch.zeros(
            self.num_layers,
            x.shape[1],
            self.num_hidden,
            device=x.device,
            dtype=x.dtype,
        )

        print(f'Hidden: {list(hidden.shape)}')

        # Run through the RNN layer
        y, hidden = self.rnn(x, hidden)
        print(f'RNN-out: {list(y.shape)}')
        print(f'RNN-hidden: {list(hidden.shape)}')

        # Pass the RNN output through the linear output layer
        o = self.out(y)
        print(f'Output: {list(o.shape)}')

        return o, hidden


In [8]:
# Instantiate the model and display its architecture
net = RNNnet(input_size, hidden_size, num_layers)
print(net)
print(' ')

# Then list every learnable parameter with its size
for name, param in net.named_parameters():
    print(f'{name} has size of : {list(param.shape)}')


RNNnet(
  (rnn): RNN(9, 16, num_layers=3)
  (out): Linear(in_features=16, out_features=1, bias=True)
)
 
rnn.weight_ih_l0 has size of : [16, 9]
rnn.weight_hh_l0 has size of : [16, 16]
rnn.bias_ih_l0 has size of : [16]
rnn.bias_hh_l0 has size of : [16]
rnn.weight_ih_l1 has size of : [16, 16]
rnn.weight_hh_l1 has size of : [16, 16]
rnn.bias_ih_l1 has size of : [16]
rnn.bias_hh_l1 has size of : [16]
rnn.weight_ih_l2 has size of : [16, 16]
rnn.weight_hh_l2 has size of : [16, 16]
rnn.bias_ih_l2 has size of : [16]
rnn.bias_hh_l2 has size of : [16]
out.weight has size of : [1, 16]
out.bias has size of : [1]


In [9]:
# Smoke-test the model on random data
# Random inputs plus random targets, one target value per sequence element and batch item
X = torch.rand((seqlength, batchsize, input_size))
y = torch.rand((seqlength, batchsize, 1))

yHat, h = net(X)

# Try a loss function (mean squared error) on the prediction
lossFun = nn.MSELoss()
lossFun(input=yHat, target=y)


Input: [5, 2, 9]
Hidden: [3, 2, 16]
RNN-out: [5, 2, 16]
RNN-hidden: [3, 2, 16]
Output: [5, 2, 1]


tensor(0.1724, device='cuda:0', grad_fn=<MseLossBackward0>)