In [1]:
# Imports
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F


from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset, TensorDataset

from torchsummary import summary
import torchvision as tv
import torchvision.transforms as T
import copy

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline

# Ask the inline matplotlib backend to emit figures in SVG format.
backend_inline.set_matplotlib_formats("svg")


#### PyTorch device configuration ####

# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Alternative (disabled): DirectML backend for AMD GPUs.
# import torch_directml
#
# device = torch_directml.device()

# Register the chosen device as PyTorch's default device.
torch.set_default_device(device)


In [2]:
# Hyperparameters of the recurrent layer
num_layers = 1    # how many recurrent layers are stacked (the output sequence comes from the last one)
hidden_size = 16  # number of units in the hidden state
input_size = 9    # number of features in each input element

# Non-linearity applied inside the cell, and whether bias terms are used
bias = True
actfun = 'tanh'

# Build the RNN from the settings above and show its summary representation
rnn = nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    nonlinearity=actfun,
    bias=bias,
)
print(rnn)


RNN(9, 16)


In [3]:
# Inspect the nn.RNN class: IPython's `??` prefix displays its signature and source code.
# NOTE: `??` is IPython/Jupyter syntax, not plain Python, so this line only works inside
# a notebook or IPython session.
??nn.RNN


[0;31mInit signature:[0m [0mnn[0m[0;34m.[0m[0mRNN[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;32mclass[0m [0mRNN[0m[0;34m([0m[0mRNNBase[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34mr"""__init__(input_size,hidden_size,num_layers=1,nonlinearity='tanh',bias=True,batch_first=False,dropout=0.0,bidirectional=False,device=None,dtype=None)[0m
[0;34m[0m
[0;34m    Apply a multi-layer Elman RNN with :math:`\tanh` or :math:`\text{ReLU}`[0m
[0;34m    non-linearity to an input sequence. For each element in the input sequence,[0m
[0;34m    each layer computes the following function:[0m
[0;34m[0m
[0;34m    .. math::[0m
[0;34m        h_t = \tanh(x_t W_{ih}^T + b_{ih} + h_{t-1}W_{hh}^T + b_{hh})[0m
[0;34m[0m
[0;34m    where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is[0m
[0;34m    the input at time `t`, and :math:`h_{(t-1)}` is the hidde

In [4]:
# Dimensions of the toy batch
batchsize = 2
seqlength = 5

# Initial hidden state (conventionally all zeros): one slice per layer,
# laid out as (num_layers, batch size, hidden units)
hidden = torch.zeros((num_layers, batchsize, hidden_size))

# Random input data laid out as (sequence length, batch size, input features)
X = torch.rand((seqlength, batchsize, input_size))

# Forward pass: `y` is the first value returned by the RNN (the output),
# `h` is the second (the hidden state after the pass)
y, h = rnn(X, hidden)

# Report the tensor sizes involved
print(f'Input shape: {list(X.shape)}')
print(f'hidden shape: {list(h.shape)}')
print(f'Output shape: {list(y.shape)}')


Input shape: [5, 2, 9]
hidden shape: [1, 2, 16]
Output shape: [5, 2, 16]
