# Simple RNN in PyTorch

What is included in this notebook:

- Implementation of Elman Network (Simple RNN) with nn.RNNCell and nn.RNN

## Simple RNN networks

To demonstrate how an RNN works, we first build one step by step with [nn.RNNCell](https://pytorch.org/docs/stable/generated/torch.nn.RNNCell.html) in PyTorch.

In [None]:
import torch
import torch.nn as nn

# Prefer the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

class ElmanRNN(nn.Module):
    """An Elman (simple) RNN unrolled one time step at a time with nn.RNNCell."""

    def __init__(self, input_size, hidden_size, batch_first=False):
        """
        Args:
            input_size (int): size of the input vectors
            hidden_size (int): size of the hidden state vectors
            batch_first (bool): whether the 0th dimension is batch
        """
        super(ElmanRNN, self).__init__()
        self.rnn_cell = nn.RNNCell(input_size, hidden_size)

        self.batch_first = batch_first
        self.hidden_size = hidden_size

    def _initialize_hidden(self, batch_size, device=None, dtype=None):
        """Create an all-zero initial hidden state.

        Args:
            batch_size (int): number of sequences in the batch
            device (torch.device, optional): device to allocate the state on;
                ``None`` uses PyTorch's default device
            dtype (torch.dtype, optional): dtype of the state; ``None`` uses
                PyTorch's default dtype
        Returns:
            torch.Tensor: zeros of shape (batch_size, hidden_size)
        """
        return torch.zeros(
            (batch_size, self.hidden_size), device=device, dtype=dtype
        )

    def forward(self, x_in, initial_hidden=None):
        """The forward pass of the ElmanRNN
        Args:
            x_in (torch.Tensor): an input data tensor.
                If self.batch_first: x_in.shape = (batch_size, seq_size, feat_size)
                Else: x_in.shape = (seq_size, batch_size, feat_size)
            initial_hidden (torch.Tensor): the initial hidden state for the RNN,
                of shape (batch_size, hidden_size). When None, a zero state
                matching the device and dtype of x_in is used.
        Returns:
            hiddens (torch.Tensor): The outputs of the RNN at each time step.
                If self.batch_first:
                    hiddens.shape = (batch_size, seq_size, hidden_size)
                Else: hiddens.shape = (seq_size, batch_size, hidden_size)
        """
        if self.batch_first:
            batch_size, _, _ = x_in.size()
            # The loop below walks over dim 0, so put the time axis first.
            x_in = x_in.permute(1, 0, 2)   # https://pytorch.org/docs/stable/generated/torch.permute.html
        else:
            _, batch_size, _ = x_in.size()

        if initial_hidden is None:
            # Match the input's device AND dtype: a float32 zero state would
            # clash with the cell's weights when the model runs in another
            # precision (e.g. after model.double() or model.half()).
            initial_hidden = self._initialize_hidden(
                batch_size, device=x_in.device, dtype=x_in.dtype
            )

        hiddens = []
        hidden_t = initial_hidden
        # Iterating a tensor yields its slices along dim 0, i.e. one
        # (batch_size, feat_size) slice per time step.
        for x_t in x_in:
            hidden_t = self.rnn_cell(x_t, hidden_t)
            hiddens.append(hidden_t)

        # (seq_size, batch_size, hidden_size)
        hiddens = torch.stack(hiddens)

        if self.batch_first:
            # Restore the caller's (batch, seq, hidden) layout.
            hiddens = hiddens.permute(1, 0, 2)

        return hiddens

Let's create a model and run it on a random batch.

In [None]:
# 10 input features per time step, 20 hidden units, batch dimension first
model = ElmanRNN(10, 20, batch_first=True)
# Create a batch of six sequences; each sequence contains 3 vectors with 10 features
# NOTE: `input` shadows the Python builtin; the name is kept because a later
# cell reuses this tensor under the same name.
input = torch.randn(6, 3, 10)

# Output of the model is 6 sequences of hidden vectors. Each sequence contains
# 3 hidden vectors of dimension 20 (the hidden size)
hiddens = model(input)
print(hiddens.size())

torch.Size([6, 3, 20])


We can also use the built-in [nn.RNN](https://pytorch.org/docs/stable/generated/torch.nn.RNN.html), which implements the Elman network for us.

In [None]:
# Same sizes as the ElmanRNN created earlier: 10 input features, 20 hidden units
rnn = nn.RNN(10, 20, batch_first=True)
# Zero initial hidden state of shape (1, 6, 20): one layer, batch of 6, hidden size 20
h0 = torch.zeros((1, 6, 20))
# `input` is the (6, 3, 10) batch created in the earlier cell
output, hn = rnn(input, h0)
print(output.size())

torch.Size([6, 3, 20])


In [None]:
# Size of the final hidden state returned by nn.RNN
hn.size()

torch.Size([1, 6, 20])