# Objective

- s1: Implement RNN **with** Default Pytorch `nn.RNN` module
- s2: Implement RNN manually **without** `nn.RNN` module

- s3: Copy the default (seeded) weights and biases of s1 into s2.
- s4: Pass the same `input_x` to both s1 and s2.

- s5: Compare RNN output of **1 forward pass**, taking care of sequence length, for both `s1` and `s2`

- [PYTORCH](https://www.deeplearningwizard.com/deep_learning/practical_pytorch/pytorch_recurrent_neuralnetwork/)

In [44]:
import torch
import torch.nn as nn
torch.manual_seed(2)

<torch._C.Generator at 0x104a7cf50>

### Checking documentation from `pytorch/nn.RNN`

In [2]:

# Dimensions shared by every RNN variant compared in this notebook.
input_size, hidden_size, n_layers = 4, 3, 1
seq_len, batch_sz = 5, 1  # time steps, batch size

# Seed first so the layer weights, the input and h0 are reproducible.
# The three random draws below must stay in this order.
torch.manual_seed(2)
rnn = torch.nn.RNN(input_size, hidden_size, n_layers)
input_x = torch.randn((seq_len, batch_sz, input_size))
h0 = torch.randn((n_layers, batch_sz, hidden_size))

# nn.RNN returns the per-step outputs and the final hidden state.
ht, hn = rnn(input_x, h0)

#### Input_X

The following input is passed to every RNN architecture being compared.

In [3]:
input_x

tensor([[[-0.0919, -0.1320, -0.2751, -0.2350]],

        [[-1.2034, -1.0190,  0.3157, -1.6036]],

        [[ 1.8493,  0.0447,  1.5853, -0.5912]],

        [[ 0.1694,  0.7562, -1.2023, -0.5833]],

        [[-0.4407, -1.9791,  0.7787, -0.7749]]])

In [4]:
input_x.shape

torch.Size([5, 1, 4])

## S1: RNN with `nn.RNN`

In [24]:
class RNN_with_rnn_module(nn.Module):
    """Thin wrapper around ``nn.RNN`` used as the reference model (S1).

    Args:
        input_size: Number of features in each input step.
        output_size: Accepted for interface compatibility; not used.
        hidden_dim: Size of the hidden state.
        n_layers: Number of stacked recurrent layers.
    """

    def __init__(self,input_size,output_size,hidden_dim,n_layers):
        super().__init__()
        self.hidden_dim=hidden_dim
        self.n_layers=n_layers
        self.rnn=nn.RNN(input_size,hidden_dim,n_layers)

    def forward(self,x,hidden=None):
        """Run ``x`` through the wrapped ``nn.RNN``.

        Args:
            x: Input handed straight to ``self.rnn``
                (``[seq_len, batch, input_size]`` in this notebook).
            hidden: Optional initial hidden state. When omitted, the fixed
                state returned by ``init_hidden`` is used, which keeps the
                original one-argument call working unchanged.

        Returns:
            The ``(out, hidden)`` pair returned by ``nn.RNN``.
        """
        if hidden is None:
            # The fixed state has a batch dimension of 1.
            hidden=self.init_hidden(batch_size=1)
        out,hidden=self.rnn(x,hidden)
        return out,hidden

    def init_hidden(self,batch_size):
        """Return the fixed initial hidden state used for the comparison.

        ``batch_size`` is accepted for interface compatibility but ignored:
        the state is a hard-coded ``[1, 1, 3]`` tensor, so it only fits
        ``n_layers=1``, a batch of one and ``hidden_dim=3``.
        """
        return torch.Tensor([[[0.5602, 0.9671, 0.2931]]])

In [25]:
# Re-seed right before construction so nn.RNN's initial weights are reproducible.
torch.manual_seed(2)
rnn_with_module = RNN_with_rnn_module(
    input_size=4, output_size=4, hidden_dim=3, n_layers=1
)
print(list(rnn_with_module.named_parameters()))

[('rnn.weight_ih_l0', Parameter containing:
tensor([[ 0.1324, -0.1374,  0.1583, -0.0295],
        [ 0.2466,  0.1375, -0.0664, -0.4668],
        [ 0.1318, -0.5112,  0.0759,  0.0384]], requires_grad=True)), ('rnn.weight_hh_l0', Parameter containing:
tensor([[-0.1270,  0.4721,  0.0385],
        [ 0.2394,  0.2443, -0.3406],
        [-0.2220,  0.5552, -0.5655]], requires_grad=True)), ('rnn.bias_ih_l0', Parameter containing:
tensor([-0.0392, -0.0458,  0.4095], requires_grad=True)), ('rnn.bias_hh_l0', Parameter containing:
tensor([-0.0549,  0.1520, -0.0277], requires_grad=True))]


**Compare the weights above with the ones below**

```py
[('linear_ip.weight', Parameter containing:
tensor([[ 0.1324, -0.1374,  0.1583, -0.0295],
        [ 0.2466,  0.1375, -0.0664, -0.4668],
        [ 0.1318, -0.5112,  0.0759,  0.0384]], requires_grad=True)), ('linear_ip.bias', Parameter containing:
tensor([-0.0392, -0.0458,  0.4095], requires_grad=True)), ('linear_hidden.weight', Parameter containing:
tensor([[-0.1270,  0.4721,  0.0385],
        [ 0.2394,  0.2443, -0.3406],
        [-0.2220,  0.5552, -0.5655]], requires_grad=True)), ('linear_hidden.bias', Parameter containing:
tensor([-0.0549,  0.1520, -0.0277], requires_grad=True))]
```

### S1: After 1st forward pass

In [26]:
# Feed the whole sequence to the model in a single call.
out, hidden_out = rnn_with_module(input_x)

# Per-step outputs first, then the final hidden state.
print(out, hidden_out, sep="\n")

tensor([[[ 0.2655,  0.4333,  0.5745]],

        [[ 0.1750,  0.3546,  0.5104]],

        [[ 0.5212,  0.6005,  0.5152]],

        [[-0.1111,  0.6044, -0.1680]],

        [[ 0.5069,  0.2106,  0.9488]]], grad_fn=<StackBackward>)
tensor([[[0.5069, 0.2106, 0.9488]]], grad_fn=<StackBackward>)


## S2: Manual RNN implementation with n cells, where `n=seq_len`

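Remember: when passing the hidden state to the next cell, make sure it is passed after the activation has been applied to it. In `forward` below, see how `h_n` is initialised before the loop and then, inside the loop, fed to `linear_hidden` and reassigned to the `tanh` output, so each cell receives the previous cell's activated state.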

![image](https://datascience-enthusiast.com/figures/rnn.png)

**Note:** In the architecture below, $a_t$ (from the image above) corresponds to $h_n$.

- [source_blog](https://datascience-enthusiast.com/DL/Building_a_Recurrent_Neural_Network-Step_by_Step_v1.html)



In [33]:
class RNNIMPLEMENT_multiple_cell(nn.Module):
    """Manual single-layer tanh RNN, unrolled for ``seq_len`` steps (S2).

    Both ``nn.Linear`` layers are overwritten with hard-coded four-decimal
    weights and biases, so the parameter values do not depend on the random
    seed. ``output_size`` is accepted but not used.
    """

    def __init__(self,input_dim,hidden_dim,seq_len,output_size):
        super().__init__()
        self.seq_len = seq_len

        # Input-to-hidden projection, pinned to fixed values.
        self.linear_ip = nn.Linear(input_dim, hidden_dim)
        ip_weight = torch.Tensor([
            [0.1324, -0.1374, 0.1583, -0.0295],
            [0.2466, 0.1375, -0.0664, -0.4668],
            [0.1318, -0.5112, 0.0759, 0.0384],
        ])
        ip_bias = torch.Tensor([-0.0392, -0.0458, 0.4095])
        self.linear_ip.weight.data = ip_weight
        self.linear_ip.bias.data = ip_bias

        # Hidden-to-hidden projection, pinned to fixed values.
        self.linear_hidden = nn.Linear(hidden_dim, hidden_dim)
        hidden_weight = torch.Tensor([
            [-0.1270, 0.4721, 0.0385],
            [0.2394, 0.2443, -0.3406],
            [-0.2220, 0.5552, -0.5655],
        ])
        hidden_bias = torch.Tensor([-0.0549, 0.1520, -0.0277])
        self.linear_hidden.weight.data = hidden_weight
        self.linear_hidden.bias.data = hidden_bias

        self.activation = torch.tanh

    def forward(self,x):
        """
        x: sequence indexed along its first axis; ``x[i]`` is the input of
           step ``i`` and its last dimension must be ``input_dim``
           (``[seq_len, batch, input_dim]`` in this notebook).

        Returns ``(states, h_n)``: a list with the hidden state after each of
        the ``self.seq_len`` steps, and the final hidden state.
        """
        # Fixed initial hidden state of shape [1, 1, 3].
        h_n = torch.Tensor([[[0.5602, 0.9671, 0.2931]]])
        states = []
        for step in range(self.seq_len):
            from_input = self.linear_ip(x[step])
            from_hidden = self.linear_hidden(h_n)
            # The state handed to the next cell is the post-tanh value.
            h_n = self.activation(from_input + from_hidden)
            states.append(h_n)

        return states, h_n

### Checking if the weights are same with S1

In [34]:
# Seeding is kept for parity with the S1 cell.
torch.manual_seed(2)
model_rnn_mc=RNNIMPLEMENT_multiple_cell(4,3,5,1)
# Inspect the model that was just built. The original printed `model_rnn_2`,
# which is a different model and is only defined further down.
print (list(model_rnn_mc.named_parameters()))

[('linear_ip.weight', Parameter containing:
tensor([[ 0.1324, -0.1374,  0.1583, -0.0295],
        [ 0.2466,  0.1375, -0.0664, -0.4668],
        [ 0.1318, -0.5112,  0.0759,  0.0384]], requires_grad=True)), ('linear_ip.bias', Parameter containing:
tensor([-0.0392, -0.0458,  0.4095], requires_grad=True)), ('linear_hidden.weight', Parameter containing:
tensor([[-0.1270,  0.4721,  0.0385],
        [ 0.2394,  0.2443, -0.3406],
        [-0.2220,  0.5552, -0.5655]], requires_grad=True)), ('linear_hidden.bias', Parameter containing:
tensor([-0.0549,  0.1520, -0.0277], requires_grad=True))]


In [None]:
input_x

### S2: After 1st forward pass

In [41]:
model_rnn_mc(input_x)

([tensor([[[0.2655, 0.4332, 0.5744]]], grad_fn=<TanhBackward>),
  tensor([[[0.1750, 0.3546, 0.5104]]], grad_fn=<TanhBackward>),
  tensor([[[0.5211, 0.6004, 0.5151]]], grad_fn=<TanhBackward>),
  tensor([[[-0.1111,  0.6044, -0.1681]]], grad_fn=<TanhBackward>),
  tensor([[[0.5069, 0.2105, 0.9489]]], grad_fn=<TanhBackward>)],
 tensor([[[0.5069, 0.2105, 0.9489]]], grad_fn=<TanhBackward>))

### S1. After 1st forward pass

In [43]:
rnn_with_module(input_x)

(tensor([[[ 0.2655,  0.4333,  0.5745]],
 
         [[ 0.1750,  0.3546,  0.5104]],
 
         [[ 0.5212,  0.6005,  0.5152]],
 
         [[-0.1111,  0.6044, -0.1680]],
 
         [[ 0.5069,  0.2106,  0.9488]]], grad_fn=<StackBackward>),
 tensor([[[0.5069, 0.2106, 0.9488]]], grad_fn=<StackBackward>))

### Remark:

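As you can see, both `model_rnn_mc(input_x)` and `rnn_with_module(input_x)` give the same output, up to differences in the fourth decimal place (e.g. 0.4332 vs 0.4333) — presumably because the manual model uses weights copied at four-decimal precision.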

# Helper Code

## RNN without `nn.RNN` with single cell only

In [36]:
class RNNIMPLEMENT_1(nn.Module):
    """One manual RNN cell built from two randomly initialised ``nn.Linear`` layers.

    ``output_size`` is accepted but not used.
    """

    def __init__(self,input_dim,hidden_dim,output_size):
        super().__init__()
        self.linear_ip = nn.Linear(input_dim, hidden_dim)
        self.linear_hidden = nn.Linear(hidden_dim, hidden_dim)
        self.activation = torch.tanh

    def forward(self,x):
        """Return ``tanh(linear_ip(x) + linear_hidden(h0))`` for a fixed ``h0``."""
        # Fixed initial hidden state of shape [1, 1, 3].
        initial_state = torch.Tensor([[[0.5602, 0.9671, 0.2931]]])
        pre_activation = self.linear_ip(x) + self.linear_hidden(initial_state)
        return self.activation(pre_activation)

In [37]:
# Seed so the randomly initialised Linear layers are reproducible.
torch.manual_seed(2)
model_rnn_1 = RNNIMPLEMENT_1(input_dim=4, hidden_dim=3, output_size=1)
print(list(model_rnn_1.named_parameters()))

[('linear_ip.weight', Parameter containing:
tensor([[ 0.1147, -0.1190,  0.1371, -0.0255],
        [ 0.2136,  0.1190, -0.0575, -0.4042],
        [ 0.1142, -0.4427,  0.0657,  0.0332]], requires_grad=True)), ('linear_ip.bias', Parameter containing:
tensor([-0.1099,  0.4088,  0.0334], requires_grad=True)), ('linear_hidden.weight', Parameter containing:
tensor([[ 0.2394,  0.2443, -0.3406],
        [-0.2220,  0.5552, -0.5655],
        [-0.0392, -0.0458,  0.4095]], requires_grad=True)), ('linear_hidden.bias', Parameter containing:
tensor([-0.0549,  0.1520, -0.0277], requires_grad=True))]


### Same Manual Implementation, without `nn.RNN`, with default weight

In [38]:
class RNNIMPLEMENT_single_cell(nn.Module):
    """One manual RNN cell whose parameters are pinned to fixed values.

    Both ``nn.Linear`` layers are overwritten with hard-coded four-decimal
    weights and biases, so the parameter values do not depend on the random
    seed. ``output_size`` is accepted but not used.
    """

    def __init__(self,input_dim,hidden_dim,output_size):
        super().__init__()
        # Input-to-hidden projection, pinned to fixed values.
        self.linear_ip=nn.Linear(input_dim,hidden_dim)
        self.linear_ip.weight.data=torch.Tensor([[ 0.1324, -0.1374,  0.1583, -0.0295],
                                                    [ 0.2466,  0.1375, -0.0664, -0.4668],
                                                    [ 0.1318, -0.5112,  0.0759,  0.0384]])
        self.linear_ip.bias.data=torch.Tensor([-0.0392, -0.0458,  0.4095])

        # Hidden-to-hidden projection, pinned to fixed values.
        self.linear_hidden=nn.Linear(hidden_dim, hidden_dim)
        self.linear_hidden.weight.data=torch.Tensor([[-0.1270,  0.4721,  0.0385],
                                                        [ 0.2394,  0.2443, -0.3406],
                                                        [-0.2220,  0.5552, -0.5655]])
        self.linear_hidden.bias.data=torch.Tensor([-0.0549,  0.1520, -0.0277])

        self.activation=torch.tanh

    def forward(self,x):
        """Apply a single RNN step to ``x`` from a fixed initial state.

        Args:
            x: Input whose last dimension is ``input_dim``.

        Returns:
            ``tanh(linear_ip(x) + linear_hidden(h0))``, where ``h0`` is the
            hard-coded ``[1, 1, 3]`` initial hidden state.
        """
        h0=torch.Tensor([[[0.5602, 0.9671, 0.2931]]])
        linear_output=self.linear_ip(x)
        # This projection was missing, so `hidden_output` was undefined and
        # every call to forward raised NameError.
        hidden_output=self.linear_hidden(h0)

        h_n=self.activation(linear_output+hidden_output)
        return h_n

In [40]:
# Build the single-cell model and list its pinned parameters.
model_rnn_2 = RNNIMPLEMENT_single_cell(input_dim=4, hidden_dim=3, output_size=1)
print(list(model_rnn_2.named_parameters()))

# Example forward call, left disabled:
# input_x=torch.Tensor([[[ 0.0219, -0.3409, -1.1657,  0.8022]]])
# model_rnn_2(input_x[0])

[('linear_ip.weight', Parameter containing:
tensor([[ 0.1324, -0.1374,  0.1583, -0.0295],
        [ 0.2466,  0.1375, -0.0664, -0.4668],
        [ 0.1318, -0.5112,  0.0759,  0.0384]], requires_grad=True)), ('linear_ip.bias', Parameter containing:
tensor([-0.0392, -0.0458,  0.4095], requires_grad=True)), ('linear_hidden.weight', Parameter containing:
tensor([[-0.1270,  0.4721,  0.0385],
        [ 0.2394,  0.2443, -0.3406],
        [-0.2220,  0.5552, -0.5655]], requires_grad=True)), ('linear_hidden.bias', Parameter containing:
tensor([-0.0549,  0.1520, -0.0277], requires_grad=True))]


## Cross-checking with a linear layer followed by `tanh`

In [45]:
class RNNMODEL(nn.Module):
    """``nn.RNN`` (batch-first) plus a linear head that ``forward`` does not apply.

    The ``fc`` layer is constructed and therefore shows up in
    ``named_parameters()``, but ``forward`` returns the raw RNN results.
    """

    def __init__(self,input_size,output_size,hidden_dim,n_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        # Keep this construction order: rnn first, then fc.
        self.rnn = nn.RNN(
            input_size,
            hidden_dim,
            n_layers,
            batch_first=True,
            bias=True,
        )
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self,x):
        """Run ``x`` through the RNN from a fixed initial state; return ``(out, hidden)``."""
        # Fixed initial hidden state of shape [1, 1, 3].
        initial_state = torch.Tensor([[[0.5602, 0.9671, 0.2931]]])
        rnn_out, last_hidden = self.rnn(x, initial_state)
        return rnn_out, last_hidden

# Seed so the RNN and the linear head get reproducible initial weights.
torch.manual_seed(2)
RNNM = RNNMODEL(input_size=4, output_size=1, hidden_dim=3, n_layers=1)

print(list(RNNM.named_parameters()))

[('rnn.weight_ih_l0', Parameter containing:
tensor([[ 0.1324, -0.1374,  0.1583, -0.0295],
        [ 0.2466,  0.1375, -0.0664, -0.4668],
        [ 0.1318, -0.5112,  0.0759,  0.0384]], requires_grad=True)), ('rnn.weight_hh_l0', Parameter containing:
tensor([[-0.1270,  0.4721,  0.0385],
        [ 0.2394,  0.2443, -0.3406],
        [-0.2220,  0.5552, -0.5655]], requires_grad=True)), ('rnn.bias_ih_l0', Parameter containing:
tensor([-0.0392, -0.0458,  0.4095], requires_grad=True)), ('rnn.bias_hh_l0', Parameter containing:
tensor([-0.0549,  0.1520, -0.0277], requires_grad=True)), ('fc.weight', Parameter containing:
tensor([[-0.3233, -0.3272, -0.2805]], requires_grad=True)), ('fc.bias', Parameter containing:
tensor([-0.5245], requires_grad=True))]
