### RNN Basics
#### Code Source : https://github.com/deeplearningzerotoall/PyTorch

In [149]:
import torch
import tensorflow as tf
import numpy as np

In [150]:
# Random seed to make results deterministic and reproducible
torch.manual_seed(0)

<torch._C.Generator at 0x7f73f8fd0b10>

In [151]:
# declare dimension
input_size = 4
hidden_size = 2

In [152]:
# singleton example
# shape : (1, 1, 4)
# input_data_np = np.array([[[1, 0, 0, 0]]])

# sequential example
# shape : (3, 5, 4) = (batch size, sequence size, input size)
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]
input_data_np = np.array([[h, e, l, l, o], [e, o, l, l, l], [l, l, e, e, l]], dtype=np.float32)

### PyTorch RNN : RNNCell

In [153]:
# transform as torch tensor
input_data = torch.Tensor(input_data_np)

In [154]:
# declare RNN
rnn = torch.nn.RNN(input_size, hidden_size)

In [155]:
# check output
outputs, state = rnn(input_data)

In [156]:
print(outputs.size())
print(outputs)

torch.Size([3, 5, 2])
tensor([[[-0.7497, -0.6135],
         [-0.5282, -0.2473],
         [-0.9136, -0.4269],
         [-0.9136, -0.4269],
         [-0.9028,  0.1180]],

        [[-0.5753, -0.0070],
         [-0.9052,  0.2597],
         [-0.9173, -0.1989],
         [-0.9173, -0.1989],
         [-0.8996, -0.2725]],

        [[-0.9077, -0.3205],
         [-0.8944, -0.2902],
         [-0.5134, -0.0288],
         [-0.5134, -0.0288],
         [-0.9127, -0.2222]]], grad_fn=<StackBackward>)


### Final hidden state

In [157]:
print(state.size())
print(state)

torch.Size([1, 5, 2])
tensor([[[-0.9077, -0.3205],
         [-0.8944, -0.2902],
         [-0.5134, -0.0288],
         [-0.5134, -0.0288],
         [-0.9127, -0.2222]]], grad_fn=<StackBackward>)


#### print weights : W_ih, W_hh

In [158]:
print(rnn.weight_ih_l0)
print(rnn.weight_hh_l0)

Parameter containing:
tensor([[-0.0053,  0.3793, -0.5820, -0.5204],
        [-0.2723,  0.1896, -0.0140,  0.5607]], requires_grad=True)
Parameter containing:
tensor([[-0.0628,  0.1871],
        [-0.2137, -0.1390]], requires_grad=True)


In [159]:
for name in rnn.named_parameters():
    if 'weight' in name[0]:
        print(name[0], name[1])

weight_ih_l0 Parameter containing:
tensor([[-0.0053,  0.3793, -0.5820, -0.5204],
        [-0.2723,  0.1896, -0.0140,  0.5607]], requires_grad=True)
weight_hh_l0 Parameter containing:
tensor([[-0.0628,  0.1871],
        [-0.2137, -0.1390]], requires_grad=True)


### TensorFlow/Keras RNN

In [160]:
simple_rnn = tf.keras.layers.SimpleRNN(2)

In [161]:
output = simple_rnn(input_data_np)  # The output has shape `[3, 2]`.
print(output.shape)
print(output)

(3, 2)
tf.Tensor(
[[ 0.63927287 -0.9321795 ]
 [ 0.9366004  -0.9246495 ]
 [ 0.36197236 -0.9104857 ]], shape=(3, 2), dtype=float32)


In [162]:
simple_rnn = tf.keras.layers.SimpleRNN(
    2, return_sequences=True, return_state=True)

# whole_sequence_output has shape `[3, 5, 2]`.
# final_state has shape `[5, 2]`.
output, state = simple_rnn(input_data_np)

In [163]:
print(output.shape)
print(output)

(3, 5, 2)
tf.Tensor(
[[[ 0.41671875  0.75149643]
  [-0.01093186 -0.0412939 ]
  [-0.23885903 -0.61019343]
  [-0.02947876 -0.13416067]
  [ 0.589034    0.5193354 ]]

 [[ 0.36916515  0.6169435 ]
  [ 0.2875167  -0.18302734]
  [-0.4970336  -0.51885283]
  [ 0.22681062 -0.21654943]
  [-0.45062372 -0.49282563]]

 [[-0.24819823 -0.6356118 ]
  [-0.02076373 -0.10890364]
  [ 0.38470918  0.6801698 ]
  [ 0.01933733  0.03075416]
  [-0.2655577  -0.6538473 ]]], shape=(3, 5, 2), dtype=float32)


### Final hidden state

In [164]:
print(state.shape)
print(state)

(3, 2)
tf.Tensor(
[[ 0.589034    0.5193354 ]
 [-0.45062372 -0.49282563]
 [-0.2655577  -0.6538473 ]], shape=(3, 2), dtype=float32)
