In [6]:
import torch
import torch.nn as nn

# Seed the global RNG before building the layer so that its randomly
# initialised parameters are the same on every run.
torch.manual_seed(1)

# Single-layer RNN mapping 5 input features to 2 hidden units.
rnn_layer = nn.RNN(input_size=5, hidden_size=2, num_layers=1, batch_first=True)

# Layer-0 parameters, bound to the names the manual forward pass uses.
W_xh, b_xh = rnn_layer.weight_ih_l0, rnn_layer.bias_ih_l0  # input  -> hidden
W_hh, b_hh = rnn_layer.weight_hh_l0, rnn_layer.bias_hh_l0  # hidden -> hidden

# Dump each parameter, one per paragraph, in the order they were extracted.
for label, param in (('W_xh', W_xh), ('b_xh', b_xh), ('W_hh', W_hh), ('b_hh', b_hh)):
    print(f'{label} {param}\n')

W_xh Parameter containing:
tensor([[ 0.3643, -0.3121, -0.1371,  0.3319, -0.6657],
        [ 0.4241, -0.1455,  0.3597,  0.0983, -0.0866]], requires_grad=True)

b_xh Parameter containing:
tensor([-0.0516, -0.0637], requires_grad=True)

W_hh Parameter containing:
tensor([[ 0.1961,  0.0349],
        [ 0.2583, -0.2756]], requires_grad=True)

b_hh Parameter containing:
tensor([ 0.1025, -0.0028], requires_grad=True)



In [35]:
# Re-derive the RNN forward pass by hand and check it against nn.RNN.
#
# x_seq is a 2-D tensor (3 time steps x 5 features). nn.RNN accepts a 2-D
# input as a single unbatched sequence, which is why out_seq has one row per
# time step. The literal is already float32, so no dtype cast is needed.
x_seq = torch.tensor([[1.0] * 5, [2.0] * 5, [3.0] * 5])
print(f'x_seq:\n{x_seq}\n')

# Reference result from the library implementation.
out_seq, hidden_states = rnn_layer(x_seq)

seq_length = len(x_seq)  # T
output = []
# Initial hidden state h_0 is all zeros, matching what nn.RNN uses when no
# h_0 is supplied; it is then carried forward explicitly from step to step.
h_prev = torch.zeros(rnn_layer.hidden_size, dtype=W_hh.dtype, device=W_hh.device)
for t in range(seq_length):
    x_t = x_seq[t]
    print(f'Time step {t} =>')
    print(f'    Input           : {x_t.numpy()}')

    # Input contribution only: W_xh x_t + b_xh (this is what "Hidden" prints).
    h_t = W_xh.matmul(x_t) + b_xh
    print(f'    Hidden          : {h_t.detach().numpy()}')

    # Add the recurrent contribution W_hh h_{t-1} + b_hh. Rebind instead of
    # using an in-place `+=` so the tensor printed above is not mutated; the
    # parentheses keep the same floating-point evaluation order.
    h_t = h_t + (W_hh.matmul(h_prev) + b_hh)
    o_t = torch.tanh(h_t)
    output.append(o_t)
    print(f'    Output          : {o_t.detach().numpy()}')
    print(f'    RNN output      : {out_seq[t].detach().numpy()}')

    # Fail loudly if the manual step ever diverges from nn.RNN, rather than
    # relying on a visual comparison of the two printed rows.
    torch.testing.assert_close(o_t, out_seq[t])
    h_prev = o_t

# The final hidden state returned by nn.RNN must equal the last manual output.
torch.testing.assert_close(output[-1], hidden_states[-1])

x_seq:
tensor([[1., 1., 1., 1., 1.],
        [2., 2., 2., 2., 2.],
        [3., 3., 3., 3., 3.]])

Time step 0 =>
    Input           : [1. 1. 1. 1. 1.]
    Hidden          : [-0.4701929  0.5863904]
    Output          : [-0.3519801   0.52525216]
    RNN output      : [-0.3519801   0.52525216]
Time step 1 =>
    Input           : [2. 2. 2. 2. 2.]
    Hidden          : [-0.88883156  1.2364397 ]
    Output          : [-0.68424344  0.76074266]
    RNN output      : [-0.68424344  0.76074266]
Time step 2 =>
    Input           : [3. 3. 3. 3. 3.]
    Hidden          : [-1.3074701  1.886489 ]
    Output          : [-0.8649416   0.90466356]
    RNN output      : [-0.8649416   0.90466356]


In [33]:
# Second value returned by rnn_layer(x_seq): the hidden state after the last
# time step. Displayed so it can be compared with the last row of out_seq.
hidden_states

tensor([[-0.8649,  0.9047]], grad_fn=<SqueezeBackward1>)

In [34]:
# First value returned by rnn_layer(x_seq): one output row per time step of
# x_seq, i.e. the same values printed as "RNN output" in the manual loop.
out_seq

tensor([[-0.3520,  0.5253],
        [-0.6842,  0.7607],
        [-0.8649,  0.9047]], grad_fn=<SqueezeBackward1>)