In [22]:
import torch
import torch.nn as nn

# Built-in API
# Unidirectional, single-layer RNN
single_rnn = nn.RNN(input_size=3, hidden_size=4, num_layers=1, batch_first=True)
input = torch.randn((2, 5, 3))  # (batch_size, seq_len, feature_size)
input

tensor([[[-0.8920,  0.9633, -0.1245],
         [ 0.5272,  0.4682,  1.5131],
         [ 0.6785, -0.4134,  0.2082],
         [ 1.2367, -0.0935, -1.4511],
         [ 0.0094,  1.8685, -0.7099]],

        [[-1.6764, -0.0644, -0.5363],
         [-0.6172,  1.2719,  0.5550],
         [ 0.6320, -1.0125,  0.5901],
         [ 0.2336,  0.7956,  0.1844],
         [ 1.1883,  1.9161,  0.8269]]])

In [23]:
output, h_n = single_rnn(input)  # h_n is the final hidden state, i.e. the output of the last time step

In [24]:
output  # shape (2, 5, 4): one hidden vector per time step for each sequence in the batch

tensor([[[ 0.4649, -0.5171,  0.0392, -0.8911],
         [ 0.1194, -0.1062, -0.3846, -0.0590],
         [-0.6075,  0.4019, -0.7106, -0.3793],
         [-0.4544,  0.7876, -0.7620, -0.7185],
         [ 0.7835,  0.0475, -0.0622, -0.9318]],

        [[ 0.1549, -0.5887, -0.1472, -0.8536],
         [ 0.6031, -0.4735, -0.0357, -0.7610],
         [-0.6661,  0.3441, -0.6848,  0.2250],
         [ 0.1097,  0.1363, -0.6194, -0.8001],
         [ 0.5773,  0.2415, -0.2417, -0.7165]]], grad_fn=<TransposeBackward1>)

In [26]:
h_n  # shape (1, 2, 4): matches the last time step of `output` for each sequence

tensor([[[ 0.7835,  0.0475, -0.0622, -0.9318],
         [ 0.5773,  0.2415, -0.2417, -0.7165]]], grad_fn=<StackBackward0>)

In [37]:
input_1 = torch.randn(1, 4, 3)  # (batch_size, seq_len, feature_size)
input_1

tensor([[[ 0.1779, -1.2328, -1.2820],
         [ 0.2657,  1.6019,  0.6330],
         [ 0.6776,  0.3896, -0.2637],
         [ 0.3944, -0.5108,  0.0258]]])

In [38]:
# Bidirectional, single-layer RNN
bidirectional_rnn = nn.RNN(
    input_size=3,
    hidden_size=4,
    num_layers=1,
    batch_first=True,
    bidirectional=True,
)
output_1, h_1 = bidirectional_rnn(input_1)

In [39]:
output_1  # bidirectional: forward and backward hidden states are concatenated (not summed) on the last dim, so it is 2 * hidden_size = 8 wide

tensor([[[ 0.4250, -0.3968, -0.7336, -0.5059,  0.4606, -0.9172,  0.0283,
          -0.4111],
         [-0.8447,  0.4888, -0.2943,  0.0176, -0.3975, -0.3857,  0.3244,
          -0.0885],
         [-0.0768, -0.5147, -0.1021, -0.6052, -0.2393, -0.6053,  0.3922,
          -0.1365],
         [-0.1766,  0.0440, -0.5316, -0.6787,  0.1186, -0.7335,  0.0381,
          -0.3112]]], grad_fn=<TransposeBackward1>)

In [40]:
h_1  # shape (2, 1, 4): [0] is the forward state after the last step, [1] is the backward state after the first step

tensor([[[-0.1766,  0.0440, -0.5316, -0.6787]],

        [[ 0.4606, -0.9172,  0.0283, -0.4111]]], grad_fn=<StackBackward0>)

In [42]:
# Hand-written implementation

bs = 2  # batch size
T = 3  # sequence length
input_size = 2  # number of input features
hidden_size = 3  # hidden feature size, i.e. the number of columns of the output
input = torch.randn((bs, T, input_size))
h_prev = torch.zeros((bs, hidden_size))  # initial hidden state

In [95]:
# Reference nn.RNN module (single layer, unidirectional by default).
rnn = nn.RNN(input_size, hidden_size, batch_first=True)
# h_prev is (bs, hidden_size); unsqueeze(0) adds the leading dimension nn.RNN expects for the initial state.
rnn_output, state_final = rnn(input, h_prev.unsqueeze(0))

In [121]:
def rnn_forward(intput, weight_ih, weight_hh, bias_ih, bias_hh, h_prev):
    """Run a single-layer, unidirectional tanh RNN over a batch-first sequence.

    For every time step t the recurrence is

        h_t = tanh(x_t @ weight_ih^T + bias_ih + h_{t-1} @ weight_hh^T + bias_hh)

    Args:
        intput: Input sequence of shape (bs, T, input_size). The misspelled
            name is kept so existing callers are unaffected; the body now
            reads this argument instead of a global named ``input``.
        weight_ih: Input-to-hidden weight of shape (h_dim, input_size).
        weight_hh: Hidden-to-hidden weight of shape (h_dim, h_dim).
        bias_ih: Input-to-hidden bias of shape (h_dim,).
        bias_hh: Hidden-to-hidden bias of shape (h_dim,).
        h_prev: Initial hidden state of shape (bs, h_dim).

    Returns:
        A tuple ``(h_out, h_n)`` where ``h_out`` has shape (bs, T, h_dim) and
        holds the hidden state of every time step, and ``h_n`` has shape
        (1, bs, h_dim) and holds the hidden state after the last step.
    """
    bs, T, _ = intput.shape
    h_dim = weight_ih.shape[0]  # hidden feature size, i.e. number of output columns
    # Allocate the result with the input's dtype/device so float64 or
    # non-CPU inputs are not silently written into a float32 CPU buffer.
    h_out = intput.new_zeros(bs, T, h_dim)

    for t in range(T):
        x_t = intput[:, t, :]  # (bs, input_size)
        # Plain matmul against the transposed weights; no need to copy the
        # weights once per batch element on every step.
        w_times_x = x_t @ weight_ih.T  # (bs, h_dim)
        w_times_h = h_prev @ weight_hh.T  # (bs, h_dim)
        h_prev = torch.tanh(w_times_x + bias_ih + w_times_h + bias_hh)

        h_out[:, t, :] = h_prev  # store this step's hidden state
    # Add a leading dimension to the final state (see the figure below).
    return h_out, h_prev.unsqueeze(0)
# Run the hand-written RNN with the parameters of `rnn` so the two results can be compared.
# Note: this overwrites the `output_1` produced by the bidirectional example above.
output_1, final_1 = rnn_forward(input, rnn.weight_ih_l0, rnn.weight_hh_l0, rnn.bias_ih_l0, rnn.bias_hh_l0, h_prev)       


![](https://shangxueweilong.oss-cn-guangzhou.aliyuncs.com/20230719192451.png)


In [136]:
# Verify correctness: print both results for visual comparison, then check them numerically.
print("mine：")
print(output_1, final_1)
print("#"*100)
print("API：")
print(rnn_output, state_final)
# Fail loudly if the hand-written RNN differs from nn.RNN by more than float32 rounding error.
assert torch.allclose(output_1, rnn_output, atol=1e-6), "per-step outputs differ from nn.RNN"
assert torch.allclose(final_1, state_final, atol=1e-6), "final hidden state differs from nn.RNN"

mine：
tensor([[[ 0.5016, -0.2810, -0.0158],
         [ 0.6392, -0.5014,  0.1027],
         [ 0.3720, -0.2385, -0.0537]],

        [[ 0.6046, -0.3930, -0.0417],
         [ 0.1861,  0.1639, -0.4590],
         [ 0.9104, -0.7714,  0.0761]]], grad_fn=<CopySlices>) tensor([[[ 0.3720, -0.2385, -0.0537],
         [ 0.9104, -0.7714,  0.0761]]], grad_fn=<UnsqueezeBackward0>)
####################################################################################################
API：
tensor([[[ 0.5016, -0.2810, -0.0158],
         [ 0.6392, -0.5014,  0.1027],
         [ 0.3720, -0.2385, -0.0537]],

        [[ 0.6046, -0.3930, -0.0417],
         [ 0.1861,  0.1639, -0.4590],
         [ 0.9104, -0.7714,  0.0761]]], grad_fn=<TransposeBackward1>) tensor([[[ 0.3720, -0.2385, -0.0537],
         [ 0.9104, -0.7714,  0.0761]]], grad_fn=<StackBackward0>)
