### 1. Simple RNN

<!-- ![simple_rnn.png](simple_rnn.png) -->
<img src="simple_rnn.png" width=50%>

In [17]:
import torch
from torch import nn

In [23]:
# Input layout used by nn.RNN when batch_first is left at its default (False):
#   [seq_len, batch_size, embedding_dim]
SEQ_LEN = 70         # sequence length: number of time steps (tokens) per sentence
BATCH_SIZE = 3       # number of sentences processed together in one batch
EMBEDDING_DIM = 256  # size of the vector that represents each token
HIDDEN_SIZE = 512    # size of the hidden state carried between time steps
NUM_LAYERS = 1       # leading dimension of the initial state (single layer, one direction)

X = torch.randn(SEQ_LEN, BATCH_SIZE, EMBEDDING_DIM)
# Initial hidden state: [num_layers, batch_size, hidden_size], all zeros
h0 = torch.zeros(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE, dtype=torch.float32)
X.shape, h0.shape

(torch.Size([70, 3, 256]), torch.Size([1, 3, 512]))

In [24]:
# Build the recurrent neural network.
#   input_size:  number of features in each input vector (the embedding dimension)
#   hidden_size: number of features in the hidden state, which is also the size
#                of the output produced at every time step
rnn = nn.RNN(input_size=256, hidden_size=512)

In [25]:
# Call the RNN on the whole sequence X, starting from the initial hidden state h0
out, hn = rnn(X, h0)

In [26]:
# [seq_len, batch_size, hidden_size]: the hidden state at every time step
out.shape

torch.Size([70, 3, 512])

In [27]:
# [num_layers * num_directions, batch_size, hidden_size]: the hidden state after
# the last time step only. The leading 1 is the layer count, not seq_len.
hn.shape

torch.Size([1, 3, 512])

### 2. LSTM 长短期记忆网络（资料：[Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)）
- Long 长
- Short 短
- Term 期
- Memory 记忆
- 序列很长的时候，RNN会逐渐丢失较早时间步的信息（就像熊瞎子掰苞米，掰一个丢一个），所以需要LSTM来保留长期记忆

<img src="lstm.png" width=60%>

In [36]:
# Fully automatic: one call to nn.LSTM processes the entire sequence
# Inputs: input, (h_0, c_0)
# Outputs: output, (h_n, c_n)
lstm = nn.LSTM(input_size=256, hidden_size=512)

In [37]:
# Prepare the data: a random input sequence [seq_len, batch_size, embedding_dim]
# and zero-filled initial states [num_layers, batch_size, hidden_size]
X = torch.randn((70, 3, 256))
h0 = torch.zeros((1, 3, 512), dtype=torch.float32)  # initial hidden state
c0 = torch.zeros_like(h0)                           # initial cell state, separate tensor

In [38]:
# Run the whole sequence through the LSTM; the state is passed in and returned
# as an (h, c) tuple
out, (hn, cn) = lstm(X, (h0, c0))

In [39]:
# [seq_len, batch_size, hidden_size]: the hidden state at every time step
out.shape

torch.Size([70, 3, 512])

In [40]:
# [num_layers, batch_size, hidden_size]: hidden (short-term) state after the last step
hn.shape

torch.Size([1, 3, 512])

In [41]:
# [num_layers, batch_size, hidden_size]: cell (long-term) state after the last step
cn.shape

torch.Size([1, 3, 512])

In [46]:
# Single step: each call to nn.LSTMCell advances the state by one time step
# Inputs: input, (h_0, c_0)
# Outputs: (h_1, c_1)
lstm_cell = nn.LSTMCell(input_size=256, hidden_size=512)

In [47]:
# Input sequence: [seq_len, batch_size, embedding_dim]
X = torch.randn((70, 3, 256))
# The cell handles one time step per call, so its states are 2-D,
# [batch_size, hidden_size], with no leading num_layers dimension
h0 = torch.zeros((3, 512), dtype=torch.float32)
c0 = torch.zeros_like(h0)

In [48]:
# Take the data of a single time step (the first one): [batch_size, embedding_dim]
X0 = X[0]

In [49]:
# One time step for the whole batch: [batch_size, embedding_dim]
X0.shape

torch.Size([3, 256])

In [50]:
# Advance one step: feed the first time step together with the initial (h, c) state
hn, cn = lstm_cell(X0, (h0, c0))

In [51]:
# [batch_size, hidden_size]: hidden state after this single step
hn.shape

torch.Size([3, 512])

In [52]:
# [batch_size, hidden_size]: cell state after this single step
cn.shape

torch.Size([3, 512])

In [57]:
# Number of time steps in the sequence (seq_len)
X.shape[0]

70

In [58]:
# Unroll the sequence by hand: one lstm_cell call per time step.
# Reset the state to zeros first. The loop rebinds h0/c0 on every iteration, so
# without this reset, re-running the cell would silently resume from the
# previous run's final state instead of from the initial state.
h0 = torch.zeros(X.size(1), lstm_cell.hidden_size, dtype=torch.float32)
c0 = torch.zeros_like(h0)

out = []
for x in X:  # iterating a tensor walks its first dimension (seq_len)
    # Feed the previous step's state back in; once the loop ends, h0/c0 hold
    # the state after the last time step.
    h0, c0 = lstm_cell(x, (h0, c0))
    out.append(h0)  # collect the hidden state of every step

In [59]:
# Hidden (short-term) state of every step, stacked along a new leading
# dimension: [seq_len, batch_size, hidden_size]
out = torch.stack(out)

In [60]:
# State after the last step: the unrolling loop rebinds h0/c0 on every
# iteration, so by now they hold the final hidden and cell states
hn, cn = h0, c0

### 3. GRU 门控循环单元
- 核心思想：沿用LSTM的门控机制并加以简化——只保留隐藏状态 h，不再有单独的细胞状态 c
- 调用层面：跟Simple RNN是一样的（输入 `input, h_0`，输出 `output, h_n`）

In [24]:
# GRU layer: 256 input features per time step, 512 hidden features
gru = nn.GRU(input_size=256, hidden_size=512)

In [25]:
# Input sequence [seq_len, batch_size, embedding_dim] and a zero initial hidden
# state [num_layers, batch_size, hidden_size]; a GRU takes no cell state
X = torch.randn((70, 3, 256))
h0 = torch.zeros((1, 3, 512), dtype=torch.float32)

In [26]:
# Run the whole sequence through the GRU; the state is a single tensor, not an (h, c) tuple
out, hn = gru(X, h0)

In [27]:
# [seq_len, batch_size, hidden_size]: the hidden state at every time step
out.shape

torch.Size([70, 3, 512])

In [28]:
# [num_layers, batch_size, hidden_size]: the hidden state after the last time step
hn.shape

torch.Size([1, 3, 512])

In [29]:
# Display the GRUCell class: the one-step-per-call counterpart of nn.GRU,
# analogous to nn.LSTMCell for nn.LSTM
nn.GRUCell

torch.nn.modules.rnn.GRUCell