## 6.2 理解循环神经网络

**代码清单 6-19** RNN伪代码

In [None]:
# RNN pseudocode. It is wrapped in a string literal so the cell evaluates to
# text instead of executing: `f` and `input_sequence` are not defined here.
'''
# t时刻状态
state_t = 0
# 对序列元素进行遍历
for input_t in input_sequence:
    output_t = f(input_t, state_t)
    # 前一次的输出变成下一次迭代的状态
    state_t = output_t
'''

**代码清单 6-20** 更详细的RNN伪代码

In [None]:
# More detailed RNN pseudocode, again kept as a string literal rather than
# executable code. Each step combines the current input (through W) and the
# previous state (through U) plus a bias b, then applies an activation; the
# result becomes the state for the next step.
'''
state_t = 0
for input_t in input_sequence:
    output_t = activation(dot(W, input_t) + dot(U, state_t) + b)
    state_t = output_t
'''

**代码清单 6-21** 简单RNN的Numpy实现

In [None]:
import numpy as np

timesteps = 100         # number of time steps in the input sequence
input_features = 32     # dimensionality of the input feature space
output_features = 64    # dimensionality of the output feature space

# Example input only: uniform random noise, one row per time step.
inputs = np.random.random((timesteps, input_features))

# The recurrent state starts out as an all-zero vector.
state_t = np.zeros((output_features,))

# Randomly initialised parameters: input weights, recurrent weights and bias.
W = np.random.random((output_features, input_features))
U = np.random.random((output_features, output_features))
b = np.random.random((output_features,))

successive_outputs = []
for step in range(timesteps):
    # input_t is a vector of shape (input_features,)
    input_t = inputs[step]

    # Combine the current input with the current state (the previous output).
    input_term = np.dot(W, input_t)
    state_term = np.dot(U, state_t)
    output_t = np.tanh(input_term + state_term + b)

    # Keep every per-step output, then carry it forward as the next state.
    successive_outputs.append(output_t)
    state_t = output_t

# The final result is a 2D tensor of shape (timesteps, output_features).
final_output_sequence = np.stack(successive_outputs)
print(final_output_sequence)

### 6.2.1 Keras中的循环层

In [None]:
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN

# Embedding (vocabulary of 10000 tokens, 32-dim vectors, sequences of length 20)
# feeding one SimpleRNN layer with 32 units; print the layer/parameter summary.
model = Sequential([
    Embedding(10000, 32, input_length=20),
    SimpleRNN(32),
])
model.summary()

In [None]:
# Same architecture, but the recurrent layer is asked to return its output
# for every time step (return_sequences=True) instead of only the last one.
model = Sequential([
    Embedding(10000, 32, input_length=20),
    SimpleRNN(32, return_sequences=True),
])
model.summary()

In [None]:
# Stack of four recurrent layers. The first three return full sequences so the
# next recurrent layer has a sequence to consume.
model = Sequential([
    Embedding(10000, 32, input_length=20),
    SimpleRNN(32, return_sequences=True),
    SimpleRNN(32, return_sequences=True),
    SimpleRNN(32, return_sequences=True),
    # The last layer returns only the final output.
    SimpleRNN(32),
])
model.summary()

**代码清单 6-22** 准备IMDB数据

In [None]:
from keras.datasets import imdb
from keras.preprocessing import sequence

import numpy as np
# Keep a handle on the real np.load so it can be restored afterwards.
np_load_old = np.load


def _np_load_allow_pickle(*args, **kwargs):
    """Call the original np.load with allow_pickle forced to True.

    The flag is written into kwargs instead of being passed as an extra
    keyword, so a caller that already supplies allow_pickle does not trigger
    a "multiple values for keyword argument" TypeError.
    """
    kwargs['allow_pickle'] = True
    return np_load_old(*args, **kwargs)


# Number of words to consider as features.
max_features = 1000
# Cut texts off after maxlen words (all of them among the max_features most
# common words).
maxlen = 500
batch_size = 32

print('Loading data...')
# NOTE(review): the temporary patch is presumably needed because the installed
# Keras calls np.load without allow_pickle -- confirm against your versions.
# allow_pickle=True unpickles arbitrary objects; only use it on trusted data.
np.load = _np_load_allow_pickle
try:
    (input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)
finally:
    # Always restore np.load, even if loading fails, so later cells see the
    # unpatched function.
    np.load = np_load_old

print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

print('Pad sequences (samples x time)')
input_train = sequence.pad_sequences(input_train, maxlen=maxlen)
input_test = sequence.pad_sequences(input_test, maxlen=maxlen)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

**代码清单 6-23** 用Embedding层和SimpleRNN层来训练模型

In [None]:
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense

# Binary classifier: embedding -> SimpleRNN -> single sigmoid unit.
model = Sequential([
    Embedding(max_features, 32, input_length=maxlen),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train for 10 epochs, holding out 20% of the training data for validation.
history = model.fit(
    input_train,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

**代码清单 6-24** 绘制结果

In [None]:
import matplotlib.pyplot as plt

# Pull the per-epoch curves recorded by model.fit out of the History object.
recorded = history.history
acc, val_acc = recorded['acc'], recorded['val_acc']
loss, val_loss = recorded['loss'], recorded['val_loss']

# Epochs are numbered from 1 on the x axis.
epochs = range(1, len(acc) + 1)

# One figure per metric: training values as dots, validation values as a line.
panels = [
    (acc, val_acc, 'Training acc', 'Validation acc', 'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
]
for panel_index, (train_curve, val_curve, train_label, val_label, title) in enumerate(panels):
    if panel_index > 0:
        # The first panel draws on the implicit current figure; later ones
        # each get a new figure.
        plt.figure()
    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(title)
    plt.legend()

plt.show()

### 6.2.2 理解LSTM层和GRU层
**代码清单 6-25** LSTM架构的详细伪代码（1/2）

In [None]:
# LSTM pseudocode (part 1), kept as a string literal so nothing is executed.
# The first line computes the output from the state, the input and the carry;
# the next three compute i_t, f_t and k_t, each with its own U/W/b parameters.
# NOTE(review): `C_t` is presumably the same carry that the next listing
# writes as `c_t` -- confirm the intended casing.
'''
output_t = activation(dot(state_t, Uo) + dot(input_t, Wo) + dot(C_t, Vo) + bo)

i_t = activation(dot(state_t, Ui) + dot(input_t, Wi) + bi)
f_t = activation(dot(state_t, Uf) + dot(input_t, Wf) + bf)
k_t = activation(dot(state_t, Uk) + dot(input_t, Wk) + bk)
'''

**代码清单 6-25** LSTM架构的详细伪代码（2/2）

In [None]:
# LSTM pseudocode (part 2), kept as a string literal: the next carry value is
# i_t * k_t plus the current carry c_t scaled by f_t.
'''
c_t+1 = i_t * k_t + c_t * f_t
'''

### 6.2.3 Keras中一个LSTM的具体例子
**代码清单 6-27** 使用Keras中的LSTM层

In [None]:
from keras.layers import LSTM

# Same classifier as the SimpleRNN version, with an LSTM layer as the
# recurrent component: embedding -> LSTM -> single sigmoid unit.
model = Sequential([
    Embedding(max_features, 32, input_length=maxlen),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# Train for 10 epochs, holding out 20% of the training data for validation.
history = model.fit(
    input_train,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

In [None]:
# Per-epoch training/validation curves recorded by model.fit for the LSTM run.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Number epochs from 1, matching the SimpleRNN plots earlier in the notebook,
# so the x axes of the two sets of figures are directly comparable.
epochs = range(1, len(acc) + 1)

# Accuracy: training values as dots, validation values as a line.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss goes on a separate figure.
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()