In [None]:
"""
LSTM
LSTM 中引入了3个门，即输入门（input gate）、遗忘门（forget gate）和输出门（output gate），
以及与隐藏状态形状相同的记忆细胞（某些文献把记忆细胞当成一种特殊的隐藏状态），从而记录额外的信息。

"""

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as f
import numpy as np
import math
import sys
import time
sys.path.append("..") 
import d2lzh_tensorflow2 as d2l

In [2]:
# Load the lyrics dataset through the d2l helper; it returns a 4-tuple that is
# unpacked into the module-level names used by the rest of the notebook.
corpus_indices, char_to_idx, idx_to_char, vocab_size = d2l.load_data_jay_lyrics(
    "../data/jaychou_lyrics.txt.zip"
)

In [12]:
# Experiment with a single LSTMCell and inspect its initial state.
num_hiddens = 256
cell = keras.layers.LSTMCell(
    num_hiddens,
    kernel_initializer='glorot_uniform',
)
batch_size = 2
# The initial state is a list of two tensors: the hidden state h and the cell state c.
state = cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)
state[0].shape  # shape of the first state tensor (displayed as the cell output)

TensorShape([2, 256])

In [15]:
# Full LSTM layer used both for the shape experiments below and inside RNNModel.
rnn_layer = keras.layers.LSTM(
    num_hiddens,
    # time_major=True: inputs and outputs are laid out as (timesteps, batch, feature)
    # rather than the default (batch, timesteps, feature).
    time_major=True,
    # Return the output of every time step, not only the last one.
    return_sequences=True,
    # Additionally return the final h and c states so they can seed the next call.
    return_state=True,
)

In [30]:
"""
注意：LSTM的输入state有两个，一个是c_state，另一个是h_state
c_state是从记忆细胞产生出来的, h_state是rnn的隐藏状态，与其他rnn模型类似
"""
# Push one random time-major batch through the layer to inspect what it returns.
num_steps = 23  # number of time steps in each input sequence
X = tf.random.uniform(
    shape=(num_steps, batch_size, vocab_size),
)
# With return_state=True the LSTM returns three values: the output sequence plus
# the final hidden state h and the final cell state c. There are two state tensors
# because an LSTM carries both h and c -- this is unrelated to batch_size being 2.
Y, h_state, c_state = rnn_layer(X)
# Y is (num_steps, batch_size, num_hiddens); each state is (batch_size, num_hiddens).

In [33]:
# Shapes of the output sequence and of the two final states returned by rnn_layer(X).
# Uses h_state / c_state (the names bound when the layer was called) so the cell
# also works after Restart & Run All; memory_state / carry_state were never defined.
Y.shape, h_state.shape, c_state.shape

(TensorShape([23, 2, 256]), TensorShape([2, 256]), TensorShape([2, 256]))

In [34]:
# Check that the last time step of the output sequence equals the final hidden state h.
# h_state is the name bound when rnn_layer(X) was called; memory_state was never defined.
Y[-1,:,:] == h_state

<tf.Tensor: id=7452, shape=(2, 256), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,

In [35]:
# Compare the last output step with the final cell state c (they are expected to differ).
# c_state is the name bound when rnn_layer(X) was called; carry_state was never defined.
Y[-1,:,:] == c_state

<tf.Tensor: id=7457, shape=(2, 256), dtype=bool, numpy=
array([[False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False,

In [42]:
"""
由于LSTM的输出state包含两个值，所以RNNModel需要修改一下
"""
class RNNModel(keras.layers.Layer):
    """Recurrent layer followed by a dense projection onto the vocabulary.

    The wrapped recurrent layer must return three values when called
    (output sequence, h state, c state), which is what an LSTM created with
    ``return_sequences=True`` and ``return_state=True`` does.
    """

    def __init__(self, rnn_layer, vocab_size, **kwargs):
        """Store the recurrent layer and create the output projection.

        Args:
            rnn_layer: recurrent Keras layer used to process the sequence.
            vocab_size: one-hot depth of the inputs and width of the dense output.
            **kwargs: forwarded unchanged to ``keras.layers.Layer``.
        """
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.dense = keras.layers.Dense(vocab_size)

    def call(self, inputs, state):
        """Run one forward pass.

        Args:
            inputs: integer token indices; they are transposed before encoding,
                so presumably (batch_size, num_steps) -- confirm against the caller.
            state: initial state handed to the recurrent layer.

        Returns:
            A pair ``(output, [h_state, c_state])`` where ``output`` has one row
            per (time step, sample) position and ``vocab_size`` columns.
        """
        # Transpose first so the one-hot tensor is time-major, then encode.
        step_major_ids = tf.transpose(inputs)
        one_hot_steps = tf.one_hot(step_major_ids, self.vocab_size)

        # The LSTM returns three values: the output sequence and both final states.
        seq_out, h_state, c_state = self.rnn(one_hot_steps, initial_state=state)

        # Flatten every leading dimension so the dense layer sees a 2-D tensor of
        # shape (num_steps * batch_size, num_hiddens) and emits
        # (num_steps * batch_size, vocab_size).
        feature_dim = seq_out.shape[-1]
        flat_out = tf.reshape(seq_out, (-1, feature_dim))
        output = self.dense(flat_out)

        # Bundle both states into one list so callers can pass it straight back in.
        return output, [h_state, c_state]

    def get_initial_state(self, *args, **kwargs):
        """Delegate to the wrapped layer's cell to build the initial state."""
        return self.rnn.cell.get_initial_state(*args, **kwargs)

In [43]:
# Baseline: build the model and sample lyrics from it before any training.
model = RNNModel(rnn_layer, vocab_size)
d2l.predict_rnn_keras(
    '分开', 10, model, vocab_size,
    idx_to_char, char_to_idx,
)

'分开队队招队招拜延墓当否'

In [45]:
# Train for several epochs and print sample predictions every `pred_period` epochs.
# Note: batch_size is rebound here (it was 2 in the earlier shape experiments), and
# num_steps is reused from the cell that built the random input X.
# NOTE(review): lr and clipping_theta are both 1e-2. If the d2l helper uses plain SGD
# with gradient clipping, each update would be very small, which might explain the
# high perplexity printed below -- confirm against the helper's implementation.
num_epochs = 250
batch_size = 32
lr = 1e-2
clipping_theta = 1e-2
pred_period = 50
pred_len = 50
prefixes = ['分开', '不分开']
d2l.train_and_predict_rnn_keras(
    model, num_hiddens, vocab_size,
    corpus_indices, idx_to_char, char_to_idx,
    num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes,
)

epoch 50, perplexity 20214.046207, time 3.19 sec
 - 分开  命  命刻  刻  刻 失   刻  失   刻  刻  失  刻   刻  失  刻   刻 
 - 不分开  命  命刻  刻   刻 失   刻  刻  失   刻  刻  失  刻   刻  失  刻 
epoch 100, perplexity 11898.305080, time 2.99 sec
 - 分开                                                  
 - 不分开                                                  
epoch 150, perplexity 8787.728522, time 3.04 sec
 - 分开                                                  
 - 不分开                                                  
epoch 200, perplexity 6918.206943, time 3.06 sec
 - 分开                                                  
 - 不分开                                                  
epoch 250, perplexity 6084.739069, time 3.04 sec
 - 分开                                                  
 - 不分开                                                  


In [None]:
"""
训练结果不太对：250 轮之后 perplexity 仍在 6000 以上，预测输出几乎全是空格，模型基本没学到东西。可能的原因是学习率（lr=1e-2）与梯度裁剪阈值（1e-2）同时偏小，有待进一步确认。
"""