In [1]:
import tensorflow as tf
import os
import numpy as np

print(tf.__version__)

2.0.0


#### 周杰伦歌词数据:
链接: https://pan.baidu.com/s/1QieFe3iuDlDeyTYe4dRySA 提取码: vwvs

In [2]:
#读取数据
import random
import zipfile

with zipfile.ZipFile('./data/jaychou_lyrics.txt.zip') as zin:
    with zin.open('jaychou_lyrics.txt') as f:
        corpus_chars = f.read().decode('utf-8')

In [3]:
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:10000]

In [4]:
# 建立字符索引
idx_to_char = list(set(corpus_chars))
char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
vocab_size = len(char_to_idx)

In [5]:
corpus_indices = [char_to_idx[char] for char in corpus_chars]
sample = corpus_indices[:20]
print('chars:', ''.join([idx_to_char[idx] for idx in sample]))
print('indices:', sample)

chars: 想要有直升机 想要和你飞到宇宙去 想要和
indices: [440, 159, 83, 333, 894, 63, 712, 440, 159, 487, 774, 426, 516, 694, 30, 867, 712, 440, 159, 487]


In [6]:
# one-hot向量
tf.one_hot(np.array([0, 2]), vocab_size)


<tf.Tensor: id=4, shape=(2, 1027), dtype=float32, numpy=
array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.]], dtype=float32)>

In [7]:
def to_onehot(X, size): 
    # X shape: (batch), output shape: (batch, n_class)
    return [tf.one_hot(x, size,dtype=tf.float32) for x in X.T]
X = np.arange(10).reshape((2, 5))
inputs = to_onehot(X, vocab_size)
len(inputs), inputs[0].shape

(5, TensorShape([2, 1027]))

In [8]:
# 从零实现 rnn
# 初始化模型参数，我们依据第二章的结论，使用 Ht = theta(concate(Xt + Ht-1, axis=-1) * W),所以可训练的模型参数是 W，输出矩阵Wo及bh,bo

# 初始化参数有：Ht-1 的维度
vocab_size = 1027
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
class Rnn(object):
    def __init__(self, hiden_dim，params=None):
        """
        初始化待训练的参数 variabel
        params = (hidden_weight, weight_out, bn_h, bn_o)
        """
        # 是否加载已有参数
        if params:
            self.hidden_weight, self.weight_out, self.bn_h, self.bn_o = params
        else:
            self.hidden_weight = self._ones(shape=(vocab_size+ num_hiddens, num_hiddens))
            self.weight_out = self._ones(shape=(num_hiddens, vocab_size + num_hiddens))
            self.bn_h = tf.Variable(tf.zeros([1,num_hiddens]), dtype=tf.float32)
            self.bn_o = tf.Variable(tf.zeros([1,vocab_size + num_hiddens]), dtype=tf.float32)
        
    def _ones(self,shape):
        return tf.Variable(tf.random.normal(shape=shape,stddev=0.01,mean=0,dtype=tf.float32))
    
    def net(self,inputs,Ht):
        # 展开做循环计算
        outputs = []
        for X in inputs:
            x = tf.reshape(X,(-1, vocab_size))
            # Ht = tf.tanh(theta(concate(Xt + Ht-1, axis=-1) * W) + bn)
            Ht = tf.tanh(tf.matmul(tf.concat([X,Ht], axis=1), self.hidden_weight) + self.bn_h)
            # 计算输出
            Y= tf.matmul(Ht,self.weight_out) + self.bn_o
            # 存储中间Y的输出,一般的rnn这里设置return_sequences和return_state参数, 
            # return_sequence=True返回多个序列,return_state=True代表返回隐向量
            outputs.append(Y)

        return outputs, Ht

In [9]:
# 初始化一个 hidden_state
def init_rnn_state(batch_size, num_hiddens):
    #随机初始化一个初始值
    return tf.zeros(shape=(batch_size, num_hiddens))

In [11]:
# 测试
state = init_rnn_state(X.shape[0], num_hiddens)
print(state.shape)
inputs = to_onehot(X, vocab_size)
print(len(inputs))# 5
print(inputs[0].shape)
rnn = Rnn(num_hiddens)
outputs, state_new = rnn.net(inputs, state)
print(len(outputs), outputs[0].shape, state_new.shape) 

(2, 256)
5
(2, 1027)
5 (2, 1283) (2, 256)
