## Statement
### This notebook code is based on Python3 and tensorflow 1.3.0

In [1]:
import os
import sys
import time

import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from tensorflow.contrib import rnn as rnn_cell
from tensorflow.contrib import legacy_seq2seq as seq2seq

### Class include the variables we might use

In [2]:
# You can write this code to a py.file called config.
class Config():
    
    batch_size = 32
    n_epoch = 100
    # modify the learning_rate through time
    learning_rate = 0.01
    decay_steps = 1000
    decay_rate = 0.9
    # grad_clip incase of gradient explosion
    grad_clip = 5
    # prob of dropout
    keep_prob = 0.5

    # state_size is hidden_size in BasicLSTMCell 
    # size of hidden layer of neurons
    state_size = 100
    # number of RNN layers
    num_layers = 3
    # length of sentence
    seq_length = 20
    log_dir = './logs'
    metadata = 'metadata.tsv'
    # num of chars to generate
    gen_num = 500 

### Class tackle with the input data to get the standard input

In [3]:
# You can write this code to a py.file named datagenerator
class DataGenerator():
3
    def __init__(self, filename, config):
        
        # length of the sentence / 
        # number of words that input the network one-time
        self.seq_length = config.seq_length 
        self.batch_size = config.batch_size

        # read the data
        with open(filename, encoding='utf-8') as f:
            self.data = f.read()

        # Take every Chinese word as a word
        self.words = list(set(self.data))
        self.total_len,self.vocab_size= len(self.data),len(self.words)   
        self.words.sort()
        print ('data has %d characters, %d unique.' % (self.total_len, self.vocab_size)) 
        # word2index & index2word
        self.char2id_dict = {w: i for i, w in enumerate(self.words)}
        self.id2char_dict = {i: w for i, w in enumerate(self.words)}
        # pointer position to generate current batch
        self.p = 0
        # save metadata file
        self.save_metadata(config.metadata)

    def char2id(self, c):
        return self.char2id_dict[c]

    def id2char(self, id):
        return self.id2char_dict[id]

    # save the vocabulary with its index
    def save_metadata(self, file):
        with open(file, 'w') as f:
            f.write('id\tchar\n')
            for i in range(self.vocab_size):
                c = self.id2char(i)
                f.write('{}\t{}\n'.format(i, c))

    # Get the train data and targets for every batch
    def next_batch(self):
        input_batches = []
        target_batches = []
        for i in range(self.batch_size):
            # In case of that all the input data has been trained
            if self.p + self.seq_length + 1 >= self.total_len:
                # go from start of data
                self.p = 0
  
            inputs = [self.char2id(ch) for ch in self.data[self.p : self.p + self.seq_length]]
            targets = [self.char2id(ch) for ch in self.data[self.p + 1 : self.p + self.seq_length + 1]]
            # update pointer position
            self.p += self.seq_length  
            input_batches.append(inputs)
            target_batches.append(targets)
        return input_batches, target_batches

### Class model( )

In [4]:
class Model(object):

    def __init__(self, config, data, is_training=False):
         
        if not is_training:
            config.batch_size = 1
            config.seq_length = 1

        with tf.name_scope('inputs'):
            self.input_data = tf.placeholder(
                tf.int32, [config.batch_size, config.seq_length])
            self.target_data = tf.placeholder(
                tf.int32, [config.batch_size, config.seq_length])


        with tf.name_scope('model'):
        
            def lstm_cell():
                lstm_cell = rnn_cell.BasicLSTMCell(config.state_size)
                if is_training and config.keep_prob < 1:
                    lstm_cell = rnn_cell.DropoutWrapper(
                        lstm_cell, output_keep_prob=config.keep_prob)
                return lstm_cell
            # Attention: it says you can't use [lstm_cell] * config.num_layers
            self.cell = rnn_cell.MultiRNNCell([lstm_cell() for _ in range(config.num_layers)])
            # 构造完多层LSTM以后，使用zero_state即可对各种状态进行初始化。
            self.initial_state = self.cell.zero_state(config.batch_size, tf.float32)


            with tf.device("/cpu:0"):
                    # embedding means word embedding.
                    # Turn the input words into embedding , 
                    # so the state_size is the size of embedding
                embedding = tf.get_variable('embedding', [data.vocab_size, config.state_size])
                # 返回一个tensor，shape是(batch_size, seq_length, state_size)
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)

            if is_training and config.keep_prob < 1:
                inputs = tf.nn.dropout(inputs, config.keep_prob)

            outputs, last_state = tf.nn.dynamic_rnn(
                self.cell, inputs, initial_state=self.initial_state)

            w = tf.get_variable('softmax_w', [config.state_size, data.vocab_size])
            b = tf.get_variable('softmax_b', [data.vocab_size])

        with tf.name_scope('loss'):
            # 把之前的list展开，成[batch, hidden_size*seq_length],
            # 然后 reshape, 成[batch*seq_length, hidden_size]
            output = tf.reshape(outputs, [-1, config.state_size])

            self.logits = tf.matmul(output, w) + b 
            self.probs = tf.nn.softmax(self.logits) 
            self.last_state = last_state

            # target, [batch_size, seq_length] 然后展开成一维列表
            targets = tf.reshape(self.target_data, [-1])
            # loss: shape=[batch*seq_length]
            loss = seq2seq.sequence_loss_by_example([self.logits],
                                                    [targets],
                                                    [tf.ones_like(targets, dtype=tf.float32)])
            # 计算得到平均每批batch的误差
            self.cost = tf.reduce_sum(loss) / config.batch_size
            tf.summary.scalar('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.summary.scalar('learning_rate', self.lr)

            # 通过tf.trainable_variables 可以得到整个模型中所有trainable=True的Variable。
            # 实际得到的tvars是一个列表，里面存有所有可以进行训练的变量。
            tvars = tf.trainable_variables()
            # tf.gradients 返回一个len(xs)的tesnor列表
            grads = tf.gradients(self.cost, tvars)
            # 梯度修剪，控制梯度爆炸
            grads, _ = tf.clip_by_global_norm(grads, config.grad_clip)
            for g in grads:
                tf.summary.histogram(g.name, g)

            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
            self.merged_op = tf.summary.merge_all()

### Train the network

In [5]:
def train(data, model, config):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(config.log_dir, sess.graph)
        start_time = time.time()
        costs = 0.0
        iters = 0

        # projector for tensorboard
        Pro_con = projector.ProjectorConfig()
        embed = Pro_con.embeddings.add()
        embed.tensor_name = 'rnnlm/embedding:0'
        embed.metadata_path = config.metadata
        projector.visualize_embeddings(writer, Pro_con)

        # n_epoch means the using times of the whole input_data
        max_iter = config.n_epoch * \
            (data.total_len // config.seq_length) // config.batch_size

        for i in range(max_iter):
            learning_rate = config.learning_rate * \
                (config.decay_rate ** (i // config.decay_steps))
            x_batch, y_batch = data.next_batch()
            feed_dict = {model.input_data: x_batch,
                         model.target_data: y_batch, model.lr: learning_rate}
            train_loss, summary, _, _ = sess.run([model.cost, model.merged_op, model.last_state, model.train_op],
                                                 feed_dict)

            # ------------------------ #
            # still have some question #
            # ------------------------ #
            costs += train_loss
            iters += config.seq_length
            if i % 10 == 0:
                writer.add_summary(summary, global_step=i)
                print('Step:{}/{}, training_loss:{:4f},perplexity:{:2f},cost-time:{:2f}'\
                    .format(i,max_iter, train_loss,np.exp(train_loss / iters),(time.time() - start_time)))
     
            start_time = time.time()
            if i % 2000 == 0 or (i + 1) == max_iter:
                saver.save(sess, os.path.join(
                    config.log_dir, 'lyrics_model.ckpt'), global_step=i)

### Generate the words by network

In [6]:
def sample(data, model, args):
    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.latest_checkpoint(args.log_dir)
        print(ckpt)
        saver.restore(sess, ckpt)

        # initial phrase to warm RNN
        prime = u'你要离开我知道很简单'
        state = sess.run(model.cell.zero_state(1, tf.float32))

        for word in prime[:-1]:
            x = np.zeros((1, 1))
            x[0, 0] = data.char2id(word)
            feed = {model.input_data: x, model.initial_state: state}
            state = sess.run(model.last_state, feed)

        word = prime[-1]
        lyrics = prime
        for i in range(args.gen_num):
            x = np.zeros([1, 1])
            x[0, 0] = data.char2id(word)
            feed_dict = {model.input_data: x, model.initial_state: state}
            probs, state = sess.run([model.probs, model.last_state], feed_dict)
            p = probs[0]
            word = data.id2char(np.argmax(p))
            print(word, end='')
            sys.stdout.flush()
            time.sleep(0.05)
            lyrics += word
        return lyrics

In [7]:
def main(is_training):

    config = Config()
    data = DataGenerator('JayLyrics.txt', config)
    model = Model(config, data, is_training=is_training)

    run_fn = train if is_training else sample

    run_fn(data, model,config)

In [8]:
main(1)

data has 65697 characters, 2636 unique.
INFO:tensorflow:Summary name optimize/clip_by_global_norm/optimize/clip_by_global_norm/_0:0 is illegal; using optimize/clip_by_global_norm/optimize/clip_by_global_norm/_0_0 instead.
INFO:tensorflow:Summary name optimize/clip_by_global_norm/optimize/clip_by_global_norm/_1:0 is illegal; using optimize/clip_by_global_norm/optimize/clip_by_global_norm/_1_0 instead.
INFO:tensorflow:Summary name optimize/clip_by_global_norm/optimize/clip_by_global_norm/_2:0 is illegal; using optimize/clip_by_global_norm/optimize/clip_by_global_norm/_2_0 instead.
INFO:tensorflow:Summary name optimize/clip_by_global_norm/optimize/clip_by_global_norm/_3:0 is illegal; using optimize/clip_by_global_norm/optimize/clip_by_global_norm/_3_0 instead.
INFO:tensorflow:Summary name optimize/clip_by_global_norm/optimize/clip_by_global_norm/_4:0 is illegal; using optimize/clip_by_global_norm/optimize/clip_by_global_norm/_4_0 instead.
INFO:tensorflow:Summary name optimize/clip_by_glob

KeyboardInterrupt: 