In [0]:
# Install pinned versions of MXNet and the d2lzh helper package into the notebook runtime.
!pip install mxnet==1.6.0b20200101
!pip install d2lzh==0.8.10

# Mount Google Drive at /content/drive so that files stored there can be opened by path below.
from google.colab import drive
drive.mount('/content/drive')

import d2lzh as d2l
import math
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, rnn
import time

from mxnet import nd
import random

# Read the whole lyrics corpus with a single read() call.
# Iterating over the file first (``for line in f``) would consume the first
# line before read() runs, silently dropping it from the corpus, and would
# leave ``corpus_chars`` undefined for an empty file.
# The encoding is given explicitly because the corpus is Chinese text and must
# not depend on the platform's default encoding.
with open('/content/drive/My Drive/Data/Andy_Lau.txt', encoding='utf-8') as f:
    corpus_chars = f.read()

# This data set has many characters. For printing convenience, we replace line breaks with spaces.
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
# To experiment on a smaller corpus, truncate it here, e.g.:
# corpus_chars = corpus_chars[0:10000]

# Map every distinct character to a consecutive integer index starting from 0,
# to facilitate subsequent data processing.
# idx_to_char lists the distinct characters (position == index) and
# char_to_idx is the inverse lookup dictionary.
# The characters are sorted so that the mapping is identical on every run: the
# iteration order of a set of strings can change between interpreter sessions
# (string hash randomization), which would otherwise assign different indices
# to the same characters after a kernel restart.
idx_to_char = sorted(set(corpus_chars))
char_to_idx = {char: i for i, char in enumerate(idx_to_char)}
# vocab_size is the number of different characters in the dictionary, also known as the vocabulary size.
vocab_size = len(char_to_idx)
# The whole corpus encoded as a list of character indices.
corpus_indices = [char_to_idx[char] for char in corpus_chars]

In [0]:
# Initializing model parameters
# The following code initializes the model parameters. The hyperparameter num_hiddens defines the number of hidden units.

# The input and output sizes both equal the vocabulary size; the hidden layer has 256 units.
num_hiddens = 256
num_inputs = vocab_size
num_outputs = vocab_size
# Device context returned by d2l.try_gpu(); parameters and states below are allocated on it.
ctx = d2l.try_gpu()

def get_params():
    """Create and return the trainable parameters of the LSTM.

    Sizes and device are read from the module-level ``num_inputs``,
    ``num_hiddens``, ``num_outputs`` and ``ctx``.

    Returns:
        A list of 14 NDArrays, each with a gradient buffer attached, in the
        order ``[W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
        W_xc, W_hc, b_c, W_hq, b_q]``: input gate, forget gate, output gate,
        candidate memory cell, then the output layer.
    """
    def _weight(shape):
        # Weights are drawn from a normal distribution with standard deviation 0.01.
        return nd.random.normal(scale=0.01, shape=shape, ctx=ctx)

    params = []
    # One (W_x*, W_h*, b_*) triple per gate, in the order:
    # input gate, forget gate, output gate, candidate memory cell.
    for _ in range(4):
        params.append(_weight((num_inputs, num_hiddens)))
        params.append(_weight((num_hiddens, num_hiddens)))
        params.append(nd.zeros(num_hiddens, ctx=ctx))

    # Output layer parameters (W_hq, b_q).
    params.append(_weight((num_hiddens, num_outputs)))
    params.append(nd.zeros(num_outputs, ctx=ctx))

    # Attach gradient buffers so that the parameters can be differentiated.
    for tensor in params:
        tensor.attach_grad()
    return params



In [0]:
# Defining the model
# The initial state of the LSTM consists of the hidden state plus an additional memory cell; both have shape (batch size, number of hidden units) and are filled with zeros.

def init_lstm_state(batch_size, num_hiddens, ctx):
    """Build the initial LSTM state: a zero hidden state and a zero memory cell.

    Args:
        batch_size: Size of the first dimension of both state arrays.
        num_hiddens: Size of the second dimension of both state arrays.
        ctx: Device context on which the arrays are allocated.

    Returns:
        A tuple ``(H, C)`` of two separate zero-filled NDArrays, each of shape
        ``(batch_size, num_hiddens)``.
    """
    state_shape = (batch_size, num_hiddens)
    hidden = nd.zeros(shape=state_shape, ctx=ctx)
    cell = nd.zeros(shape=state_shape, ctx=ctx)
    return (hidden, cell)

# Define the model based on the calculation expressions of long short-term memory (LSTM). Only the hidden state is passed to the output layer; the memory cell does not participate in the output layer's calculation.

def lstm(inputs, state, params):
    """Run the LSTM over a sequence of inputs, one time step at a time.

    Args:
        inputs: Iterable of per-time-step input arrays (presumably each of
            shape (batch size, vocabulary size) -- confirm against the caller).
        state: Tuple ``(H, C)`` holding the hidden state and the memory cell.
        params: The 14 parameters in the order produced by ``get_params``.

    Returns:
        A tuple ``(outputs, (H, C))`` where ``outputs`` is a list with one
        output-layer result per time step and ``(H, C)`` is the state after
        the last step (the input state unchanged if ``inputs`` is empty).
    """
    (W_xi, W_hi, b_i,
     W_xf, W_hf, b_f,
     W_xo, W_ho, b_o,
     W_xc, W_hc, b_c,
     W_hq, b_q) = params
    hidden, cell = state

    def _affine(x, h, w_x, w_h, b):
        # Shared pre-activation of every gate: x @ w_x + h @ w_h + b.
        return nd.dot(x, w_x) + nd.dot(h, w_h) + b

    outputs = []
    for X in inputs:
        # All gates are computed from the hidden state of the previous step.
        input_gate = nd.sigmoid(_affine(X, hidden, W_xi, W_hi, b_i))
        forget_gate = nd.sigmoid(_affine(X, hidden, W_xf, W_hf, b_f))
        output_gate = nd.sigmoid(_affine(X, hidden, W_xo, W_ho, b_o))
        candidate = nd.tanh(_affine(X, hidden, W_xc, W_hc, b_c))
        # Keep part of the old memory and add part of the new candidate.
        cell = forget_gate * cell + input_gate * candidate
        hidden = output_gate * nd.tanh(cell)
        # Only the hidden state feeds the output layer.
        outputs.append(nd.dot(hidden, W_hq) + b_q)
    return outputs, (hidden, cell)



In [0]:
# Train models and write lyrics
# Only use adjacent sampling when training the model. After setting the hyperparameters, we will train the model and create a 50-character piece of lyrics based on the prefixes "男人" and "女人".

# Training hyperparameters.
num_epochs = 160
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2

# Every pred_period epochs a piece of lyrics of length pred_len is generated
# for each prefix with the currently trained model.
pred_period = 50
pred_len = 50
prefixes = ['男人', '女人']

d2l.train_and_predict_rnn(
    lstm, get_params, init_lstm_state,
    num_hiddens, vocab_size, ctx,
    corpus_indices, idx_to_char, char_to_idx,
    False,  # presumably the random-sampling switch; False = adjacent sampling -- confirm against d2lzh
    num_epochs, num_steps, lr, clipping_theta, batch_size,
    pred_period, pred_len, prefixes)



epoch 50, perplexity 53.166713, time 180.38 sec
 - 男人 我不想再见你不再相信 再见你不再一生 无论无尽的生命 人生命里才变 一生一个人走 不要我想起起 时
 - 女人 我不想再想你不再再次 你不能再一起一天 一生一世都可能再爱 我想不想不想不再不可以 情情深深情恨我
epoch 100, perplexity 22.418568, time 183.35 sec
 - 男人生命中有了 我们爱意你一起过去 我愿意信你的手 我要你你要走我的要一定是无论 我要我的爱是我的爱得我
 - 女人 情若是我不必再出你的影子 我要你的梦里我不能的心间 不再爱你我不能爱你 我不能不要我的意 只因你在
epoch 150, perplexity 13.388277, time 176.50 sec
 - 男人生如像有几分 名著迷恋的女子 过去的笑容都会认只会睡我心你 不要再想我不怕爱情代不开口 我的爱你的梦
 - 女人 但你可知道地儿是不敢 如果你是天天天边 我要一个十年 我想想你的 我想你的我却不要有负  我说你不


In [0]:
# Simple implementation
# In Gluon, we can directly call the LSTM class in the rnn module.

# Same hyperparameters as for the from-scratch implementation.
num_epochs = 160
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2

# Every pred_period epochs a piece of lyrics of length pred_len is generated
# for each prefix with the currently trained model.
pred_period = 50
pred_len = 50
prefixes = ['男人', '女人']

# Gluon's built-in LSTM layer, wrapped in the d2l RNN model.
lstm_layer = rnn.LSTM(num_hiddens)
model = d2l.RNNModel(lstm_layer, vocab_size)

d2l.train_and_predict_rnn_gluon(
    model, num_hiddens, vocab_size, ctx,
    corpus_indices, idx_to_char, char_to_idx,
    num_epochs, num_steps, lr, clipping_theta, batch_size,
    pred_period, pred_len, prefixes)



epoch 50, perplexity 54.856527, time 116.16 sec
 - 男人 无论爱的爱情愿永远不再 我心中有泪泪水向你的心中 一个人一生的爱你 一个家一年我一个感觉我的心中 
 - 女人 无论爱的你是谁人生谁又何了 我心中一个人 为何时候永远再会再会 再次我一点的心情 谁人的情感觉是谁
epoch 100, perplexity 22.650973, time 116.38 sec
 - 男人 但是我一对爱错 这一天真的爱意 愿意中的呼应 偏偏偏偏 情深深深爱与爱可明 如何可以往事再次 爱情
 - 女人 但我吧过你一个梦 我爱你可知道爱恨爱 爱你爱到永恒不再 爱与你不是一生一世 可以可能为何相对 不相
epoch 150, perplexity 13.332149, time 108.79 sec
 - 男人 但是我没有缺过的生活 能知道离开你会变好 我祝福时光 为你我不开心亲紧紧 活得去算已经已经不会懂 
 - 女人 但是我没有新过的生活 能知道离开你会慢慢地亲亲爱火 人心我不想走 你能习惯一个自己 不管他情人已不
