In [0]:
# Install pinned versions of MXNet and the d2lzh helper package into the
# Colab runtime. Both versions are pinned so the notebook keeps working when
# newer releases appear.
!pip install mxnet==1.6.0b20200101
!pip install d2lzh==0.8.10

# Mount Google Drive at /content/drive; the lyrics corpus opened further down
# lives under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
import d2lzh as d2l
import math
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, rnn
import time

from mxnet import nd
import random

# Read the entire lyrics corpus in a single call.
# The previous version wrapped f.read() in `for line in f:`, which consumed the
# first line through iteration before read() ran, so that line was silently
# dropped (and an empty file left corpus_chars undefined).
# The encoding is given explicitly because the corpus is Chinese text and the
# platform default encoding is not guaranteed to be UTF-8.
with open('/content/drive/My Drive/Data/Andy_Lau.txt', encoding='utf-8') as f:
    corpus_chars = f.read()

# The corpus spans many lines. For printing convenience, replace line breaks
# (both '\n' and '\r') with spaces.
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')

# Map each distinct character to a consecutive integer index starting from 0
# to simplify later processing.
# sorted() makes the index order deterministic: iterating a bare set of strings
# depends on per-process hash randomization, so the mapping would otherwise
# differ between kernel restarts.
idx_to_char = sorted(set(corpus_chars))
char_to_idx = {char: i for i, char in enumerate(idx_to_char)}

# vocab_size is the number of distinct characters in the dictionary.
vocab_size = len(char_to_idx)

# The whole corpus encoded as a list of character indices.
corpus_indices = [char_to_idx[char] for char in corpus_chars]

# Display the vocabulary size as the cell output.
vocab_size

2783

In [0]:
# Implementing from scratch
# Initializing model parameters
# The following code initializes the model parameters. The hyperparameter num_hiddens defines the number of hidden units.

# The input and output widths both equal the vocabulary size; the hidden state
# has 256 units.
num_hiddens = 256
num_inputs = vocab_size
num_outputs = vocab_size

# Device context used for every parameter and state array below.
# NOTE(review): d2l.try_gpu() presumably falls back to CPU when no GPU is
# available - confirm against d2lzh.
ctx = d2l.try_gpu()

def get_params():
    """Create the GRU parameters on `ctx` and attach gradient buffers.

    Returns:
        A list of 11 NDArrays in this fixed order:
        [W_xz, W_hz, b_z,   # update gate
         W_xr, W_hr, b_r,   # reset gate
         W_xh, W_hh, b_h,   # candidate hidden state
         W_hq, b_q]         # output layer
        Weights are drawn from nd.random.normal with scale=0.01; biases are
        zeros. Every array has had attach_grad() called on it.
    """
    def _random_weight(shape):
        return nd.random.normal(scale=0.01, shape=shape, ctx=ctx)

    def _gate_triplet():
        # Input-to-hidden weight, hidden-to-hidden weight, then the bias.
        input_weight = _random_weight((num_inputs, num_hiddens))
        hidden_weight = _random_weight((num_hiddens, num_hiddens))
        bias = nd.zeros(num_hiddens, ctx=ctx)
        return input_weight, hidden_weight, bias

    params = []
    # Three identically shaped triplets, created in this order:
    # update gate, reset gate, candidate hidden state.
    for _ in range(3):
        params.extend(_gate_triplet())

    # Output layer parameters.
    params.append(_random_weight((num_hiddens, num_outputs)))
    params.append(nd.zeros(num_outputs, ctx=ctx))

    # Allocate gradient storage for every parameter.
    for tensor in params:
        tensor.attach_grad()
    return params

In [0]:
# Defining the model
# The following code defines the hidden state initialization function init_gru_state. It returns a one-element tuple holding an all-zero NDArray of shape (batch size, number of hidden units).

def init_gru_state(batch_size, num_hiddens, ctx):
    """Return the initial GRU state as a one-element tuple.

    Args:
        batch_size: Number of rows of the state array.
        num_hiddens: Number of columns (hidden units) of the state array.
        ctx: Device context on which the array is allocated.

    Returns:
        A tuple `(H,)` where H is an all-zero NDArray of shape
        (batch_size, num_hiddens).
    """
    initial_hidden = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
    return (initial_hidden,)

# Define the model according to the calculation expression of the GRU.

def gru(inputs, state, params):
    """Run a GRU over a sequence of time-step inputs.

    Args:
        inputs: Iterable of per-time-step input arrays; each is multiplied by
            the input-to-hidden weights.
        state: One-element tuple `(H,)` holding the current hidden state.
        params: The 11 parameters in the order
            W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q.

    Returns:
        A pair `(outputs, (H,))`: the list of per-step output-layer results
        and the hidden state after the last step, wrapped in a tuple.
    """
    (W_xz, W_hz, b_z,
     W_xr, W_hr, b_r,
     W_xh, W_hh, b_h,
     W_hq, b_q) = params
    (hidden,) = state

    step_outputs = []
    for step_input in inputs:
        update_gate = nd.sigmoid(
            nd.dot(step_input, W_xz) + nd.dot(hidden, W_hz) + b_z)
        reset_gate = nd.sigmoid(
            nd.dot(step_input, W_xr) + nd.dot(hidden, W_hr) + b_r)
        # The reset gate scales the previous state before it feeds the candidate.
        candidate = nd.tanh(
            nd.dot(step_input, W_xh) + nd.dot(reset_gate * hidden, W_hh) + b_h)
        # The update gate blends the previous state with the candidate state.
        hidden = update_gate * hidden + (1 - update_gate) * candidate
        step_outputs.append(nd.dot(hidden, W_hq) + b_q)
    return step_outputs, (hidden,)



In [0]:
# Train models and write lyrics
# We use only adjacent sampling when training the model. After setting the hyperparameters, we train the model and generate 50-character lyrics from the prefixes "男人" and "女人".

# Training hyperparameters.
num_epochs = 160
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2

# Generation settings: every 50 epochs, generate 50 characters of lyrics from
# each prefix using the currently trained model.
pred_period = 50
pred_len = 50
prefixes = ['男人', '女人']

# NOTE(review): the positional False after char_to_idx presumably disables
# random sampling (i.e. selects adjacent sampling) - confirm against d2lzh.
d2l.train_and_predict_rnn(
    gru, get_params, init_gru_state, num_hiddens, vocab_size, ctx,
    corpus_indices, idx_to_char, char_to_idx,
    False,
    num_epochs, num_steps, lr, clipping_theta, batch_size,
    pred_period, pred_len, prefixes)

epoch 50, perplexity 22.993363, time 173.90 sec
 - 男人 就算是再见你不可再相恋 让我可以相爱过才是你 你的眼神就是我的家 有你的爱我的那一天你一生 也许不
 - 女人 就算是再见你不可再相恋 让我可以相爱过才是你 你的眼神就是我的家 有你的爱我的那一天你一生 也许不
epoch 100, perplexity 8.790387, time 168.05 sec
 - 男人 就算是分开不要得入爱 若要放弃了前途 难道出生的世界 我也会愿意在你地方的和我一个人的  在这里自
 - 女人 就算是分开不要得入爱 若要放弃了前途 难道出生的世界 我也会愿意在你地方的和我一个人的  在这里自
epoch 150, perplexity 5.255374, time 170.49 sec
 - 男人 但是你在这里 间空间一丝的美布 爱情在爱伤害了我愿去一点 如果你有天使怨怨会不敢 终于失去你从未爱
 - 女人 就算习惯一个人动人间 缘份就算再清楚 我会用去再可看看见已很想 现在我信爱已失踪 人不情爱你又会想


In [0]:
# Simple implementation
# In Gluon, we can directly call the GRU class in the rnn module.

# Same hyperparameters as the from-scratch run, restated so this cell can be
# read on its own.
num_epochs = 160
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2
pred_period = 50
pred_len = 50
prefixes = ['男人', '女人']

# Build the model from Gluon's built-in GRU layer wrapped in d2l.RNNModel,
# then train it and periodically generate lyrics.
model = d2l.RNNModel(rnn.GRU(num_hiddens), vocab_size)
d2l.train_and_predict_rnn_gluon(
    model, num_hiddens, vocab_size, ctx,
    corpus_indices, idx_to_char, char_to_idx,
    num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes)



epoch 50, perplexity 21.197166, time 96.85 sec
 - 男人生的 不会再一起一份 再会看一世间到的心 到底可到尽头 我有你的人 我会想番起 我会想番起 我会想番
 - 女人 我会想番起我由以前的环境转 到底怎么可以做错 我的心不可以像风雨 吹过千个人不可以 相信我爱情吧 
epoch 100, perplexity 7.914827, time 96.94 sec
 - 男人生 再见你一次我也不会想你 只要你在我的心里面看你的风里 说我都已在何地等待 我只想说不会的演 会慢
 - 女人 我会想番起我由以前嘅环境转变到而家 全部是无所谓 最爱你的人是我一个中国人 我也只能夜夜的望穿 那
epoch 150, perplexity 4.773199, time 96.75 sec
 - 男人哭吧 怎么可以给我女人 就算他看到半天才到在天边 让我可拥抱的火一 心想起争气还像在雪间 长夜多浪漫
 - 女人 我会想番起 我会想番起我由以前嘅环境转变到而家 这一生是我吗为何必再遇上 看著一切都不必会有人 可
