In [0]:
!pip install mxnet==1.6.0b20200101
!pip install d2lzh==0.8.10

import d2lzh as d2l
import math
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, rnn
import time


In [0]:
from google.colab import drive
drive.mount('/content/drive')

from mxnet import nd
import random

# Read the whole lyrics corpus with a single read() call. Looping over the file
# and calling f.read() inside the loop lets the iterator consume the first line
# (so it never reaches the corpus) and leaves corpus_chars undefined when the
# file is empty.
# NOTE(review): UTF-8 is assumed for this Chinese text (the original carried a
# commented-out .decode('utf-8')) — confirm against the actual file.
with open('/content/drive/My Drive/Data/Andy_Lau.txt', encoding='utf-8') as f:
  corpus_chars = f.read()

# This data set has many characters. For printing convenience, we replace line breaks with spaces.
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
# Optional: uncomment to train on a shorter prefix of the corpus for quick experiments.
# corpus_chars = corpus_chars[0:10000]

# Map each distinct character to a consecutive integer index starting from 0 to
# simplify later processing. The distinct characters are sorted so that the
# character <-> index mapping is identical on every run (iteration order of a
# set of strings changes between interpreter sessions).
# vocab_size is the number of distinct characters, i.e. the vocabulary size.
idx_to_char = sorted(set(corpus_chars))
char_to_idx = {char: i for i, char in enumerate(idx_to_char)}
vocab_size = len(char_to_idx)
# The corpus re-expressed as a list of character indices.
corpus_indices = [char_to_idx[char] for char in corpus_chars]

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Defining the model
# Gluon's rnn module provides an implementation of a recurrent neural network.
# Below, we construct a recurrent layer rnn_layer with a single hidden layer of
# 256 hidden units and initialize its weights.

num_hiddens = 256
rnn_layer = rnn.RNN(num_hiddens)
rnn_layer.initialize()

# Calling begin_state on rnn_layer returns a list of initialized hidden states.
# Its single element has shape
# (number of hidden layers, batch size, number of hidden units).

batch_size = 2
state = rnn_layer.begin_state(batch_size=batch_size)
# A notebook cell only echoes its last expression, so a bare `state[0].shape`
# here would be silently discarded; print it to actually show the shape.
print(state[0].shape)

# The input shape of rnn_layer is (number of time steps, batch size, number of inputs),
# where the number of inputs is the one-hot vector length (the vocabulary size).
# The forward computation returns both the output and the new hidden state.
# The output is the hidden state computed by the hidden layer at every time
# step; it is what is typically fed to the subsequent output layer.

num_steps = 35
X = nd.random.uniform(shape=(num_steps, batch_size, vocab_size))
Y, state_new = rnn_layer(X, state)
Y.shape, len(state_new), state_new[0].shape

((35, 2, 256), 1, (1, 2, 256))

In [0]:
# Next, we subclass the Block class to define a complete recurrent neural network. It first converts the input data to a one-hot vector representation, feeds it into rnn_layer, and then uses a fully connected output layer to produce the output. 
# The number of outputs is equal to vocab_size.

class RNNModel(nn.Block):
    """Character-level RNN: one-hot encoding -> recurrent layer -> dense output layer."""

    def __init__(self, rnn_layer, vocab_size, **kwargs):
        """Store the recurrent layer and create a dense layer with vocab_size outputs."""
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.dense = nn.Dense(vocab_size)

    def forward(self, inputs, state):
        """Run one forward pass.

        Returns a pair (output, state): the dense-layer output for every time
        step of every sequence, and the hidden state returned by the
        recurrent layer.
        """
        # Transpose the input to (num_steps, batch_size) before one-hot encoding it.
        one_hot_inputs = nd.one_hot(inputs.T, self.vocab_size)
        hidden_seq, new_state = self.rnn(one_hot_inputs, state)

        # Flatten the recurrent output to (num_steps * batch_size, num_hiddens) so the
        # dense layer yields (num_steps * batch_size, vocab_size).
        num_features = hidden_seq.shape[-1]
        flat_hidden = hidden_seq.reshape((-1, num_features))
        return self.dense(flat_hidden), new_state

    def begin_state(self, *args, **kwargs):
        """Forward all arguments to the wrapped recurrent layer's begin_state."""
        return self.rnn.begin_state(*args, **kwargs)



In [0]:
# Training the Model
# A prediction function is defined below. What is specific to this Gluon version is the function interface used for forward computation and for initializing the hidden state.

def predict_rnn_gluon(prefix, num_chars, model, vocab_size, ctx, idx_to_char,
                      char_to_idx):
    """Generate text that continues `prefix` by `num_chars` characters.

    The characters of `prefix` are fed through the model one at a time; once
    the prefix is exhausted, the highest-scoring character of each step is
    appended and fed back in as the next input.

    Args:
        prefix: Non-empty seed string; every character must be in char_to_idx.
        num_chars: Number of characters to generate after the prefix.
        model: Object providing begin_state(batch_size=..., ctx=...) and
            callable as model(X, state) -> (Y, state).
        vocab_size: Not used by this function; kept for interface compatibility.
        ctx: Device context passed to the state and input arrays.
        idx_to_char: Sequence mapping an index to its character.
        char_to_idx: Mapping from a character to its index.

    Returns:
        A string of len(prefix) + num_chars characters that starts with prefix.
    """
    # The model's own member function creates the initial hidden state.
    hidden = model.begin_state(batch_size=1, ctx=ctx)
    indices = [char_to_idx[prefix[0]]]
    warmup_steps = len(prefix) - 1
    for step in range(num_chars + warmup_steps):
        # The previous character (a single index) is the (1, 1) input of this step.
        step_input = nd.array([indices[-1]], ctx=ctx).reshape((1, 1))
        # The forward pass needs no explicit model parameters.
        scores, hidden = model(step_input, hidden)
        if step < warmup_steps:
            # Still consuming the prefix: the next character is given.
            next_index = char_to_idx[prefix[step + 1]]
        else:
            # Past the prefix: take the highest-scoring character.
            next_index = int(scores.argmax(axis=1).asscalar())
        indices.append(next_index)
    return ''.join(idx_to_char[i] for i in indices)

# Predict using a model with random weights.

# Obtain the device context from d2l, wrap the recurrent layer in the full
# model, (re-)initialize all parameters on that device, and sample 10
# characters from the untrained model.
ctx = d2l.try_gpu()
model = RNNModel(rnn_layer=rnn_layer, vocab_size=vocab_size)
model.initialize(ctx=ctx, force_reinit=True)
predict_rnn_gluon(prefix='男人', num_chars=10, model=model,
                  vocab_size=vocab_size, ctx=ctx, idx_to_char=idx_to_char,
                  char_to_idx=char_to_idx)

'男人褓劳墙净颇液液液液液'

In [0]:
# Implement the training function. Only consecutive (adjacent) sampling is used here to read the data.

def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    """Train `model` on the corpus and periodically print sample predictions.

    Args:
        model: Model exposing initialize, collect_params, begin_state and
            callable as model(X, state) -> (output, state).
        num_hiddens: Not used by this function; kept for interface compatibility.
        vocab_size: Passed through to predict_rnn_gluon.
        ctx: Device context for parameters, data and hidden state.
        corpus_indices: The corpus as a list of character indices.
        idx_to_char: Sequence mapping an index to its character.
        char_to_idx: Mapping from a character to its index.
        num_epochs: Number of passes over the corpus.
        num_steps: Number of time steps per mini-batch sequence.
        lr: SGD learning rate.
        clipping_theta: Threshold passed to d2l.grad_clipping.
        batch_size: Number of sequences per mini-batch.
        pred_period: Report perplexity and predictions every this many epochs.
        pred_len: Number of characters to generate per prefix when reporting.
        prefixes: Iterable of seed strings for the sample predictions.

    Returns:
        None. Parameters of `model` are re-initialized and trained in place;
        progress is printed.
    """
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            # Cut the hidden state off from the previous mini-batch's graph.
            # NDArray.detach() returns a NEW detached array instead of
            # modifying the array in place, so the result must be rebound;
            # calling s.detach() and discarding the result has no effect.
            state = [s.detach() for s in state]
            with autograd.record():
                (output, state) = model(X, state)
                # Flatten the transposed labels into a 1-D vector to pair
                # with the model's 2-D output.
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            # Clip gradient
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            # Because the loss has already been averaged, the gradient need
            # not be averaged again, hence a step size of 1.
            trainer.step(1)
            # Accumulate the summed (un-averaged) loss and the element count
            # so the epoch's perplexity can be computed from their ratio.
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx))

In [0]:
# Train the model

# Optimisation hyperparameters.
num_epochs = 250
batch_size = 32
lr = 1e2
clipping_theta = 1e-2

# Reporting: every pred_period epochs, generate pred_len characters per prefix.
pred_period = 50
pred_len = 50
prefixes = ['男人', '女人']

train_and_predict_rnn_gluon(
    model=model, num_hiddens=num_hiddens, vocab_size=vocab_size, ctx=ctx,
    corpus_indices=corpus_indices, idx_to_char=idx_to_char,
    char_to_idx=char_to_idx, num_epochs=num_epochs, num_steps=num_steps,
    lr=lr, clipping_theta=clipping_theta, batch_size=batch_size,
    pred_period=pred_period, pred_len=pred_len, prefixes=prefixes)

epoch 50, perplexity 65.720982, time 48.47 sec
 - 男人 我是你的梦里 我不要再不想要再一次 我是我的温柔 你是我的温柔 你的笑你 我的心不是你的口我是我的
 - 女人 我是我最好的朋友 一天真的爱走的理 我的心不是你的口 我的家不是你的口 我的眼不是你的泪 我的人不
epoch 100, perplexity 36.926451, time 49.37 sec
 - 男人 我要你的心里 爱你的爱意 我的眼神 你的眼光在我的温柔 我的眼光是你的路 我的眼睛是你的泪 我知道
 - 女人 就像是我们也是不是有太多歉 我要求你愿我一切 一对如一天走一次 只有爱的苦口 我们都在这方情义无可
epoch 150, perplexity 26.287890, time 48.85 sec
 - 男人 谁会懂得我太多我的心已经在人 你是我的女人 我说的心都跟你 不知道为了错的爱情 情深深刻 这一生都
 - 女人 谁又怕的心 没有再会再独留过 我们都在感慨中等候 我们都在感慨之中 我是我的心中 我的心也有苦苦嘘
epoch 200, perplexity 20.522814, time 49.34 sec
 - 男人 谁会懂得不相爱 我会愿意一样 我要是我的心情 我的眼神么的眼风 我有天空开不可惜 我会想番起我由以
 - 女人 如何能相逢 你是我的女人 我只是同行的眼光 我祝满家的心间的雾一样 我想你的人 我会让海会给我一点
epoch 250, perplexity 17.180056, time 52.79 sec
 - 男人 不要人扶留在你的梦里 我看著你的身影 我要你的爱情 我的爱你的收 我不必孤单 我的心不是你的路我的
 - 女人 无法得到的天长 我们都在梦中解脱清醒的神吻 我要你的对方我的心 我们都在心中 一个归去 我会想番起
