In [2]:
import sys

sys.path.append('..')
import numpy as np
from common.time_layers import *

In [12]:
class SimpleRnnlm:
    """Simple RNN language model: TimeEmbedding -> TimeRNN -> TimeAffine, plus a softmax loss.

    Attributes:
        layers: The three layers above, stored in the order they are applied.
        loss_layer: A TimeSoftmaxWithLoss applied to the last layer's output.
        rnn_layer: The TimeRNN layer (same object as ``layers[1]``), kept so
            that ``reset_state`` can reach it directly.
        params: Flat list concatenating every layer's ``params``.
        grads: Flat list concatenating every layer's ``grads``.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create float32 weights and assemble the layer stack.

        Args:
            vocab_size: V, the number of rows of the embedding matrix and the
                number of output columns of the affine layer.
            wordvec_size: D, the number of columns of the embedding matrix.
            hidden_size: H, the width of the RNN weights.
        """
        V, D, H = vocab_size, wordvec_size, hidden_size

        def scaled_randn(rows, cols, divisor):
            # Standard-normal draws divided by `divisor`, stored as float32.
            return (np.random.randn(rows, cols) / divisor).astype('f')

        # Weight initialisation. The random draws happen in this exact order.
        embed_W = scaled_randn(V, D, 100)
        rnn_Wx = scaled_randn(D, H, np.sqrt(D))
        rnn_Wh = scaled_randn(H, H, np.sqrt(H))
        rnn_b = np.zeros(H, dtype='f')
        affine_W = scaled_randn(H, V, np.sqrt(H))
        affine_b = np.zeros(V, dtype='f')

        # Build the layers.
        embedding = TimeEmbedding(embed_W)
        recurrent = TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True)
        projection = TimeAffine(affine_W, affine_b)
        self.layers = [embedding, recurrent, projection]

        self.loss_layer = TimeSoftmaxWithLoss()
        self.rnn_layer = recurrent

        # Gather every layer's weights and gradients into flat lists.
        self.params = [p for layer in self.layers for p in layer.params]
        self.grads = [g for layer in self.layers for g in layer.grads]

    def forward(self, xs, ts):
        """Feed ``xs`` through every layer in order and return the loss against ``ts``.

        NOTE(review): ``xs`` / ``ts`` are presumably arrays of word IDs --
        confirm against TimeEmbedding and TimeSoftmaxWithLoss.
        """
        out = xs
        for layer in self.layers:
            out = layer.forward(out)
        return self.loss_layer.forward(out, ts)

    def backward(self, dout=1):
        """Back-propagate ``dout`` through the loss layer, then the layers in reverse.

        Returns whatever the first layer's ``backward`` returns.
        """
        grad = self.loss_layer.backward(dout)
        for layer in self.layers[::-1]:
            grad = layer.backward(grad)
        return grad

    def reset_state(self):
        """Delegate to the RNN layer's ``reset_state``."""
        self.rnn_layer.reset_state()

In [6]:
import sys

sys.path.append('..')
import matplotlib.pyplot as plt
import numpy as np
from common.optimizer import SGD
from dataset import ptb

In [7]:
# Hyperparameters

# -- model dimensions (passed to SimpleRnnlm)
wordvec_size = 100  # number of columns D of the embedding matrix
hidden_size = 100   # number of elements in the RNN hidden-state vector

# -- training schedule (passed to trainer.fit / SGD)
batch_size = 10
time_size = 5       # time span of Truncated BPTT
max_epoch = 100
lr = 0.1            # learning rate handed to SGD

In [8]:
# Load the training data, then shrink it to a small prefix.
corpus_size = 1000  # number of leading tokens kept
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus = corpus[:corpus_size]
# Vocabulary size is taken as (largest ID present in the truncated corpus) + 1.
vocab_size = int(max(corpus) + 1)

In [9]:
# Inputs are every token but the last; targets (supervised labels) are the
# same sequence shifted one position ahead.
xs, ts = corpus[:-1], corpus[1:]
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))

corpus size: 1000, vocabulary size: 418


In [10]:
# Variables used for training.
# NOTE(review): the visible cells below train through RnnlmTrainer and never
# read these names -- confirm whether a later cell still needs them.
ppl_list = []
total_loss = loss_count = 0
time_idx = 0
max_iters = data_size // (batch_size * time_size)

In [13]:
# Create the model and its optimizer.
# NOTE(review): no random seed is set in the visible cells, so the initial
# weights differ from run to run.
model = SimpleRnnlm(
    vocab_size=vocab_size,
    wordvec_size=wordvec_size,
    hidden_size=hidden_size,
)
optimizer = SGD(lr)

In [14]:
from common.trainer import RnnlmTrainer

In [15]:
# Hand the model and optimizer to the project's RnnlmTrainer helper.
trainer = RnnlmTrainer(model, optimizer)

In [16]:
# Run training; the per-epoch perplexity log is printed as this cell's output.
# NOTE(review): arguments are positional -- confirm their order against
# RnnlmTrainer.fit in common/trainer.py.
trainer.fit(xs, ts, max_epoch, batch_size, time_size)

| epoch 1 |  iter 1 / 19 | time 0[s] | perplexity 417.61
| epoch 2 |  iter 1 / 19 | time 0[s] | perplexity 402.39
| epoch 3 |  iter 1 / 19 | time 0[s] | perplexity 310.19
| epoch 4 |  iter 1 / 19 | time 0[s] | perplexity 234.39
| epoch 5 |  iter 1 / 19 | time 0[s] | perplexity 214.16
| epoch 6 |  iter 1 / 19 | time 0[s] | perplexity 212.12
| epoch 7 |  iter 1 / 19 | time 0[s] | perplexity 202.63
| epoch 8 |  iter 1 / 19 | time 0[s] | perplexity 201.66
| epoch 9 |  iter 1 / 19 | time 0[s] | perplexity 195.12
| epoch 10 |  iter 1 / 19 | time 0[s] | perplexity 190.62
| epoch 11 |  iter 1 / 19 | time 0[s] | perplexity 193.12
| epoch 12 |  iter 1 / 19 | time 0[s] | perplexity 188.80
| epoch 13 |  iter 1 / 19 | time 0[s] | perplexity 191.83
| epoch 14 |  iter 1 / 19 | time 0[s] | perplexity 186.91
| epoch 15 |  iter 1 / 19 | time 0[s] | perplexity 185.39
| epoch 16 |  iter 1 / 19 | time 0[s] | perplexity 189.59
| epoch 17 |  iter 1 / 19 | time 0[s] | perplexity 187.64
| epoch 18 |  iter 1 / 