In [None]:
"""
PeekyDecoder 的初始化和上一节的 Decoder 基本上是一样的，不同之处仅在于 LSTM 层权重和 Affine 层权重的形状。因为这次的实现要接收编码器编码好的向量，所以权重参数的形状相应地变大了。
接着是 forward() 的实现。这里首先使用 np.repeat() 根据时序大小复制相应份数的 h，并将其设置为 hs。然后，将 hs 和 Embedding 层的输出用 np.concatenate() 拼接，并输入 LSTM 层。同样地，Affine 层的输入也是 hs 和 LSTM 层的输出的拼接。
"""

In [3]:
import sys

sys.path.append('..')
import numpy as np
import matplotlib.pyplot as plt
from dataset import sequence
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_seq2seq
from peeky_seq2seq import PeekySeq2seq

In [8]:
# Load the addition dataset split into train/test inputs (x_*) and targets (t_*).
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')

# Flip every input row end-to-end (reverse along the second axis);
# the targets are left untouched.
x_train = x_train[:, ::-1]
x_test = x_test[:, ::-1]

# Lookup tables for converting between characters and integer ids.
char_to_id, id_to_char = sequence.get_vocab()

In [9]:
# Hyperparameters for the model and the training run.
vocab_size = len(char_to_id)  # number of entries in the char -> id mapping
wordvec_size = 16  # 2nd PeekySeq2seq argument; presumably the embedding dimension -- TODO confirm
hidden_size = 128  # 3rd PeekySeq2seq argument; presumably the LSTM hidden-state size -- TODO confirm
batch_size = 128  # forwarded to trainer.fit() as batch_size
max_epoch = 25  # number of passes of the outer train/evaluate loop
max_grad = 5.0  # forwarded to trainer.fit() as max_grad; presumably a gradient-clipping threshold -- TODO confirm

In [10]:
# Build the model, the optimizer, and the trainer that is handed both of them.
model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()  # constructed with no arguments, i.e. whatever defaults Adam defines
trainer = Trainer(model, optimizer)

In [11]:
# Accuracy on the test split, one entry appended after every epoch.
acc_list = []
for epoch in range(max_epoch):
    # Train one epoch per fit() call so accuracy can be measured in between.
    trainer.fit(
        x_train,
        t_train,
        max_epoch=1,
        batch_size=batch_size,
        max_grad=max_grad,
    )

    # Score the test problems one at a time and count the correct ones.
    correct_num = 0
    for i in range(len(x_test)):
        # Indexing with the list [i] selects a single row while keeping the
        # leading axis (length 1) instead of dropping it.
        question = x_test[[i]]
        correct = t_test[[i]]
        verbose = i < 10  # only the first 10 problems are evaluated in verbose mode
        correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose)

    acc = float(correct_num) / len(x_test)
    acc_list.append(acc)
    print('val acc %.3f%%' % (acc * 100))

| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.57
| epoch 1 |  iter 21 / 351 | time 0[s] | loss 2.48
| epoch 1 |  iter 41 / 351 | time 0[s] | loss 2.20
| epoch 1 |  iter 61 / 351 | time 1[s] | loss 1.99
| epoch 1 |  iter 81 / 351 | time 1[s] | loss 1.89
| epoch 1 |  iter 101 / 351 | time 2[s] | loss 1.82
| epoch 1 |  iter 121 / 351 | time 2[s] | loss 1.82
| epoch 1 |  iter 141 / 351 | time 3[s] | loss 1.80
| epoch 1 |  iter 161 / 351 | time 3[s] | loss 1.79
| epoch 1 |  iter 181 / 351 | time 4[s] | loss 1.78
| epoch 1 |  iter 201 / 351 | time 4[s] | loss 1.77
| epoch 1 |  iter 221 / 351 | time 4[s] | loss 1.76
| epoch 1 |  iter 241 / 351 | time 5[s] | loss 1.76
| epoch 1 |  iter 261 / 351 | time 5[s] | loss 1.75
| epoch 1 |  iter 281 / 351 | time 6[s] | loss 1.74
| epoch 1 |  iter 301 / 351 | time 6[s] | loss 1.74
| epoch 1 |  iter 321 / 351 | time 7[s] | loss 1.73
| epoch 1 |  iter 341 / 351 | time 7[s] | loss 1.73
Q   58+77
T 162 
X 100 
---
Q 461+579
T 1139
X 1013
---
Q  48+285
T 66