In [1]:
import sys

sys.path.append("..")
import numpy as np
from common.functions import softmax
from ch06.rnnlm import Rnnlm
from ch06.better_rnnlm import BetterRnnlm


class RnnlmGen(Rnnlm):
    """Rnnlm extended with a sampling-based sequence generator."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a list of word IDs by repeatedly sampling the next word.

        Args:
            start_id (int): ID of the first word fed to the model. It is also
                the first element of the returned list.
            skip_ids (Container[int], optional): Word IDs that must never be
                emitted. A draw that hits one of them is discarded and the
                previous word is fed to ``predict`` again. Defaults to None
                (nothing is skipped).
            sample_size (int, optional): Length of the returned list,
                including ``start_id``. Defaults to 100.

        Returns:
            list[int]: The generated word IDs, beginning with ``start_id``.
        """
        word_ids = [start_id]
        x = start_id
        while len(word_ids) < sample_size:
            # The model is fed one word at a time as a (1, 1) array.
            score = self.predict(np.array(x).reshape(1, 1))
            # Normalise the flattened scores into a probability distribution.
            p = softmax(score.flatten())
            # np.random.choice(..., size=1) returns a 1-element array; pull the
            # scalar out explicitly. Calling int() on an array with ndim > 0 is
            # deprecated since NumPy 1.25, and a plain int also makes the
            # membership test below work for any container (list, set, ...).
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)

        return word_ids


In [5]:
import sys

sys.path.append("..")
from ch07.rnnlm_gen import RnnlmGen
from dataset import ptb

# Load the PTB training split together with both vocabulary lookup tables.
corpus, word_to_id, id_to_word = ptb.load_data("train")
vocab_size = len(word_to_id)

# Build the generator model and restore its saved parameters.
model = RnnlmGen()
model.load_params("ch06/Rnnlm.pkl")

# Word that starts the generated text, and the words excluded from sampling.
start_word = "you"
start_id = word_to_id[start_word]
skip_words = ["N", "<unk>", "$"]
skip_ids = [word_to_id[word] for word in skip_words]

# Generate word IDs, map them back to words and print the result with
# every " <eos>" marker replaced by a full stop and a line break.
word_ids = model.generate(start_id, skip_ids)
txt = " ".join(id_to_word[word_id] for word_id in word_ids)
txt = txt.replace(" <eos>", ".\n")
print(txt)


you 'll be seen as weekend of the company.
 mr. fleischmann made a firm all linked off the proposals sharply and former president and senate of the chairman and chief executive officer thomas lawson.
 the board is investment in his daughters.
 we need political demands.
 donald e. evans the chairman of the firm 's pilots in new products is not one of his role in an money.
 we need to see that direct competition in multiples.
 its most aggressive junk banks received on its own consent development yesterday morning he believes that the chairman


In [6]:
import sys

sys.path.append("..")
from dataset import sequence

# Load the addition dataset (fixed seed) and its character vocabulary.
(x_train, t_train), (x_test, t_test) = sequence.load_data("addition.txt", seed=1984)
char_to_id, id_to_char = sequence.get_vocab()

# Array shapes of the training and test splits.
print(x_train.shape, t_train.shape)
print(x_test.shape, t_test.shape)

# First training pair, shown as raw character IDs ...
print(x_train[0])
print(t_train[0])
# ... and decoded back into characters.
print("".join(id_to_char[char_id] for char_id in x_train[0]))
print("".join(id_to_char[char_id] for char_id in t_train[0]))


(45000, 7) (45000, 5)
(5000, 7) (5000, 5)
[ 3  0  2  0  0 11  5]
[ 6  0 11  7  5]
71+118 
_189 


In [None]:
from turtle import forward
import numpy as np
from common.base_model import BaseModel

from common.time_layers import TimeAffine, TimeEmbedding, TimeLSTM, TimeSoftmaxWithLoss


class Encoder:
    """TimeEmbedding followed by a non-stateful TimeLSTM.

    ``forward`` returns only the LSTM output at the last index of axis 1;
    the full LSTM output is cached in ``self.hs`` for ``backward``.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the layers with randomly initialised float32 weights.

        Args:
            vocab_size: Number of rows V of the embedding matrix.
            wordvec_size: Number of columns D of the embedding matrix and
                number of rows of the LSTM input weight.
            hidden_size: H; the LSTM weights are (D, 4H) and (H, 4H) and the
                bias has length 4H.
        """
        randn = np.random.randn

        # The draw order (embedding, Wx, Wh) is kept so that a seeded RNG
        # yields the same initial weights as before.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype("f")
        lstm_Wx = (randn(wordvec_size, 4 * hidden_size) / np.sqrt(wordvec_size)).astype("f")
        lstm_Wh = (randn(hidden_size, 4 * hidden_size) / np.sqrt(hidden_size)).astype("f")
        lstm_b = np.zeros(4 * hidden_size).astype("f")

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)

        # Expose the layers' parameters and gradients as flat sequences.
        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        self.hs = None

    def forward(self, xs):
        """Run embedding and LSTM over ``xs`` and return the last LSTM output.

        Args:
            xs: Input passed to the embedding layer.

        Returns:
            ``hs[:, -1, :]`` where ``hs`` is the LSTM output (also stored in
            ``self.hs``).
        """
        embedded = self.embed.forward(xs)
        self.hs = self.lstm.forward(embedded)
        return self.hs[:, -1, :]

    def backward(self, dh):
        """Back-propagate a gradient given for the value returned by ``forward``.

        Args:
            dh: Gradient with respect to ``hs[:, -1, :]``.

        Returns:
            Whatever the embedding layer's ``backward`` returns.
        """
        # Only the last position along axis 1 was returned by forward(), so
        # every other position receives a zero gradient.
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh

        return self.embed.backward(self.lstm.backward(dhs))


class Decoder:
    """TimeEmbedding -> stateful TimeLSTM -> TimeAffine stack.

    The LSTM state is set from an externally supplied ``h`` before decoding.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the layers with randomly initialised float32 weights.

        Args:
            vocab_size: V; rows of the embedding matrix and columns of the
                affine weight.
            wordvec_size: D; columns of the embedding matrix and rows of the
                LSTM input weight.
            hidden_size: H; the LSTM weights are (D, 4H) and (H, 4H), the
                affine weight is (H, V).
        """
        randn = np.random.randn

        # The draw order (embedding, Wx, Wh, affine W) is kept so that a
        # seeded RNG yields the same initial weights as before.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype("f")
        lstm_Wx = (randn(wordvec_size, 4 * hidden_size) / np.sqrt(wordvec_size)).astype("f")
        lstm_Wh = (randn(hidden_size, 4 * hidden_size) / np.sqrt(hidden_size)).astype("f")
        lstm_b = np.zeros(4 * hidden_size).astype("f")
        affine_W = (randn(hidden_size, vocab_size) / np.sqrt(hidden_size)).astype("f")
        affine_b = np.zeros(vocab_size).astype("f")

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        # Flatten every layer's parameters / gradients into one list each.
        layers = (self.embed, self.lstm, self.affine)
        self.params = [param for layer in layers for param in layer.params]
        self.grads = [grad for layer in layers for grad in layer.grads]

    def forward(self, xs, h):
        """Compute scores for ``xs`` after setting the LSTM state to ``h``.

        Args:
            xs: Input passed to the embedding layer.
            h: State handed to ``self.lstm.set_state``.

        Returns:
            Output of the affine layer.
        """
        self.lstm.set_state(h)
        embedded = self.embed.forward(xs)
        hidden = self.lstm.forward(embedded)
        return self.affine.forward(hidden)

    def backward(self, dscore):
        """Back-propagate through affine, LSTM and embedding layers.

        Args:
            dscore: Gradient with respect to the scores returned by ``forward``.

        Returns:
            ``self.lstm.dh`` as it is after the LSTM backward pass.
        """
        dhidden = self.affine.backward(dscore)
        dembedded = self.lstm.backward(dhidden)
        self.embed.backward(dembedded)
        return self.lstm.dh

    def generate(self, h, start_id, sample_size):
        """Produce ``sample_size`` IDs, taking the argmax of the scores each step.

        Args:
            h: State handed to ``self.lstm.set_state`` before generation.
            start_id: ID fed to the network at the first step.
            sample_size: Number of IDs to generate.

        Returns:
            list[int]: The generated IDs (``start_id`` itself is not included).
        """
        self.lstm.set_state(h)

        sampled = []
        current_id = start_id
        for _ in range(sample_size):
            # One ID per step, shaped (1, 1); the result is fed back as the
            # next input.
            step_input = np.array(current_id).reshape(1, 1)
            hidden = self.lstm.forward(self.embed.forward(step_input))
            score = self.affine.forward(hidden)

            current_id = np.argmax(score.flatten())
            sampled.append(int(current_id))

        return sampled


class Seq2seq(BaseModel):
    """Encoder-decoder model: an Encoder, a Decoder and a TimeSoftmaxWithLoss."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build the encoder, decoder and loss layer.

        Args:
            vocab_size: Passed to both Encoder and Decoder as V.
            wordvec_size: Passed to both Encoder and Decoder as D.
            hidden_size: Passed to both Encoder and Decoder as H.
        """
        V, D, H = vocab_size, wordvec_size, hidden_size
        self.encoder = Encoder(V, D, H)
        self.decoder = Decoder(V, D, H)
        self.softmax = TimeSoftmaxWithLoss()

        # Encoder entries come first, then the decoder's.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        """Compute the training loss for a batch.

        Args:
            xs: Encoder input.
            ts: Target array, sliced along axis 1: ``ts[:, :-1]`` is fed to
                the decoder and ``ts[:, 1:]`` is used as the labels.

        Returns:
            The value returned by the loss layer's ``forward``.
        """
        decoder_xs, decoder_ts = ts[:, :-1], ts[:, 1:]

        # The encoder (not the decoder) turns xs into the state h that the
        # decoder starts from.
        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        loss = self.softmax.forward(score, decoder_ts)

        return loss

    def backward(self, dout=1):
        """Back-propagate through loss, decoder and encoder, in that order.

        Args:
            dout: Upstream gradient handed to the loss layer. Defaults to 1.

        Returns:
            Whatever the encoder's ``backward`` returns.
        """
        dout = self.softmax.backward(dout)
        dh = self.decoder.backward(dout)
        dout = self.encoder.backward(dh)
        return dout

    def generate(self, xs, start_id, sample_size):
        """Encode ``xs`` and let the decoder generate ``sample_size`` IDs.

        Args:
            xs: Encoder input.
            start_id: First ID fed to the decoder.
            sample_size: Number of IDs to generate.

        Returns:
            list[int]: IDs produced by ``Decoder.generate``.
        """
        h = self.encoder.forward(xs)
        sampled = self.decoder.generate(h, start_id, sample_size)
        return sampled


In [7]:
import sys
sys.path.append("..")
import numpy as np
import matplotlib.pyplot as plt
from dataset import sequence
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_perplexity, eval_seq2seq
from ch07.seq2seq import Seq2seq
from ch07.peeky_seq2seq import PeekySeq2seq

# Load the addition dataset and its character vocabulary.
(x_train, t_train), (x_test, t_test) = sequence.load_data("addition.txt")
char_to_id, id_to_char = sequence.get_vocab()

# Hyperparameters
vocab_size = len(char_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 128
max_epoch = 25
max_grad = 5.0

# Model, optimizer and trainer
model = Seq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    # Train a single epoch per call so accuracy can be measured after each one.
    trainer.fit(
        x_train,
        t_train,
        max_epoch=1,
        batch_size=batch_size,
        max_grad=max_grad,
    )

    # Evaluate on the test split one sample at a time; x_test[[i]] keeps the
    # leading axis. verbose is True only for the first 10 samples.
    correct_num = 0
    for i in range(len(x_test)):
        question, correct = x_test[[i]], t_test[[i]]
        verbose = i < 10
        correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose)

    acc = float(correct_num) / len(x_test)
    acc_list.append(acc)
    print("val acc %.3f%%" % (acc * 100))

| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.56
| epoch 1 |  iter 21 / 351 | time 0[s] | loss 2.53
| epoch 1 |  iter 41 / 351 | time 1[s] | loss 2.17
| epoch 1 |  iter 61 / 351 | time 2[s] | loss 1.96
| epoch 1 |  iter 81 / 351 | time 3[s] | loss 1.92
| epoch 1 |  iter 101 / 351 | time 4[s] | loss 1.87
| epoch 1 |  iter 121 / 351 | time 4[s] | loss 1.85
| epoch 1 |  iter 141 / 351 | time 5[s] | loss 1.83
| epoch 1 |  iter 161 / 351 | time 6[s] | loss 1.79
| epoch 1 |  iter 181 / 351 | time 7[s] | loss 1.77
| epoch 1 |  iter 201 / 351 | time 8[s] | loss 1.77
| epoch 1 |  iter 221 / 351 | time 9[s] | loss 1.76
| epoch 1 |  iter 241 / 351 | time 9[s] | loss 1.76
| epoch 1 |  iter 261 / 351 | time 10[s] | loss 1.76
| epoch 1 |  iter 281 / 351 | time 11[s] | loss 1.75
| epoch 1 |  iter 301 / 351 | time 12[s] | loss 1.74
| epoch 1 |  iter 321 / 351 | time 13[s] | loss 1.75
| epoch 1 |  iter 341 / 351 | time 14[s] | loss 1.74
Q 77+85  
T 162 
[91m☒[0m 100 
---
Q 975+164
T 1139
[91m☒[0m 

KeyboardInterrupt: 