In [None]:
import numpy as np

# Sampling demo: draw 10 indices from {0, 1, 2}, where index i is chosen
# with probability p[i]. The array is shown as the cell's output.
p = [0.3, 0.6, 0.1]
np.random.choice(len(p), size=10, p=p)

In [4]:
import sys
sys.path.append('..')
import numpy as np
from common.functions import softmax
from rnnlm import Rnnlm
from better_rnnlm import BetterRnnlm

In [20]:
class RnnlmGen(BetterRnnlm):
    """BetterRnnlm extended with a method that samples word-id sequences."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word ids, one word per model step.

        Args:
            start_id: Id of the first word. It is stored as the first element
                of the result and is the first input fed to the model.
            skip_ids: Optional collection of word ids that must not appear in
                the result. A draw that hits one of them is discarded.
            sample_size: Length of the returned list, counting ``start_id``.

        Returns:
            A list of word ids; every sampled id is a plain Python ``int``.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            # np.random.choice(..., size=1) returns a 1-element array. Pull the
            # scalar out right away: calling int() on an array with ndim > 0 is
            # deprecated in recent NumPy, and a plain int makes the membership
            # test below an ordinary value comparison.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)
            # On a rejected draw x is left unchanged, so the previous word is
            # fed to the model again on the next iteration.

        return word_ids
        

In [21]:
from dataset import ptb

# Load the PTB training split together with its vocabulary mappings.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the generator and restore its weights from disk.
# NOTE(review): an earlier variant loaded 'Rnnlm.pkl' here instead; confirm
# that file matches the BetterRnnlm-based class before switching back.
model = RnnlmGen()
model.load_params('BetterRnnlm.pkl')

# Choose the start word and the words that must never be sampled.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[w] for w in skip_words]

# Generate the text: sample ids, map them back to words, and turn every
# end-of-sentence marker into a period followed by a line break.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[i] for i in word_ids).replace('<eos>', '.\n')
print(txt)

you get into it .
 in most cases the best proposals of the government helps be built down the work .
 tw systems plc and studying its accumulation philip morris and lin will continue to make ways to gain its confidence of life insurance patents .
 most of the mirage 's stock they must develop may be in the midst of an effort to submit sea containers ' calculations at least several other companies have that putting them beyond the publicly held .
 option officials maintain the company will reward another said but there is no question that the depress


In [22]:
# Reset the model's state before priming it with a new prompt.
model.reset_state()

start_words = 'the meaning of life is'
start_ids = [word_to_id[w] for w in start_words.split(' ')]
print(start_ids)

# Feed every prompt word except the last through the model. The predictions
# are discarded; presumably only the effect on the recurrent state matters.
for x in start_ids[:-1]:
    x = np.array(x).reshape(1, 1)
    model.predict(x)

# The last prompt word becomes the start id of the sampled continuation;
# the earlier prompt ids are put back in front of the sampled ids.
word_ids = start_ids[:-1] + model.generate(start_ids[-1], skip_ids)
txt = ' '.join(id_to_word[i] for i in word_ids).replace(' <eos>', '.\n')
print('-' * 50, txt, sep='\n')


[32, 4748, 42, 2262, 40]
--------------------------------------------------
the meaning of life is the chamber of commerce traders.
 contemplated by congressional leaders and several officials of the senate leaders last week that mr. gorbachev had an open choice is keeping the ideas.
 the vote from the difficulties forced to hits on u.s. anti-abortion forces not with the new positions.
 mr. dinkins is now a political administration of the u.s..
 we 're raising his benefits when rates go after republican negotiators.
 carl a deputy pilot leader in the middle east told the republican military regulation group that opposes him for drama and minpeco have a very marginal latin


In [23]:
# Inspect the prompt ids without the final one (the final id is the one
# passed to model.generate() as its start id in the cell above).
start_ids[:-1]

[32, 4748, 42, 2262]

In [25]:
from dataset import sequence

# Load the addition dataset with an explicit seed and fetch the character
# vocabulary that maps between characters and integer ids.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt', seed=1984)
char_to_id, id_to_char = sequence.get_vocab()
print(x_train.shape, t_train.shape)
print(x_test.shape, t_test.shape)

# First training pair as raw id arrays ...
print(x_train[0])
print(t_train[0])

# ... and the same pair decoded back into characters.
print(''.join([id_to_char[c] for c in x_train[0]]))
print(''.join([id_to_char[c] for c in t_train[0]]))

(45000, 7) (45000, 5)
(5000, 7) (5000, 5)
[ 3  0  2  0  0 11  5]
[ 6  0 11  7  5]
71+118 
_189 


In [38]:
from common.time_layers import *

class Encoder:
    """Embedding + LSTM stack that reduces an input sequence to its last hidden vector."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the two layers with freshly drawn float32 weights.

        Args:
            vocab_size: Number of rows of the embedding matrix.
            wordvec_size: Width of an embedding vector (LSTM input size).
            hidden_size: Size of the LSTM hidden state.
        """
        randn = np.random.randn

        # The draw order (embedding, input weights, recurrent weights) is
        # kept fixed so that a seeded run yields the same initial weights.
        embed_w = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_wx = (randn(wordvec_size, 4 * hidden_size) / np.sqrt(wordvec_size)).astype('f')
        lstm_wh = (randn(hidden_size, 4 * hidden_size) / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(4 * hidden_size).astype('f')

        self.embed = TimeEmbedding(embed_w)
        self.lstm = TimeLSTM(lstm_wx, lstm_wh, lstm_b, stateful=False)

        # Flat lists covering both layers, embedding first.
        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        # Hidden states of the most recent forward pass; backward() needs
        # them to size the gradient it builds.
        self.hs = None

    def forward(self, xs):
        """Run embedding then LSTM over ``xs`` and return the hidden state at the last time index."""
        self.hs = self.lstm.forward(self.embed.forward(xs))
        return self.hs[:, -1, :]

    def backward(self, dh):
        """Propagate ``dh``, the gradient of the last hidden state, back through both layers.

        Only the last time index received an upstream gradient, so every
        other time index of the gradient fed to the LSTM is zero.
        """
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh
        return self.embed.backward(self.lstm.backward(dhs))
   

In [63]:
class Decoder:
    """Embedding -> LSTM -> affine stack that scores the next token at every time step."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the three layers with freshly drawn float32 weights.

        Args:
            vocab_size: Number of embedding rows and number of output scores.
            wordvec_size: Width of an embedding vector (LSTM input size).
            hidden_size: Size of the LSTM hidden state.
        """
        randn = np.random.randn

        # Draw order is embedding, LSTM input, LSTM recurrent, affine; it is
        # kept fixed so that a seeded run yields the same initial weights.
        embed_w = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_wx = (randn(wordvec_size, 4 * hidden_size) / np.sqrt(wordvec_size)).astype('f')
        lstm_wh = (randn(hidden_size, 4 * hidden_size) / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(4 * hidden_size).astype('f')
        affine_w = (randn(hidden_size, vocab_size) / np.sqrt(hidden_size)).astype('f')
        affine_b = np.zeros(vocab_size).astype('f')

        self.embed = TimeEmbedding(embed_w)
        self.lstm = TimeLSTM(lstm_wx, lstm_wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_w, affine_b)

        # Flat parameter / gradient lists in layer order.
        layers = (self.embed, self.lstm, self.affine)
        self.params = [param for layer in layers for param in layer.params]
        self.grads = [grad for layer in layers for grad in layer.grads]

    def forward(self, xs, h):
        """Score every position of ``xs`` after setting the LSTM state to ``h``."""
        self.lstm.set_state(h)

        embedded = self.embed.forward(xs)
        hidden = self.lstm.forward(embedded)
        return self.affine.forward(hidden)

    def backward(self, dscore):
        """Backpropagate ``dscore`` through all layers and return the LSTM's ``dh`` attribute."""
        dhidden = self.affine.backward(dscore)
        dembedded = self.lstm.backward(dhidden)
        self.embed.backward(dembedded)
        return self.lstm.dh

    def generate(self, h, start_id, sample_size):
        """Greedily decode ``sample_size`` ids, starting from ``start_id`` with LSTM state ``h``.

        Each step feeds the previously chosen id (initially ``start_id``) as a
        1x1 input and picks the index of the highest score. ``start_id``
        itself is not part of the returned list.

        Returns:
            A list of ``sample_size`` Python ints.
        """
        self.lstm.set_state(h)

        sampled = []
        sample_id = start_id
        for _ in range(sample_size):
            x = np.array(sample_id).reshape((1, 1))
            score = self.affine.forward(self.lstm.forward(self.embed.forward(x)))

            sample_id = np.argmax(score.flatten())
            sampled.append(int(sample_id))

        return sampled

In [64]:
# Shape demo: build a (2, 3, 2) array holding 0..11, then show both the
# nested array and its flattened 1-D form.
a = np.arange(2 * 3 * 2).reshape((2, 3, 2))
print(a, a.flatten(), sep='\n')

[[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [65]:
from common.base_model  import BaseModel

class Seq2seq(BaseModel):
    """Encoder-decoder model trained with a time-distributed softmax loss."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build the encoder, the decoder and the loss layer.

        All three size arguments are passed unchanged to both Encoder and
        Decoder.
        """
        self.encoder = Encoder(vocab_size, wordvec_size, hidden_size)
        self.decoder = Decoder(vocab_size, wordvec_size, hidden_size)
        self.softmax = TimeSoftmaxWithLoss()

        # Encoder entries come first, followed by the decoder's.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        """Return the loss for inputs ``xs`` and target sequences ``ts``.

        The decoder is fed ``ts`` without its last column and is scored
        against ``ts`` without its first column, i.e. each position is
        trained to predict the following one.
        """
        decoder_xs = ts[:, :-1]
        decoder_ts = ts[:, 1:]

        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        return self.softmax.forward(score, decoder_ts)

    def backward(self, dout=1):
        """Backpropagate from the loss through decoder and encoder; returns the encoder's result."""
        dscore = self.softmax.backward(dout)
        dh = self.decoder.backward(dscore)
        return self.encoder.backward(dh)

    def generate(self, xs, start_id, sample_size):
        """Encode ``xs`` and let the decoder generate ``sample_size`` ids starting from ``start_id``."""
        return self.decoder.generate(self.encoder.forward(xs), start_id, sample_size)
    

In [68]:
import matplotlib.pyplot as plt
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_seq2seq
# from seq2seq import Seq2seq

# Load the addition dataset.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
# Reverse every input sequence along its second axis.
x_train = x_train[:, ::-1]
x_test = x_test[:, ::-1]
char_to_id, id_to_char = sequence.get_vocab()

# Hyperparameters.
vocab_size = len(char_to_id)
wordvec_size, hidden_size = 16, 128
batch_size, max_epoch, max_grad = 128, 25, 5.0

# Create the model, the optimizer and the trainer.
model = Seq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    # Train for exactly one epoch per outer iteration so that accuracy can
    # be measured after every epoch.
    trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size, max_grad=max_grad)

    correct_num = 0
    for i in range(len(x_test)):
        # Indexing with a one-element list keeps the leading axis, so each
        # sample is passed as a batch of size one.
        question, correct = x_test[[i]], t_test[[i]]
        # The fifth positional argument is the verbose flag: on for the
        # first ten test samples only.
        correct_num += eval_seq2seq(model, question, correct, id_to_char, i < 10)

    acc = correct_num / len(x_test)
    acc_list.append(acc)
    print('val acc {}'.format(acc * 100))

| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.56
| epoch 1 |  iter 21 / 351 | time 0[s] | loss 2.52
| epoch 1 |  iter 41 / 351 | time 1[s] | loss 2.17
| epoch 1 |  iter 61 / 351 | time 1[s] | loss 1.96
| epoch 1 |  iter 81 / 351 | time 2[s] | loss 1.91
| epoch 1 |  iter 101 / 351 | time 2[s] | loss 1.87
| epoch 1 |  iter 121 / 351 | time 2[s] | loss 1.86
| epoch 1 |  iter 141 / 351 | time 3[s] | loss 1.84
| epoch 1 |  iter 161 / 351 | time 3[s] | loss 1.80
| epoch 1 |  iter 181 / 351 | time 4[s] | loss 1.78
| epoch 1 |  iter 201 / 351 | time 4[s] | loss 1.77
| epoch 1 |  iter 221 / 351 | time 5[s] | loss 1.77
| epoch 1 |  iter 241 / 351 | time 5[s] | loss 1.76
| epoch 1 |  iter 261 / 351 | time 6[s] | loss 1.75
| epoch 1 |  iter 281 / 351 | time 6[s] | loss 1.74
| epoch 1 |  iter 301 / 351 | time 7[s] | loss 1.74
| epoch 1 |  iter 321 / 351 | time 7[s] | loss 1.74
| epoch 1 |  iter 341 / 351 | time 8[s] | loss 1.73
Q   58+77
T 162 
[91m☒[0m 100 
---
Q 461+579
T 1139
[91m☒[0m 1000


| epoch 7 |  iter 121 / 351 | time 2[s] | loss 0.67
| epoch 7 |  iter 141 / 351 | time 3[s] | loss 0.67
| epoch 7 |  iter 161 / 351 | time 3[s] | loss 0.67
| epoch 7 |  iter 181 / 351 | time 3[s] | loss 0.66
| epoch 7 |  iter 201 / 351 | time 4[s] | loss 0.66
| epoch 7 |  iter 221 / 351 | time 4[s] | loss 0.66
| epoch 7 |  iter 241 / 351 | time 5[s] | loss 0.64
| epoch 7 |  iter 261 / 351 | time 5[s] | loss 0.65
| epoch 7 |  iter 281 / 351 | time 6[s] | loss 0.64
| epoch 7 |  iter 301 / 351 | time 6[s] | loss 0.63
| epoch 7 |  iter 321 / 351 | time 7[s] | loss 0.63
| epoch 7 |  iter 341 / 351 | time 7[s] | loss 0.62
Q   58+77
T 162 
[92m☑[0m 162 
---
Q 461+579
T 1139
[91m☒[0m 1142
---
Q  48+285
T 666 
[92m☑[0m 666 
---
Q   551+8
T 163 
[91m☒[0m 162 
---
Q  55+763
T 422 
[92m☑[0m 422 
---
Q 752+006
T 857 
[91m☒[0m 859 
---
Q 292+167
T 1053
[91m☒[0m 1144
---
Q 795+038
T 1427
[91m☒[0m 1431
---
Q  838+62
T 864 
[91m☒[0m 866 
---
Q  39+341
T 236 
[91m☒[0m 239 
---
val ac

| epoch 13 |  iter 221 / 351 | time 4[s] | loss 0.43
| epoch 13 |  iter 241 / 351 | time 5[s] | loss 0.43
| epoch 13 |  iter 261 / 351 | time 5[s] | loss 0.41
| epoch 13 |  iter 281 / 351 | time 6[s] | loss 0.42
| epoch 13 |  iter 301 / 351 | time 6[s] | loss 0.41
| epoch 13 |  iter 321 / 351 | time 6[s] | loss 0.43
| epoch 13 |  iter 341 / 351 | time 7[s] | loss 0.40
Q   58+77
T 162 
[92m☑[0m 162 
---
Q 461+579
T 1139
[91m☒[0m 1140
---
Q  48+285
T 666 
[92m☑[0m 666 
---
Q   551+8
T 163 
[91m☒[0m 162 
---
Q  55+763
T 422 
[91m☒[0m 424 
---
Q 752+006
T 857 
[91m☒[0m 856 
---
Q 292+167
T 1053
[91m☒[0m 1054
---
Q 795+038
T 1427
[91m☒[0m 1429
---
Q  838+62
T 864 
[92m☑[0m 864 
---
Q  39+341
T 236 
[91m☒[0m 239 
---
val acc 39.42
| epoch 14 |  iter 1 / 351 | time 0[s] | loss 0.41
| epoch 14 |  iter 21 / 351 | time 0[s] | loss 0.41
| epoch 14 |  iter 41 / 351 | time 0[s] | loss 0.41
| epoch 14 |  iter 61 / 351 | time 1[s] | loss 0.40
| epoch 14 |  iter 81 / 351 | time 2[s

| epoch 19 |  iter 301 / 351 | time 5[s] | loss 0.32
| epoch 19 |  iter 321 / 351 | time 6[s] | loss 0.33
| epoch 19 |  iter 341 / 351 | time 6[s] | loss 0.33
Q   58+77
T 162 
[91m☒[0m 161 
---
Q 461+579
T 1139
[91m☒[0m 1140
---
Q  48+285
T 666 
[92m☑[0m 666 
---
Q   551+8
T 163 
[92m☑[0m 163 
---
Q  55+763
T 422 
[92m☑[0m 422 
---
Q 752+006
T 857 
[91m☒[0m 858 
---
Q 292+167
T 1053
[91m☒[0m 1052
---
Q 795+038
T 1427
[91m☒[0m 1430
---
Q  838+62
T 864 
[92m☑[0m 864 
---
Q  39+341
T 236 
[91m☒[0m 238 
---
val acc 47.52
| epoch 20 |  iter 1 / 351 | time 0[s] | loss 0.32
| epoch 20 |  iter 21 / 351 | time 0[s] | loss 0.35
| epoch 20 |  iter 41 / 351 | time 0[s] | loss 0.33
| epoch 20 |  iter 61 / 351 | time 1[s] | loss 0.33
| epoch 20 |  iter 81 / 351 | time 1[s] | loss 0.31
| epoch 20 |  iter 101 / 351 | time 1[s] | loss 0.31
| epoch 20 |  iter 121 / 351 | time 2[s] | loss 0.33
| epoch 20 |  iter 141 / 351 | time 2[s] | loss 0.32
| epoch 20 |  iter 161 / 351 | time 3[s

val acc 54.36
