In [1]:
import sys

In [2]:
sys.path.append('..')
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from dataset import ptb
from common.optimizer import SGD
from common.time_layers import *

In [4]:
class SimpleRnnlm:
    """Small RNN language model assembled from the project's Time* layers.

    The network is a fixed three-layer stack -- ``TimeEmbedding`` ->
    ``TimeRNN`` (constructed with ``stateful=True``) -> ``TimeAffine`` --
    followed by a ``TimeSoftmaxWithLoss`` layer that produces the loss.

    Attributes set by the constructor:
        layers:     the three stacked layers, in forward order.
        loss_layer: the ``TimeSoftmaxWithLoss`` instance.
        rnn_layer:  the ``TimeRNN`` instance (same object as ``layers[1]``).
        params:     flat list of every layer's parameters, in layer order.
        grads:      flat list of every layer's gradients, in layer order.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the weights and wire up the layers.

        Args:
            vocab_size:   V, rows of the embedding matrix and width of the
                          affine output.
            wordvec_size: D, columns of the embedding matrix.
            hidden_size:  H, size of the recurrent state.
        """
        V, D, H = vocab_size, wordvec_size, hidden_size

        def scaled_normal(rows, cols, divisor):
            # Standard-normal samples divided by `divisor`, stored as float32.
            return (np.random.randn(rows, cols) / divisor).astype('f')

        # The random draws happen in this exact order (embedding, Wx, Wh,
        # affine), so a seeded run yields the same weights every time.
        embed_W = scaled_normal(V, D, 100)
        rnn_Wx = scaled_normal(D, H, np.sqrt(D))
        rnn_Wh = scaled_normal(H, H, np.sqrt(H))
        rnn_b = np.zeros(H, dtype='f')
        affine_W = scaled_normal(H, V, np.sqrt(H))
        affine_b = np.zeros(V, dtype='f')

        embedding = TimeEmbedding(embed_W)
        recurrent = TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True)
        projection = TimeAffine(affine_W, affine_b)

        self.layers = [embedding, recurrent, projection]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.rnn_layer = recurrent

        # Flatten per-layer lists; the entries are the layers' own objects
        # (no copies), so whatever consumes these lists sees the same arrays.
        self.params = [p for layer in self.layers for p in layer.params]
        self.grads = [g for layer in self.layers for g in layer.grads]

    def forward(self, xs, ts):
        """Run ``xs`` through every layer in order and return the loss.

        Args:
            xs: input handed to the first layer (presumably a batch of
                word-id sequences -- confirm against TimeEmbedding).
            ts: targets handed to the loss layer together with the output
                of the last stacked layer.

        Returns:
            Whatever ``loss_layer.forward`` returns.
        """
        activations = xs
        for stage in self.layers:
            activations = stage.forward(activations)
        return self.loss_layer.forward(activations, ts)

    def backward(self, dout=1):
        """Back-propagate from the loss layer through the stack in reverse.

        Args:
            dout: upstream gradient fed to the loss layer (defaults to 1).

        Returns:
            The value returned by the first layer's ``backward``.
        """
        grad = self.loss_layer.backward(dout)
        for stage in self.layers[::-1]:
            grad = stage.backward(grad)
        return grad

    def reset_state(self):
        """Delegate to the RNN layer's ``reset_state``."""
        self.rnn_layer.reset_state()

In [1]:
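# NOTE(review): everything in this cell is commented-out training code.
# Before re-enabling it, check the indentation of the tail of the loop:
#   * the forward/backward/update lines sit inside `for t in range(time_size)`,
#     so each step would run on batches whose later columns are still
#     unfilled `np.empty` memory;
#   * the perplexity report sits inside `for iter in range(max_iters)`, so it
#     would print once per iteration although the message says "epoch".
# Both look like they were meant to be one indentation level shallower -- confirm.
# batch_size = 10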
# wordvec_size = 100
# hidden_size = 100
# time_size = 5
# lr = 0.01
# max_epoch = 100

# corpus, word_to_id, id_to_word = ptb.load_data('train')
# corpus_size = 1000
# corpus = corpus[:corpus_size]
# vocab_size = int(max(corpus) + 1)

# xs = corpus[:-1]
# ts = corpus[1:]
# data_size = len(xs)
# print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))

# max_iters = data_size // (batch_size * time_size)
# time_idx = 0
# total_loss = 0
# loss_count = 0
# ppl_list = []
# model = SimpleRnnlm(vocab_size,wordvec_size,hidden_size)
# optimizer = SGD(lr)

# jump = (corpus_size - 1) // batch_size
# offsets = [i * jump for i in range(batch_size)]

# for epoch in range(max_epoch):
#     for iter in range(max_iters):
#         batch_x = np.empty((batch_size, time_size), dtype='i')
#         batch_Y = np.empty((batch_size, time_size), dtype='i')
#         for t in range(time_size):
#             for i,offset in enumerate(offsets):
#                 batch_x[i,t] = xs[(offset + time_idx) % data_size]
#                 batch_Y[i,t] = ts[(offset + time_idx) % data_size]
#             time_idx += 1
            
#             loss = model.forward(batch_x,batch_Y)
#             model.backward()
#             optimizer.update(model.params, model.grads)
#             total_loss += loss
#             loss_count += 1
        
#         ppl = np.exp(total_loss / loss_count)
#         print('| epoch %d | perplexity %.2f' % (epoch+1, ppl))
#         ppl_list.append(float(ppl))
#         total_loss, loss_count = 0,0