In [1]:
import sys
sys.path.append('../code/')

In [2]:
import numpy as np
from gru import GRU
from runner import Runner
import pandas as pd
from utils import invert_dict, load_np_dataset, docs_to_indices, seqs_to_npXY
from rnnmath import fraq_loss

In [3]:
# Seed NumPy's global RNG before anything else runs, so repeated
# Restart-and-Run-All executions draw the same random numbers.
np.random.seed(2018)

# Directory from which the vocabulary file and the wiki train/dev
# text files are read further down.
data_folder = '../data/'

In [4]:
# Number of leading training sequences kept after loading.
train_size = 2000
# Number of leading development sequences kept after loading.
# NOTE(review): the original author flagged this value as uncertain ("??").
dev_size = 1000
# Number of vocabulary entries retained (first rows of the vocab file).
vocab_size = 2000
# Number of training epochs passed to the runner.
epochs = 10

In [5]:
# Hidden-layer sizes to try; one model is trained per entry.
hdims = [10, 25, 50]
# Initial learning rate handed to the runner.
lr = 0.5
# Passed to the runner as `back_steps`.
# NOTE(review): the original author flagged this value as uncertain ("??").
lookback = 0
# Output dimension of the model -- presumably two classes; confirm against the task.
out_vocab_size = 2

In [6]:
# Read the whitespace-separated vocabulary table. The separator is a regex,
# so it is written as a raw string: "\s" is not a valid string escape and a
# plain literal triggers an invalid-escape warning on recent Python versions.
vocab = pd.read_table(data_folder + "/vocab.wiki.txt", header=None, sep=r"\s+", index_col=0,
                      names=['count', 'freq'], )
# Index <-> word mappings restricted to the first vocab_size vocabulary entries.
num_to_word = dict(enumerate(vocab.index[:vocab_size]))
word_to_num = invert_dict(num_to_word)

# Report what share of tokens survives the cut to vocab_size words.
fraction_lost = fraq_loss(vocab, word_to_num, vocab_size)
print(
    "Retained %d words from %d (%.02f%% of all tokens)\n" % (
    vocab_size, len(vocab), 100 * (1 - fraction_lost)))

# load training data
sents = load_np_dataset(data_folder + '/wiki-train.txt')
S_train = docs_to_indices(sents, word_to_num, 0, 0)
X_train, D_train = seqs_to_npXY(S_train)

# Truncate inputs AND labels to the same length. Previously only X_train was
# cut while the truncated labels went to Y_train, leaving D_train (the name
# the training cell actually passes) at full length and misaligned with
# X_train. This now mirrors the dev split below.
X_train = X_train[:train_size]
D_train = D_train[:train_size]
# Backward-compatible alias for any code still referring to Y_train.
Y_train = D_train

# load development data
sents = load_np_dataset(data_folder + '/wiki-dev.txt')
S_dev = docs_to_indices(sents, word_to_num, 0, 0)
X_dev, D_dev = seqs_to_npXY(S_dev)

X_dev = X_dev[:dev_size]
D_dev = D_dev[:dev_size]


Retained 2000 words from 9954 (88.35% of all tokens)



In [7]:
# Q3.d
# Train one GRU per hidden-layer size listed in `hdims`, using the same data
# and hyper-parameters for every run. A two-line banner separates the runs.

banner = '######################################################################'

# Arguments shared by every training run.
train_kwargs = dict(
    X=X_train,
    D=D_train,
    X_dev=X_dev,
    D_dev=D_dev,
    epochs=epochs,
    learning_rate=lr,
    back_steps=lookback,
)

for hdim in hdims:
    gru_model = GRU(vocab_size=vocab_size, hidden_dims=hdim, out_vocab_size=out_vocab_size)
    r = Runner(model=gru_model)
    r.train_np(**train_kwargs)

    print(banner, banner, sep='\n')


Training model for 10 epochs
training set: 2000 sentences (batch size 100)
Optimizing loss on 1000 sentences
Vocab size: 2000
Hidden units: 10
Steps for back propagation: 0
Initial learning rate set to 0.5, annealing set to 5

calculating initial mean loss on dev set: 0.6957307846288112
calculating initial acc on dev set: 0.511

epoch 1, learning rate 0.5000	instance 2000	epoch done in 30.70 seconds	new loss: 0.6742038563151017	new acc: 0.601
epoch 2, learning rate 0.4167	instance 2000	epoch done in 39.84 seconds	new loss: 0.6590071662506569	new acc: 0.628
epoch 3, learning rate 0.3571	instance 2000	epoch done in 32.63 seconds	new loss: 0.6470836368833998	new acc: 0.655
epoch 4, learning rate 0.3125	instance 2000	epoch done in 35.17 seconds	new loss: 0.6361240483134482	new acc: 0.659
epoch 5, learning rate 0.2778	instance 2000	epoch done in 30.03 seconds	new loss: 0.6265102334309524	new acc: 0.666
epoch 6, learning rate 0.2500	instance 2000	epoch done in 31.61 seconds	new loss: 0.6181