In [1]:
import sys

# Make the sibling code directory importable (presumably where the rnn,
# runner, utils and rnnmath modules imported by the next cell live).
# Guarded so that re-running this cell does not keep appending duplicates.
if '../code/' not in sys.path:
    sys.path.append('../code/')

In [2]:
import numpy as np
from rnn import RNN
from runner import Runner
import pandas as pd
from utils import invert_dict, load_lm_dataset, docs_to_indices, seqs_to_lmXY
from rnnmath import fraq_loss
import itertools

In [3]:
# Directory the vocabulary and wiki train/dev files are read from.
data_folder = "../data/"

# Fix numpy's global RNG so repeated runs draw the same random numbers.
np.random.seed(seed=2018)

In [4]:
# Hyper-parameter grid: every combination of these values is trained.
learning_rates = [
    0.5,
    0.1,
    0.05,
]                       # passed to train() as learning_rate
hdims = [25, 50]        # passed to RNN() as hidden_dims
lookbacks = [0, 2, 5]   # passed to train() as back_steps

In [5]:
# Experiment size settings.
train_size = 1000   # number of training sequences kept after loading
dev_size = 1000     # number of dev sequences kept after loading
vocab_size = 2000   # number of vocabulary entries retained
epochs = 10         # epochs per training run

In [6]:
# Load the data set vocabulary: a whitespace-separated table without a header
# row whose first column becomes the index and whose remaining columns are
# named 'count' and 'freq'.
# The separator is a raw string so that "\s" is a regex class rather than an
# invalid Python string escape.
vocab = pd.read_table(data_folder + "/vocab.wiki.txt", header=None, sep=r"\s+", index_col=0,
                      names=['count', 'freq'])

# Keep only the first vocab_size entries and build the index <-> word maps.
num_to_word = dict(enumerate(vocab.index[:vocab_size]))
word_to_num = invert_dict(num_to_word)

# Fraction of tokens lost by truncating the vocabulary to vocab_size words.
fraction_lost = fraq_loss(vocab, word_to_num, vocab_size)
print(
    "Retained %d words from %d (%.02f%% of all tokens)\n" % (
    vocab_size, len(vocab), 100 * (1 - fraction_lost)))

# Load the training set and convert it to index sequences / (X, D) pairs.
docs = load_lm_dataset(data_folder + '/wiki-train.txt')
S_train = docs_to_indices(docs, word_to_num, 1, 1)
X_train, D_train = seqs_to_lmXY(S_train)

# Load the dev set (for tuning hyperparameters)
docs = load_lm_dataset(data_folder + '/wiki-dev.txt')
S_dev = docs_to_indices(docs, word_to_num, 1, 1)
X_dev, D_dev = seqs_to_lmXY(S_dev)

# Truncate both sets to the configured number of sequences.
X_train = X_train[:train_size]
D_train = D_train[:train_size]
X_dev = X_dev[:dev_size]
D_dev = D_dev[:dev_size]

# q = best unigram frequency from omitted vocab
# this is the best expected loss out of that set
# The vocabulary is indexed by its first column rather than by position, so
# select explicitly by position with .iloc; a bare integer key on such a
# Series relies on a deprecated positional fallback.
omitted_freq = vocab['freq'].iloc[vocab_size:]
q = vocab['freq'].iloc[vocab_size] / omitted_freq.sum()

Retained 2000 words from 9954 (88.35% of all tokens)



In [7]:
# Search space for the grid search; the order matters because the training
# loop unpacks each combination as (lr, hdim, lookback).
s = [
    learning_rates,
    hdims,
    lookbacks,
]

In [8]:
# Q2.a -- train one model per (learning rate, hidden size, back steps) combination

# !!! The train function had to be changed to make its output display in the
# jupyter notebook, which is not allowed.
for lr, hdim, lookback in itertools.product(*s):
    # Fresh model and runner for every combination so runs do not share weights.
    rnn_model = RNN(vocab_size=vocab_size, hidden_dims=hdim, out_vocab_size=vocab_size)
    r = Runner(model=rnn_model)
    r.train(X=X_train, D=D_train,
            X_dev=X_dev, D_dev=D_dev,
            epochs=epochs,
            learning_rate=lr,
            back_steps=lookback)

    # Two-line banner separating the logs of consecutive runs.
    print('######################################################################')
    print('######################################################################')


Training model for 10 epochs
training set: 1000 sentences (batch size 100)
Optimizing loss on 1000 sentences
Vocab size: 2000
Hidden units: 25
Steps for back propagation: 0
Initial learning rate set to 0.5, annealing set to 5

calculating initial mean loss on dev set: 7.798662515757492

epoch 1, learning rate 0.5000	instance 1000	epoch done in 36.99 seconds	new loss: 8.15014653696414
epoch 2, learning rate 0.4167	instance 1000	epoch done in 36.75 seconds	new loss: 5.978210046559437
epoch 3, learning rate 0.3571	instance 1000	epoch done in 35.57 seconds	new loss: 5.746970466401593
epoch 4, learning rate 0.3125	instance 1000	epoch done in 47.49 seconds	new loss: 5.195948023144267
epoch 5, learning rate 0.2778	instance 1000	epoch done in 36.97 seconds	new loss: 5.154120129486588
epoch 6, learning rate 0.2500	instance 1000	epoch done in 37.59 seconds	new loss: 5.11580921789912
epoch 7, learning rate 0.2273	instance 1000	epoch done in 37.26 seconds	new loss: 5.114597758027232
epoch 8, lear

calculating initial mean loss on dev set: 8.623476566919857

epoch 1, learning rate 0.5000	instance 1000	epoch done in 49.50 seconds	new loss: 7.512639839083601
epoch 2, learning rate 0.4167	instance 1000	epoch done in 50.38 seconds	new loss: 6.097188652709496
epoch 3, learning rate 0.3571	instance 1000	epoch done in 50.59 seconds	new loss: 5.232716284227191
epoch 4, learning rate 0.3125	instance 1000	epoch done in 50.82 seconds	new loss: 5.109902864772836
epoch 5, learning rate 0.2778	instance 1000	epoch done in 52.94 seconds	new loss: 5.099857759180985
epoch 6, learning rate 0.2500	instance 1000	epoch done in 53.21 seconds	new loss: 5.0543468434561705
epoch 7, learning rate 0.2273	instance 1000	epoch done in 54.28 seconds	new loss: 5.016407418581707
epoch 8, learning rate 0.2083	instance 1000	epoch done in 50.74 seconds	new loss: 4.995120137679506
epoch 9, learning rate 0.1923	instance 1000	epoch done in 53.76 seconds	new loss: 4.978735220242406
epoch 10, learning rate 0.1786	instanc

epoch 2, learning rate 0.0833	instance 1000	epoch done in 72.50 seconds	new loss: 5.4651961552558985
epoch 3, learning rate 0.0714	instance 1000	epoch done in 66.47 seconds	new loss: 5.351179945648193
epoch 4, learning rate 0.0625	instance 1000	epoch done in 64.32 seconds	new loss: 5.288592158274152
epoch 5, learning rate 0.0556	instance 1000	epoch done in 64.18 seconds	new loss: 5.244431551949272
epoch 6, learning rate 0.0500	instance 1000	epoch done in 63.26 seconds	new loss: 5.210745937843555
epoch 7, learning rate 0.0455	instance 1000	epoch done in 64.81 seconds	new loss: 5.184193722531978
epoch 8, learning rate 0.0417	instance 1000	epoch done in 65.12 seconds	new loss: 5.1601219727632035
epoch 9, learning rate 0.0385	instance 1000	epoch done in 64.67 seconds	new loss: 5.140233154322142
epoch 10, learning rate 0.0357	instance 1000	epoch done in 66.57 seconds	new loss: 5.123840002858845

training finished after reaching maximum of 10 epochs
best observed loss was 5.123840002858845, 

epoch 4, learning rate 0.0312	instance 1000	epoch done in 41.14 seconds	new loss: 5.424441192651989
epoch 5, learning rate 0.0278	instance 1000	epoch done in 38.29 seconds	new loss: 5.381836157626986
epoch 6, learning rate 0.0250	instance 1000	epoch done in 38.32 seconds	new loss: 5.349032180742898
epoch 7, learning rate 0.0227	instance 1000	epoch done in 38.96 seconds	new loss: 5.322961468400411
epoch 8, learning rate 0.0208	instance 1000	epoch done in 38.87 seconds	new loss: 5.301185899650594
epoch 9, learning rate 0.0192	instance 1000	epoch done in 39.17 seconds	new loss: 5.282966962502312
epoch 10, learning rate 0.0179	instance 1000	epoch done in 39.40 seconds	new loss: 5.267207273043814

training finished after reaching maximum of 10 epochs
best observed loss was 5.267207273043814, at epoch 10
setting parameters to matrices from best epoch
######################################################################
######################################################################

