In [1]:
import ModelManager as mm_mod
import config_defaults as cd
from importlib import reload
import numpy as np
import matplotlib.pyplot as plt
import time
import logging

from tqdm import tqdm_notebook as tqdm


%matplotlib inline
%load_ext autotime

### Initializing the Model and Data pipes

In [2]:
# Re-execute both project modules so that edits made to their source files
# take effect in the running kernel (ModelManager first, then config_defaults).
reload(mm_mod)
reload(cd)
# Grab the logger registered under the name '__main__' and set its level to
# INFO; the captured training output in this notebook is emitted under that name.
logger = logging.getLogger('__main__')
logger.setLevel(logging.INFO)

time: 1.09 ms


## Extracting the n-grams for n = 1, 2, 3, 4 in both `naive` and `spacy` modes

In [3]:
# NOTE: this cell is intentionally inert. The extraction loop is wrapped in a
# triple-quoted string, so nothing below is executed -- the cell merely
# evaluates to (and echoes) that string. Remove the surrounding quotes to run
# the loop over every (n, mode) combination.
"""
n_list = (1, 2, 3, 4)
mode_list = ('naive', 'spacy')

for n in n_list:
    for mode in mode_list:
        print("extracting n-grams for: n=%s, mode=%s" % (n, mode))
        param_overrides = {'NGRAM_MODE': mode,
                           'NGRAM_SIZE': n}
        mm = mm_mod.ModelManager(hparams=param_overrides)
        mm.load_data()
"""

'\nn_list = (1, 2, 3, 4)\nmode_list = (\'naive\', \'spacy\')\n\nfor n in n_list:\n    for mode in mode_list:\n        print("extracting n-grams for: n=%s, mode=%s" % (n, mode))\n        param_overrides = {\'NGRAM_MODE\': mode,\n                           \'NGRAM_SIZE\': n}\n        mm = mm_mod.ModelManager(hparams=param_overrides)\n        mm.load_data()\n'

time: 2.2 ms


### Testing learning-rate (LR) annealing

In [5]:
# Train one default-configured model for 3 epochs with early stopping
# switched off after construction.
reload(mm_mod)  # re-execute ModelManager so recent source edits are used

mm = mm_mod.ModelManager()
mm.load_data()
mm.data_to_pipe()

# EARLY_STOP is overridden on the live hyperparameter set rather than being
# passed to the constructor.
param_overrides = dict(EARLY_STOP=False)
mm.hparams.update(param_overrides)

mm.train(reload_data=False, epoch_override=3)

2018-10-06 16:11:39,701 __main__     INFO     initialized model with hyperparametrs:
2018-10-06 16:11:39,702 __main__     INFO     LR: 0.01
2018-10-06 16:11:39,704 __main__     INFO     LR_DECAY_RATE: 0.9
2018-10-06 16:11:39,705 __main__     INFO     NEPOCH: 10
2018-10-06 16:11:39,705 __main__     INFO     BATCH_SIZE: 32
2018-10-06 16:11:39,706 __main__     INFO     NGRAM_SIZE: 2
2018-10-06 16:11:39,706 __main__     INFO     VOC_SIZE: 10000
2018-10-06 16:11:39,707 __main__     INFO     EMBEDDING_DIM: 100
2018-10-06 16:11:39,707 __main__     INFO     NGRAM_MODE: naive
2018-10-06 16:11:39,708 __main__     INFO     VAL_SIZE: 5000
2018-10-06 16:11:39,709 __main__     INFO     OPTIMIZER: <class 'torch.optim.adam.Adam'>
2018-10-06 16:11:39,709 __main__     INFO     VAL_FREQ: 4
2018-10-06 16:11:39,710 __main__     INFO     REMOVE_STOP_WORDS: True
2018-10-06 16:11:39,710 __main__     INFO     REMOVE_PUNC: True
2018-10-06 16:11:39,711 __main__     INFO     EARLY_STOP: True
2018-10-06 16:11:39,7

### Trying to find a good LR

In [None]:
# Candidate initial learning rates, one per decade:
#   negative exponents 1..5 -> 1e-1 ... 1e-5
#   non-negative exponents 0..2 -> 1, 10, 100
lr_list_exp_neg = np.arange(1, 6)
lr_list_exp_pos = np.arange(0, 3)

lr_list_neg = 1 / (10 ** lr_list_exp_neg)
lr_list_pos = 10 ** lr_list_exp_pos

# Merge both halves into a single ascending array.
lr_list = np.sort(np.concatenate((lr_list_neg, lr_list_pos)))
print(lr_list)

In [None]:
# Learning-rate sweep: a single ModelManager is built once and then trained
# for one epoch per candidate value in lr_list, with early stopping disabled.
mm = mm_mod.ModelManager()
mm.load_data()
mm.data_to_pipe()
mm.res_df = None  # reset the results dataframe before the sweep starts

for cur_lr in lr_list:
    print("training for initial lr = %s" % (cur_lr,))
    # Only the initial LR changes between iterations.
    param_overrides = dict(LR=cur_lr, EARLY_STOP=False)
    mm.hparams.update(param_overrides)
    mm.train(reload_data=False, epoch_override=1)

display(mm.res_df)

In [None]:
# Scatter the 'final_val_acc' column of the results dataframe against
# log10 of the 'LR' column. The plotted quantity is an accuracy, so the
# title and axis labels say so (the previous title called it an error).
plt.scatter(np.log10(mm.res_df['LR']), mm.res_df['final_val_acc'])
plt.title('Validation Accuracy after 1 epoch')
plt.xlabel('log10(initial LR)')
plt.ylabel('final validation accuracy')
plt.show()

In [None]:
# Ask the manager to persist whatever results it has accumulated so far.
# NOTE(review): the destination is decided inside ModelManager and is not
# visible from this notebook -- confirm before relying on the file name.
mm.save_results()

### For each n-gram setting, find the right vocabulary size:

In [None]:
# Grid search: for every (n-gram size, n-gram mode) pair, train one model per
# vocabulary size from 10,000 to 80,000 in steps of 10,000.
reload(mm_mod)
logger.setLevel(logging.WARNING)  # only WARNING and above while sweeping

n_list = (1, 2, 3, 4)
mode_list = ('naive', 'spacy')
voc_sizes = 10000 * np.arange(1, 9)

for n in n_list:
    for mode in mode_list:
        for voc_size in voc_sizes:
            start_time = time.time()
            print("training models for: n=%s, mode=%s, voc_size=%s" % (n, mode, voc_size))

            # A fresh manager is constructed for each configuration.
            param_overrides = dict(NGRAM_MODE=mode, NGRAM_SIZE=n, VOC_SIZE=voc_size)
            mm = mm_mod.ModelManager(hparams=param_overrides, res_name='vocab_explore.p')
            mm.train()

            # Report the last recorded validation accuracy and the wall-clock
            # time since start_time (includes manager construction).
            print("Final Validation Acc = %s (train time: %.1fs)\n"
                  % (mm.validation_acc_history[-1], time.time() - start_time))

            mm.save_results()

### Extra vocabulary: for spacy with n-gram size 4, the upper tail hasn't been fully explored

In [None]:
# Extend the vocabulary sweep for n=4 / spacy to 90,000 ... 140,000
# (steps of 10,000), training a fresh model for each size.
voc_sizes = 10000 * np.arange(9, 15)

for voc_size in voc_sizes:
    start_time = time.time()
    print("training models for: n=4, mode=spacy, voc_size=%s" % (voc_size,))

    param_overrides = dict(NGRAM_MODE='spacy', NGRAM_SIZE=4, VOC_SIZE=voc_size)
    mm = mm_mod.ModelManager(hparams=param_overrides, res_name='voc_additional.p')
    mm.train()

    # Last recorded validation accuracy plus wall-clock time for this run.
    print("Final Validation Acc = %s (train time: %.1fs)\n"
          % (mm.validation_acc_history[-1], time.time() - start_time))

    mm.save_results()

In [None]:
# Preview the very large vocabulary sizes: 300,000 ... 1,000,000 in steps of
# 100,000. The bare name on the last line makes the cell display the array.
voc_sizes = 100000 * np.arange(3, 11)
voc_sizes

### What if we tried even larger vocabulary sizes?

In [None]:
# Same n=4 / spacy sweep as before, but over much larger vocabulary sizes:
# 300,000 ... 1,000,000 in steps of 100,000.
voc_sizes = 100000 * np.arange(3, 11)

for voc_size in voc_sizes:
    start_time = time.time()
    print("training models for: n=4, mode=spacy, voc_size=%s" % (voc_size,))

    param_overrides = dict(NGRAM_MODE='spacy', NGRAM_SIZE=4, VOC_SIZE=voc_size)
    mm = mm_mod.ModelManager(hparams=param_overrides, res_name='voc_additional.p')
    mm.train()

    # Last recorded validation accuracy plus wall-clock time for this run.
    print("Final Validation Acc = %s (train time: %.1fs)\n"
          % (mm.validation_acc_history[-1], time.time() - start_time))

    mm.save_results()

### Embedding size

In [None]:
# Preview the embedding dimensions explored by the sweep in the next cell:
# 50 ... 700 in steps of 50. The range starts at 1 (not 2) so that this
# preview shows exactly the grid the sweep uses, including the 50-dim point.
emb_dims = np.arange(1, 15) * 50
emb_dims

In [None]:
# Joint sweep over embedding dimension (50 ... 700, step 50) and vocabulary
# size (20,000 ... 120,000, step 10,000) in spacy mode; one fresh model is
# trained per combination.
reload(mm_mod)
logger.setLevel(logging.WARNING)  # only WARNING and above while sweeping

voc_sizes = 10000 * np.arange(2, 13)
emb_dims = 50 * np.arange(1, 15)

for emb_dim in emb_dims:
    for voc_size in voc_sizes:
        start_time = time.time()
        print("training models for: emb_dim=%s, voc_size=%s" % (emb_dim, voc_size))

        param_overrides = dict(VOC_SIZE=voc_size,
                               NGRAM_MODE='spacy',
                               EMBEDDING_DIM=emb_dim)
        mm = mm_mod.ModelManager(hparams=param_overrides, res_name='embdim.p')
        mm.train()

        # Last recorded validation accuracy plus wall-clock time for this run.
        print("Final Validation Acc = %s (train time: %.1fs)\n"
              % (mm.validation_acc_history[-1], time.time() - start_time))

        mm.save_results()

In [None]:
# Plot the 'final_val_acc' column of the results dataframe in ascending
# order. A title and axis labels are set so the figure can be read on its own.
plt.plot(mm.res_df['final_val_acc'].sort_values().values)
plt.title('Final validation accuracy, sorted ascending')
plt.xlabel('rank (lowest to highest)')
plt.ylabel('final validation accuracy')
plt.show()

In [None]:
# Short alias for the results dataframe of the manager currently bound to `mm`.
df = mm.res_df

In [None]:
import torch

In [None]:
# Compare optimizers: train one otherwise-default model per optimizer class.
opt_list = [
    torch.optim.RMSprop,
    torch.optim.Adagrad,
    torch.optim.Adam,
]

for opt in opt_list:
    start_time = time.time()
    print("training models for: optimizer = %s" % (str(opt),))

    # The optimizer *class* (not an instance) is handed over as a hyperparameter.
    param_overrides = dict(OPTIMIZER=opt)
    mm = mm_mod.ModelManager(hparams=param_overrides, res_name='optim.p')
    mm.train()

    # Last recorded validation accuracy plus wall-clock time for this run.
    print("Final Validation Acc = %s (train time: %.1fs)\n"
          % (mm.validation_acc_history[-1], time.time() - start_time))

    mm.save_results()

### Just trying a big model...

In [None]:
# One-off experiment: a single large configuration (1,000,000-entry
# vocabulary, 1000-dim embeddings, 4-grams in spacy mode) trained for up to
# 50 epochs, with INFO logging turned back on.
reload(mm_mod)
logger.setLevel(logging.INFO)

start_time = time.time()
param_overrides = dict(
    NEPOCH=50,
    VOC_SIZE=1000000,
    NGRAM_SIZE=4,
    NGRAM_MODE='spacy',
    EMBEDDING_DIM=1000,
    LR_DECAY_RATE=0.75,
    EARLY_STOP_LOOKBACK=32,
)
mm = mm_mod.ModelManager(hparams=param_overrides, res_name='experiment.p')
mm.train()

# Last recorded validation accuracy plus wall-clock time since start_time.
print("Final Validation Acc = %s (train time: %.1fs)\n"
      % (mm.validation_acc_history[-1], time.time() - start_time))

2018-10-06 16:57:01,165 __main__     INFO     initialized model with hyperparametrs:
2018-10-06 16:57:01,167 __main__     INFO     LR: 0.01
2018-10-06 16:57:01,169 __main__     INFO     LR_DECAY_RATE: 0.75
2018-10-06 16:57:01,171 __main__     INFO     NEPOCH: 50
2018-10-06 16:57:01,172 __main__     INFO     BATCH_SIZE: 32
2018-10-06 16:57:01,173 __main__     INFO     NGRAM_SIZE: 4
2018-10-06 16:57:01,174 __main__     INFO     VOC_SIZE: 1000000
2018-10-06 16:57:01,175 __main__     INFO     EMBEDDING_DIM: 1000
2018-10-06 16:57:01,175 __main__     INFO     NGRAM_MODE: spacy
2018-10-06 16:57:01,176 __main__     INFO     VAL_SIZE: 5000
2018-10-06 16:57:01,177 __main__     INFO     OPTIMIZER: <class 'torch.optim.adam.Adam'>
2018-10-06 16:57:01,177 __main__     INFO     VAL_FREQ: 4
2018-10-06 16:57:01,178 __main__     INFO     REMOVE_STOP_WORDS: True
2018-10-06 16:57:01,178 __main__     INFO     REMOVE_PUNC: True
2018-10-06 16:57:01,179 __main__     INFO     EARLY_STOP: True
2018-10-06 16:57: