In [2]:
import ModelManager as mm_mod
import config_defaults as cd
from importlib import reload
import numpy as np
import matplotlib.pyplot as plt
import time
import logging
import torch

from tqdm import tqdm_notebook as tqdm


%matplotlib inline
%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 1.75 ms


### Initializing the Model and Data pipes

In [4]:
# Re-import the project modules so that on-disk edits are picked up without restarting the kernel.
reload(mm_mod)
reload(cd)
# Grab the logger registered under the name '__main__' and let INFO-level messages through.
logger = logging.getLogger('__main__')
logger.setLevel(logging.INFO)

time: 1.14 ms


## Extracting the n-grams for n = 1, 2, 3, 4 in both naive and spaCy modes

In [None]:
"""
n_list = (1, 2, 3, 4)
mode_list = ('naive', 'spacy')

for n in n_list:
    for mode in mode_list:
        print("extracting n-grams for: n=%s, mode=%s" % (n, mode))
        param_overrides = {'NGRAM_MODE': mode,
                           'NGRAM_SIZE': n}
        mm = mm_mod.ModelManager(hparams=param_overrides)
        mm.load_data()
"""

### Testing the annealing of LR

In [None]:
# Short run for the LR-annealing check: 3 epochs (epoch_override=3) with EARLY_STOP switched off.
reload(mm_mod)

mm = mm_mod.ModelManager()
mm.load_data()
mm.data_to_pipe()

mm.hparams.update({'EARLY_STOP': False})
mm.train(reload_data=False, epoch_override=3)

### Trying to find a good LR

In [None]:
# Candidate learning rates, one per decade: 1e-5 ... 1e-1 (negative exponents)
# followed by 1, 10, 100 (non-negative exponents).
lr_list_exp_neg = np.arange(1, 6)
lr_list_exp_pos = np.arange(0, 3)

lr_list_neg = 1 / np.power(10, lr_list_exp_neg)
lr_list_pos = np.power(10, lr_list_exp_pos)

# Merge both halves into a single ascending array.
lr_list = np.sort(np.concatenate((lr_list_neg, lr_list_pos)))
print(lr_list)

In [None]:
# One-epoch training run per candidate learning rate, reusing a single
# ModelManager (and its already-loaded data) across all runs.
mm = mm_mod.ModelManager()
mm.load_data()
mm.data_to_pipe()
mm.res_df = None  # clear the results dataframe before the sweep

for cur_lr in lr_list:
    print("training for initial lr = %s" % cur_lr)
    # Override the learning rate for this run and keep early stopping off.
    mm.hparams.update({'LR': cur_lr, 'EARLY_STOP': False})
    mm.train(reload_data=False, epoch_override=1)

display(mm.res_df)

In [None]:
# Scatter of the 'final_val_acc' column against log10 of the 'LR' column of the results dataframe.
# The plotted quantity is an accuracy, so the title and axis labels say so
# (the earlier title called it an "error").
plt.scatter(np.log10(mm.res_df['LR']), mm.res_df['final_val_acc'])
plt.title('Validation accuracy after 1 epoch')
plt.xlabel('log10(initial LR)')
plt.ylabel('final_val_acc')
plt.show()

In [None]:
# Hand the collected results to ModelManager.save_results() (implementation not shown in this notebook).
mm.save_results()

### For each n-gram setting, find the right vocabulary size

In [None]:
# Vocabulary-size sweep: one training run for every combination of
# n-gram size, n-gram mode and vocabulary size.
reload(mm_mod)
logger.setLevel(logging.WARNING)  # only WARNING and above get through during the sweep

voc_sizes = 10000 * np.arange(1, 9)  # 10k, 20k, ..., 80k
n_list = (1, 2, 3, 4)
mode_list = ('naive', 'spacy')

# Flattened form of the three nested loops; the order is unchanged
# (n outermost, voc_size innermost).
sweep = [(n, mode, voc_size)
         for n in n_list
         for mode in mode_list
         for voc_size in voc_sizes]

for n, mode, voc_size in sweep:
    t0 = time.time()
    print("training models for: n=%s, mode=%s, voc_size=%s" % (n, mode, voc_size))
    overrides = dict(NGRAM_MODE=mode, NGRAM_SIZE=n, VOC_SIZE=voc_size)
    mm = mm_mod.ModelManager(hparams=overrides, res_name='vocab_explore.p')
    mm.train()
    final_acc = mm.validation_acc_history[-1]  # last recorded validation accuracy
    print("Final Validation Acc = %s (train time: %.1fs)\n" % (final_acc, time.time() - t0))
    mm.save_results()

### Extra vocabulary sizes: for spaCy n-grams with n = 4, the upper tail hasn't been fully explored

In [None]:
# Continue the n=4 / spacy runs with vocabulary sizes from 90k up to 140k.
voc_sizes = 10000 * np.arange(9, 15)

for voc_size in voc_sizes:
    t0 = time.time()
    print("training models for: n=4, mode=spacy, voc_size=%s" % voc_size)
    overrides = dict(NGRAM_MODE='spacy', NGRAM_SIZE=4, VOC_SIZE=voc_size)
    mm = mm_mod.ModelManager(hparams=overrides, res_name='voc_additional.p')
    mm.train()
    final_acc = mm.validation_acc_history[-1]  # last recorded validation accuracy
    print("Final Validation Acc = %s (train time: %.1fs)\n" % (final_acc, time.time() - t0))
    mm.save_results()

In [None]:
# Preview of the larger vocabulary sizes: 300,000 up to 1,000,000 in steps of 100,000.
voc_sizes = np.arange(3, 11) * 100000
voc_sizes

### What if we tried even larger vocabulary sizes?

In [None]:
# n=4 / spacy runs with much larger vocabularies: 300k up to 1M in steps of 100k.
voc_sizes = 100000 * np.arange(3, 11)

for voc_size in voc_sizes:
    t0 = time.time()
    print("training models for: n=4, mode=spacy, voc_size=%s" % voc_size)
    overrides = dict(NGRAM_MODE='spacy', NGRAM_SIZE=4, VOC_SIZE=voc_size)
    mm = mm_mod.ModelManager(hparams=overrides, res_name='voc_additional.p')
    mm.train()
    final_acc = mm.validation_acc_history[-1]  # last recorded validation accuracy
    print("Final Validation Acc = %s (train time: %.1fs)\n" % (final_acc, time.time() - t0))
    mm.save_results()

### Embedding size

In [None]:
# Preview of candidate embedding dimensions: 100 up to 700 in steps of 50.
# NOTE(review): the sweep cell after this one rebuilds emb_dims from np.arange(1, 15),
# i.e. starting at 50 — confirm which lower bound is intended.
emb_dims = np.arange(2, 15) * 50
emb_dims

In [None]:
# Embedding-dimension sweep in 'spacy' mode: one training run for every
# (embedding dimension, vocabulary size) pair.
reload(mm_mod)
logger.setLevel(logging.WARNING)  # only WARNING and above get through during the sweep

voc_sizes = 10000 * np.arange(2, 13)  # 20k, 30k, ..., 120k
emb_dims = 50 * np.arange(1, 15)      # 50, 100, ..., 700

# Flattened form of the nested loops; emb_dim stays the outer variable.
sweep = [(emb_dim, voc_size) for emb_dim in emb_dims for voc_size in voc_sizes]

for emb_dim, voc_size in sweep:
    t0 = time.time()
    print("training models for: emb_dim=%s, voc_size=%s" % (emb_dim, voc_size))
    overrides = dict(VOC_SIZE=voc_size, NGRAM_MODE='spacy', EMBEDDING_DIM=emb_dim)
    mm = mm_mod.ModelManager(hparams=overrides, res_name='embdim.p')
    mm.train()
    final_acc = mm.validation_acc_history[-1]  # last recorded validation accuracy
    print("Final Validation Acc = %s (train time: %.1fs)\n" % (final_acc, time.time() - t0))
    mm.save_results()

In [None]:
# Line plot of the 'final_val_acc' column sorted in ascending order, so the
# spread across the recorded runs is visible at a glance.
plt.plot(mm.res_df['final_val_acc'].sort_values().values)
# Label the figure so it can be read without the surrounding cells.
plt.title('Final validation accuracy, sorted ascending')
plt.xlabel('rank (ascending)')
plt.ylabel('final_val_acc')
plt.show()

In [None]:
# Shorter alias for the current manager's results dataframe (same object, not a copy).
df = mm.res_df

In [None]:
import torch

In [None]:
# Optimizer comparison: one training run per optimizer class, with every other
# hyperparameter left at whatever ModelManager uses by default.
opt_list = [
    torch.optim.RMSprop,
    torch.optim.Adagrad,
    torch.optim.Adam,
]

for opt in opt_list:
    t0 = time.time()
    print("training models for: optimizer = %s" % str(opt))
    mm = mm_mod.ModelManager(hparams={'OPTIMIZER': opt}, res_name='optim.p')
    mm.train()
    final_acc = mm.validation_acc_history[-1]  # last recorded validation accuracy
    print("Final Validation Acc = %s (train time: %.1fs)\n" % (final_acc, time.time() - t0))
    mm.save_results()

### Trying different LR decay rates

In [None]:
# LR-decay sweep: decay rates from 0.6 to 1.0 in steps of 0.05, each trained
# with LR=0.001 and NEPOCH=30.
lr_decays = 0.5 + np.arange(2, 11) * 0.05

for lr_decay in lr_decays:
    start_time = time.time()
    print("training models for: lr_decay=%s" % (lr_decay))
    param_overrides = {'LR_DECAY_RATE': lr_decay,
                       'LR': 0.001,
                       'NEPOCH': 30}
    mm = mm_mod.ModelManager(hparams=param_overrides, res_name='lr_decay_small_lr.p')
    mm.train()
    # Report inside the loop so that every decay rate shows its own final accuracy;
    # a single print after the loop would only ever show the last run.
    print("Final Validation Acc = %s (train time: %.1fs)\n" % (mm.validation_acc_history[-1],
                                                          time.time() - start_time))

    mm.save_results()

### Just trying a big model...

In [5]:
# Single large configuration: 4-gram 'spacy' mode with a 1,000,000-entry vocabulary,
# 50 epochs, LR 0.001 with decay rate 0.95. INFO logging is re-enabled for this run.
reload(mm_mod)
logger.setLevel(logging.INFO)

t0 = time.time()
overrides = dict(
    NEPOCH=50,
    LR=0.001,
    LR_DECAY_RATE=0.95,
    VOC_SIZE=1000000,
    NGRAM_SIZE=4,
    NGRAM_MODE='spacy',
    EMBEDDING_DIM=100,
    EARLY_STOP_LOOKBACK=32,
)
mm = mm_mod.ModelManager(hparams=overrides, res_name='experiment.p')
mm.train()

final_acc = mm.validation_acc_history[-1]  # last recorded validation accuracy
print("Final Validation Acc = %s (train time: %.1fs)\n" % (final_acc, time.time() - t0))

2018-10-07 00:24:41,846 __main__     INFO     initialized model with hyperparametrs:
2018-10-07 00:24:41,846 __main__     INFO     LR: 0.001
2018-10-07 00:24:41,847 __main__     INFO     LR_DECAY_RATE: 0.95
2018-10-07 00:24:41,847 __main__     INFO     NEPOCH: 50
2018-10-07 00:24:41,847 __main__     INFO     BATCH_SIZE: 32
2018-10-07 00:24:41,848 __main__     INFO     NGRAM_SIZE: 4
2018-10-07 00:24:41,848 __main__     INFO     VOC_SIZE: 1000000
2018-10-07 00:24:41,848 __main__     INFO     EMBEDDING_DIM: 100
2018-10-07 00:24:41,849 __main__     INFO     NGRAM_MODE: spacy
2018-10-07 00:24:41,849 __main__     INFO     VAL_SIZE: 5000
2018-10-07 00:24:41,849 __main__     INFO     OPTIMIZER: <class 'torch.optim.adam.Adam'>
2018-10-07 00:24:41,849 __main__     INFO     VAL_FREQ: 4
2018-10-07 00:24:41,850 __main__     INFO     REMOVE_STOP_WORDS: True
2018-10-07 00:24:41,850 __main__     INFO     REMOVE_PUNC: True
2018-10-07 00:24:41,850 __main__     INFO     EARLY_STOP: True
2018-10-07 00:24:

2018-10-07 02:11:02,861 __main__     INFO     Epoch: [16/50], Step: [256/625], Val Acc: 89.92, LR: 0.0005
2018-10-07 02:12:43,511 __main__     INFO     Epoch: [16/50], Step: [384/625], Val Acc: 89.8, LR: 0.0005
2018-10-07 02:14:05,518 __main__     INFO     Epoch: [16/50], Step: [512/625], Val Acc: 89.8, LR: 0.0005
2018-10-07 02:16:50,899 __main__     INFO     Epoch: [17/50], Step: [128/625], Val Acc: 89.74, LR: 0.0004
2018-10-07 02:18:43,499 __main__     INFO     Epoch: [17/50], Step: [256/625], Val Acc: 89.84, LR: 0.0004
2018-10-07 02:20:26,374 __main__     INFO     Epoch: [17/50], Step: [384/625], Val Acc: 89.84, LR: 0.0004
2018-10-07 02:21:52,655 __main__     INFO     Epoch: [17/50], Step: [512/625], Val Acc: 90.22, LR: 0.0004
2018-10-07 02:24:30,486 __main__     INFO     Epoch: [18/50], Step: [128/625], Val Acc: 90.3, LR: 0.0004
2018-10-07 02:26:16,930 __main__     INFO     Epoch: [18/50], Step: [256/625], Val Acc: 90.06, LR: 0.0004
2018-10-07 02:27:59,708 __main__     INFO     Epo

In [6]:
# Serialize whatever mm.model holds (the object itself, not a state_dict) to 'model_state.st'.
# NOTE(review): the filename suggests a state dict; saving mm.model.state_dict() is the form
# PyTorch generally recommends — confirm what the loading side expects before changing this.
torch.save(mm.model, r'model_state.st')

time: 168 ms
