In [5]:
class Args:
    """Empty attribute container; settings are attached to an instance as plain attributes."""

In [14]:
# python
import re
import os
import pickle
import time
import math
import random
import itertools

# nltk
import nltk
nltk.download('punkt')

# matplotlib
import matplotlib.pyplot as plt

# numpy
import numpy as np

# torch imports
import torch

# ours
from corpus.utils import create_datasets as create_datasets
from corpus.utils import idx2word as idx2word

from plotting.utils import exponential_smoothing as exponential_smoothing
from plotting.utils import plot as plot
from plotting.utils import plot_elbo as plot_elbo
from plotting.utils import graph as graph

from experiment.utils import args_to_dict as args_to_dict
from experiment.utils import save_args as save_args
from experiment.utils import save_model_printout as save_model_printout
from experiment.utils import save_trackers as save_trackers
from experiment.utils import pretty_print_trackers as pretty_print_trackers
from experiment.utils import convert as convert
from experiment.sample import test as test
from experiment.sample import interpolate as interpolate
from experiment.sample import random_samples as random_samples
from experiment.sample import cold_interpolation as cold_interpolation
from experiment.sample import warm_interpolation as warm_interpolation
from experiment.sample import reconstruction as reconstruction

import util
from util.utils import to_var, expierment_name

from models.utils import create_model as create_model
from models.utils import kl_anneal_function as kl_anneal_function
from models.utils import loss_fn as loss_fn
from models.utils import train as train
from models.bowman import SentenceVAE

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Creating the Model/Data Args

In [28]:
# Collect every model/runtime setting on one namespace object.

args = Args()

# --- data, training length and optimisation ---
args.data_dir = 'data'
args.create_data = True
args.max_sequence_length = 50
args.min_occ = 1
args.test = True
args.epochs = 10
args.batch_size = 64
args.learning_rate = 0.001

# --- corpus selection (checked against the allowed names below) ---
args.corpus = 'bible'

# --- sampling / interpolation / reconstruction counts ---
args.num_samples = 10
args.num_steps = 10
args.num_cold_interpolations = 5
args.num_warm_interpolations = 5
args.num_reconstructions = 5
args.sample_warmup_period = 100

# --- model architecture ---
args.embeddings = True
args.embedding_size = 300
args.rnn_type = 'gru'
args.hidden_size = 256
args.num_layers = 1
args.bidirectional = True
args.latent_size = 20
args.word_dropout = 0.0
args.embedding_dropout = 0.5

# --- annealing settings (presumably consumed by kl_anneal_function -- confirm) ---
args.anneal_function = 'logistic'
args.k = 0.0025
args.x0 = 2500

# --- logging and output locations ---
args.print_every = 50
args.tensorboard_logging = False
args.logdir = 'logs'
args.save_model_path = 'bin/test'
# args.load_checkpoint = 'E9.pytorch'

# Normalise the case of the string options so the membership checks
# below are case-insensitive with respect to what was typed above.
args.rnn_type = args.rnn_type.lower()
args.anneal_function = args.anneal_function.lower()

# Fail fast on unsupported option values.
assert args.rnn_type in ('rnn', 'lstm', 'gru')
assert args.anneal_function in ('logistic', 'linear', 'const')
assert args.word_dropout >= 0 and args.word_dropout <= 1
assert args.corpus in ('ptb', 'bible', 'gutenberg', 'brown', 'wikitext-2', 'wikitext-103')

# Experiments

In [29]:
def help(args, verbose=False):
    """Build the datasets and a fresh model from ``args``, train it, and return everything.

    NOTE(review): this name shadows Python's builtin ``help`` for the rest
    of the notebook session.

    Args:
        args: settings object forwarded to ``create_datasets``,
            ``create_model`` and ``train``.
        verbose: forwarded unchanged to ``create_datasets`` and ``train``.

    Returns:
        Tuple ``(datasets, model, trackers)`` where ``model`` and
        ``trackers`` are the values handed back by ``train``.
    """
    corpus_splits = create_datasets(args, verbose=verbose)

    # the model is constructed from both the settings and the datasets
    untrained = create_model(args, corpus_splits)

    # train() returns the trackers first, then the model
    metrics, trained = train(untrained, corpus_splits, args, verbose=verbose)

    return corpus_splits, trained, metrics

In [30]:
def run_experiment(args, verbose=False):
    """Run one full experiment: build data and model, train, save artefacts, graph, sample.

    Args:
        args: settings object passed to every helper called here.
        verbose: forwarded unchanged to ``create_datasets`` and ``train``.

    Returns:
        None. All results are produced through the helpers' side effects.
    """
    # data first: create_model needs the datasets as well as the settings
    corpus_splits = create_datasets(args, verbose=verbose)
    vae = create_model(args, corpus_splits)

    # train() hands back the performance trackers together with the model
    metrics, vae = train(vae, corpus_splits, args, verbose=verbose)
    # NOTE(review): the original kept two disabled overrides at this point,
    #   args.best_epoch = 3
    #   args.load_checkpoint = 'E3.pytorch'
    # presumably to point the later steps at an existing checkpoint -- confirm.

    # persist the settings and the model printout
    save_args(args)
    save_model_printout(args, vae)

    # persist the trackers, then graph them
    save_trackers(metrics, args)
    graph(metrics, corpus_splits, args)

    # inference/sampling step; note it receives args and the datasets only,
    # not the in-memory model
    test(args, corpus_splits)

In [31]:
# datasets, model, trackers = help(args)

In [32]:
# run_experiment(args)

# Grid Search

In [33]:
# Grid-search space: attribute name on `args` -> list of candidate values.
# Insertion order is kept, so the keys iterate in the order written here.
parameters = dict(
    hidden_size=[256, 512, 1024],
    corpus=['ptb', 'brown'],
    max_sequence_length=[20, 35, 50],
    latent_size=[32],
    bidirectional=[True, False],
)

In [34]:
def grid_search(parameters, verbose=False):
    """Run ``run_experiment`` once for every combination in a parameter grid.

    For each element of the Cartesian product of the value lists, the
    matching attributes are set on the module-level ``args`` object and a
    full experiment is run with it.

    Note that ``args`` is mutated in place: after the search it holds the
    values of the last combination tried.

    Args:
        parameters: mapping from an attribute name on ``args`` to the list
            of values to try for that attribute.
        verbose: forwarded to ``run_experiment``. The previous code passed
            the string ``'False'``, which is truthy, so any truthiness
            check downstream would have treated it as enabled; a real
            boolean is passed now.
    """
    # Freeze the key order once so names and product values stay aligned.
    names = list(parameters)
    for combination in itertools.product(*(parameters[name] for name in names)):
        # change the desired attributes of args
        for name, value in zip(names, combination):
            setattr(args, name, value)
        # run the modified experiment
        run_experiment(args, verbose=verbose)

In [35]:
# Launch the grid search: one run_experiment call per combination of the
# value lists in `parameters`.
grid_search(parameters)

sending model to cuda
SentenceVAE(
  (embedding): Embedding(10009, 300)
  (embedding_dropout): Dropout(p=0.5)
  (encoder_rnn): GRU(300, 256, batch_first=True, bidirectional=True)
  (decoder_rnn): GRU(300, 256, batch_first=True)
  (hidden2mean): Linear(in_features=512, out_features=20, bias=True)
  (hidden2logv): Linear(in_features=512, out_features=20, bias=True)
  (latent2hidden): Linear(in_features=20, out_features=256, bias=True)
  (outputs2vocab): Linear(in_features=256, out_features=10009, bias=True)
)
Beginning training at: 2019-Feb-01-14:24:19
-------------------------------------------


 EPOCH 1, SPLIT = train
-------------------------------------------
TRAIN Batch 0000/267, Loss  116.5624, NLL-Loss  116.5614, KL-Loss    0.4850, KL-Weight  0.002
TRAIN Batch 0050/267, Loss   76.3122, NLL-Loss   76.2576, KL-Loss   25.0142, KL-Weight  0.002
TRAIN Batch 0100/267, Loss   73.7497, NLL-Loss   73.6660, KL-Loss   33.8856, KL-Weight  0.002
TRAIN Batch 0150/267, Loss   81.2749, NLL-Loss 

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


	[Creating dictionaries]


HBox(children=(IntProgress(value=0, max=10005), HTML(value='')))


	[Loading pretrained GLOVE embeddings -- this may take a while the first time]
Loaded 400000 words
	[Mapping vocab to GLOVE embeddings]


HBox(children=(IntProgress(value=0, max=10009), HTML(value='')))


Vocabulary created (10009 word types)!
Creating dataset ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Dataset created, with:
	35528 sentences
	736759 word tokens
	20.737418374240036 avg. sentence length


Preprocessing Penn Treebank *val* data:
------------------------------------------
Loading vocab file ...
Creating dataset ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Dataset created, with:
	2865 sentences
	58999 word tokens
	20.593019197207678 avg. sentence length


Preprocessing Penn Treebank *test* data:
------------------------------------------
Loading vocab file ...
Creating dataset ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Dataset created, with:
	3114 sentences
	63958 word tokens
	20.538856775850995 avg. sentence length


sending model to cuda
SentenceVAE(
  (embedding): Embedding(10009, 300)
  (embedding_dropout): Dropout(p=0.5)
  (encoder_rnn): GRU(300, 256, batch_first=True, bidirectional=True)
  (decoder_rnn): GRU(300, 256, batch_first=True)
  (hidden2mean): Linear(in_features=512, out_features=20, bias=True)
  (hidden2logv): Linear(in_features=512, out_features=20, bias=True)
  (latent2hidden): Linear(in_features=20, out_features=256, bias=True)
  (outputs2vocab): Linear(in_features=256, out_features=10009, bias=True)
)
Beginning training at: 2019-Feb-01-14:26:59
-------------------------------------------


 EPOCH 1, SPLIT = train
-------------------------------------------
TRAIN Batch 0000/555, Loss  186.8918, NLL-Loss  186.8909, KL-Loss    0.4494, KL-Weight  0.002
TRAIN Batch 0050/555, Loss  129.8413, NLL-Loss  129.7867, KL-Loss   25.0120, KL-Weight  0.002
TRAIN Batch 0100/555, Loss  124.5163, N

Process Process-867:
Process Process-870:
Process Process-865:
Process Process-866:
Process Process-869:
Process Process-868:
Process Process-872:
Process Process-871:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.6/multiprocessin

TRAIN Batch 0100/555, Loss  117.9830, NLL-Loss  117.8951, KL-Loss   35.5392, KL-Weight  0.002


KeyboardInterrupt: 

<Figure size 432x288 with 0 Axes>