In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sb
sb.set()
plt.rcParams['figure.figsize'] = (12,9)
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import numpy.random as npr
import chainer as ch
import chainer.functions as F
import chainer.links as L
import chainer.training.extensions

In [547]:
from dataset import load_data
from vocab import Vocab
from word_vectors import get_pretrained_vectors

[autoreload of updater failed: Traceback (most recent call last):
  File "/Users/thomaseffland/.virtualenvs/research/lib/python2.7/site-packages/IPython/extensions/autoreload.py", line 247, in check
    superreload(m, reload, self.old_objects)
NameError: name 'convert' is not defined
]


# Setup data

In [4]:
train_data, dev_data = load_data("data/multinli_0.9/", 
                                 matched=True, 
                                 genres=['government'],
                                 drop_confused=True, 
                                 lowercase=True)
print("{} training examples, {} dev examples".format(len(train_data), len(dev_data)))

77350 training examples, 1945 dev examples


In [5]:
vocab = Vocab(min_count=1)
vocab.add([ token for datum in train_data for token in datum['h'] ])
vocab.add([ token for datum in train_data for token in datum['p'] ])
vocab.add([ token for datum in dev_data for token in datum['h'] ])
vocab.add([ token for datum in dev_data for token in datum['p'] ])
vocab.drop_infrequent()

In [6]:
token_embeddings = get_pretrained_vectors(vocab, 'data/word_vectors/glove.6B.300d.txt', 
                                          normed=True, trim=False)

Pretrained coverage: 21967/27285 = 80.51%


In [7]:
class_vocab = Vocab([ datum['c'] for datum in train_data ],
                    min_count=0,
                    pad_token=None,
                    unk_token=None)

# Define Models

In [537]:
from monitor import monitor

# Instantiate models and run experiment

# What to track:
* Number of parameters
* Number of trainable parameters
* Start time
* Examples per second
* Updates per second
* Model score vs time
* Ratio of parameter values to updates (on average for each layer)
* Mean, standard dev, and histograms of param updates, and activations
* Learning rate  over time
* Do model exponentially decayed model weight averaging. Do a moving average.

In [522]:
from retain_grad import RetainGrad

In [551]:
from better_report import BetterLogReport

In [552]:
from activation_monitor import ActivationMonitorExtension
from backprop_monitor import BackpropMonitorExtension

In [561]:
from updater import VariableConverterUpdater
from evaluator import VariableConverterEvaluator
from converter import NLIBatchConverter

In [684]:
cbow = CBOW(token_embeddings)
c_model= MLP([1000, 600, 300, 3])
predictor = NLIPredictor(cbow, c_model)
loss_model = NLILossModel(predictor)


optimizer = RetainGrad(ch.optimizers.Adam)()
# optimizer = ch.optimizers.Adam()
optimizer.setup(loss_model)
# optimizer.add_hook(ForwardBackwardMonitorHook())

batch_size = 256
k = 256
train_iter = ch.iterators.SerialIterator(train_data[:k], batch_size, shuffle=True, repeat=True)
dev_iter = ch.iterators.SerialIterator(dev_data[:k], batch_size, shuffle=False, repeat=False)

In [688]:
import os
import os.path as osp


In [690]:
osp.join('result_test', 'snapshots')

'result_test/snapshots'

In [698]:
print(getattr({'results_dirname':'test'}, 'results_dirname', None))

None


In [692]:
import os
import os.path as osp
outdir = 'result_test'
snapshot_dir = osp.join(outdir, 'snapshots')
tmpsnapshot_dir = osp.join(outdir, 'tmpsnapshots')
if not osp.exists(outdir): os.makedirs(outdir)
if not osp.exists(snapshot_dir): os.makedirs(snapshot_dir)
if not osp.exists(tmpsnapshot_dir): os.makedirs(tmpsnapshot_dir)

In [693]:
converter = NLIBatchConverter(vocab, class_vocab)
updater = VariableConverterUpdater(train_iter, optimizer, converter=converter)
evaluator = VariableConverterEvaluator(dev_iter, loss_model, converter=converter)
activation_monitor = ActivationMonitorExtension()
backprop_monitor = BackpropMonitorExtension(loss_model)
logger = BetterLogReport(trigger=(1,'iteration'))

trainer = ch.training.Trainer(updater, (100, 'epoch'), out=outdir)
trainer.extend(evaluator)
trainer.extend(activation_monitor)
trainer.extend(backprop_monitor)
trainer.extend(logger)
# trainer.extend(ch.training.extensions.LogReport(trigger=(1,'iteration'),
#                                                 postprocess=postprocess))
trainer.extend(ch.training.extensions.PrintReport([
    'epoch', 'main/loss', 'main/accuracy', 'validation/main/accuracy'],
    log_report=logger
))
trainer.extend(ch.training.extensions.snapshot(
    filename='snapshots/snapshot_iter_{.updater.iteration}', 
    trigger=(1, 'epoch')
))
# trainer.extend(ch.training.extensions.ProgressBar())
# trainer.extend(ch.training.extensions.snapshot(
#     trigger=ch.training.triggers.MaxValueTrigger('validation/main/accuracy', (1,'epoch'))
# ))

In [694]:
trainer.run()

epoch       main/loss   main/accuracy  validation/main/accuracy
[J2           3.27839     0.347656       0.339844                  
[J2           3.27839     0.347656       0.339844                  
[J3           2.96289     0.296875       0.339844                  
[J3           2.96289     0.296875       0.339844                  
[J4           1.74889     0.296875       0.328125                  
[J4           1.74889     0.296875       0.328125                  
[J5           1.13267     0.324219       0.324219                  
[J5           1.13267     0.324219       0.324219                  
[J6           1.20713     0.359375       0.324219                  
[J6           1.20713     0.359375       0.324219                  
[J7           1.17865     0.355469       0.335938                  
[J7           1.17865     0.355469       0.335938                  
[J8           1.16577     0.359375       0.332031                  
[J8           1.16577     0.359375    

KeyboardInterrupt: 

In [None]:
ls 

# TODO

* [X] Compute micro, macro, and class-wise f1s
* [X] Report training and validation metrics
* [X] Report activation histograms
* [X] Report activation means, variances
* [X] Report gradient _update_ histograms
* [X] Report gradient _update_ means, variances


* [ ] Checkpoint model and be able to restore model from checkpoint
* [ ] Visualize the training as it progresses
* [ ] Batch out multiple experiments
* [ ] Post output to results folder that is timestamped
* [ ] Output config to results folder
* [ ] 

Need to be able to:
* Run experiments but exchange architectures
* Reproduce all experiments
* Analyze results, diagnose optimization and data errors

In [593]:
main_loss = [l['main/accuracy'] for l in logger._log]

In [607]:
from IPython.display import HTML

In [633]:
css = """
h3 {
    color:grey;
}
"""

In [638]:
viz = """
d3.select('#main-viz')
    .html('d3 inserted this!')
"""

In [641]:
HTML(open('vis_test.html').read())

In [640]:
html_str = """
<style> {css} </style>
<div> <h3> This is a visualization </h3> </div>
<div id="main-viz"></div>
<script> {viz_js} </script>
"""
HTML(html_str.format(css=css, viz_js=viz))

[How to embed d3 in jupyter](http://blog.thedataincubator.com/2015/08/embedding-d3-in-an-ipython-notebook/)

In [606]:
%%javascript
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});
var margin = {top: 20, right: 10, bottom: 20, left: 10};
var width = 960 - margin.left - margin.right,
    height = 500 - margin.top - margin.bottom;
var svg = d3.select("element").append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")")
var x = d3.scale.linear()
    .range([0, width]);
var y = d3.scale.linear()
    .range([height, 0]);

<IPython.core.display.Javascript object>

In [676]:
np.zeros((4,4))

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [700]:
!pip install sh

Collecting sh
  Downloading sh-1.12.13-py2.py3-none-any.whl
Installing collected packages: sh
Successfully installed sh-1.12.13


In [701]:
import sh
sh.git.commit('-am', 'auto commit tracked files')
commit_hash = sh.git('rev-parse', 'HEAD').strip()

In [702]:
o

u'a3b7c1a182f2b804034694b4e000b7eb650c51f0'

In [703]:
!git status

On branch master
Untracked files:
  (use "git add <file>..." to include in what will be committed)

	[31m.gitignore[m
	[31mchainer_bw/converter.py[m
	[31mmodels/cbow.py[m
	[31mnew_experiment.py[m
	[31mresult_test/[m
	[31mresults/[m
	[31mvis_test.html[m

nothing added to commit but untracked files present (use "git add" to track)


In [706]:
'experiment_config.yml'.endswith('.yaml')

False

In [711]:
import logging
logging.basicConfig(level = logging.INFO, 
    format='[%(levelname)s] %(asctime)s: :%(name)s:line %(lineno)d: %(message)s')
logger = logging.getLogger(__name__)

In [712]:
logger.info('yo')

INFO:__main__:yo


In [715]:
osp.pwd()

AttributeError: 'module' object has no attribute 'pwd'

In [717]:
yaml.dump({'test':'test'}, open('test.yaml','w'))

In [718]:
cat test.yaml

{test: test}
