In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sb
# Apply seaborn's default plot styling.
sb.set()
# Default figure size for later plots. Set after sb.set() because that call
# presumably resets matplotlib rcParams -- TODO confirm.
plt.rcParams['figure.figsize'] = (12,9)

In [2]:
import numpy as np
import numpy.random as npr
import chainer as ch
import chainer.functions as F
import chainer.links as L
import chainer.training.extensions

In [3]:
from dataset import load_data
from vocab import Vocab
from word_vectors import get_pretrained_vectors

ImportError: No module named vocab

In [None]:
# Load the training and dev splits from the MultiNLI 0.9 directory.
# NOTE(review): genres=[] presumably means "no genre filter" (e.g. use
# ['government'] to restrict to one genre) -- confirm in dataset.load_data.
train_data, dev_data = load_data(
    "data/multinli_0.9/",
    matched=True,
    genres=[],
    drop_confused=True,
    lowercase=True,
)
n_train, n_dev = len(train_data), len(dev_data)
print("{} training examples, {} dev examples".format(n_train, n_dev))

In [None]:
# Build the token vocabulary from the 'h' and 'p' token sequences of both the
# training and the dev data (same order as before: train h, train p, dev h, dev p).
# NOTE(review): dev tokens are added to the vocabulary too -- confirm this is intended.
vocab = Vocab(min_count=1)
for dataset in (train_data, dev_data):
    for field in ('h', 'p'):
        vocab.add([ token for datum in dataset for token in datum[field] ])
vocab.drop_infrequent()

In [None]:
# Pretrained word-vector file (300-d GloVe 6B, judging by the file name).
glove_path = 'data/word_vectors/glove.6B.300d.txt'
token_embeddings = get_pretrained_vectors(vocab, glove_path, trim=False)

In [None]:
# Vocabulary over the class labels seen in the training data; no padding or
# unknown token is reserved and no label is dropped for being infrequent.
train_labels = [ datum['c'] for datum in train_data ]
class_vocab = Vocab(train_labels, min_count=0, pad_token=None, unk_token=None)

In [None]:
class CBOW(ch.Chain):
    """Bag-of-words sentence encoder: a sentence is the sum of its token embeddings."""

    def __init__(self, embeddings):
        # The embedding table takes its dimensions from `embeddings`, which is
        # also handed to EmbedID as its third positional argument
        # (the initial weights in Chainer's signature -- TODO confirm for the
        # installed Chainer version).
        vocab_size = embeddings.shape[0]
        embed_dim = embeddings.shape[1]
        super(CBOW, self).__init__(
            token_embeddings=L.EmbedID(vocab_size, embed_dim, embeddings)
        )

    def __call__(self, xs):
        """Return a list with one summed embedding vector per id sequence in `xs`."""
        summed = []
        for token_ids in xs:
            token_vecs = self.token_embeddings(token_ids)
            # collapse the token axis
            summed.append(F.sum(token_vecs, axis=0))
        return summed
    
class NLIPredictor(ch.Chain):
    """ Convert pairs of premise and hypothesis sentences (tokenized) into an
    entailment class distribution.

    Each example in a batch is a dict whose 'h' and 'p' entries are token
    sequences. `h_model` encodes the 'h' sequences; `p_model` encodes the 'p'
    sequences, or `h_model` is reused for them when no `p_model` is given.
    `c_model` receives both lists of sentence representations, aligned
    example-by-example in the original batch order.
    """
    def __init__(self, vocab, h_model, c_model, p_model=None):
        super(NLIPredictor, self).__init__()
        self.add_link('h_model', h_model)
        if p_model is not None:
            self.add_link('p_model', p_model)
        else:
            # No separate premise encoder: __call__ falls back to h_model.
            self.p_model = None
        self.add_link('c_model', c_model)
        self.vocab = vocab

    def _to_sorted_ids(self, xs, field, volatile):
        """ Index the tokens of x[field] for every x and sort by length.

        Returns a pair of tuples (ids, order): `ids` holds one int32 id
        Variable per example, longest first; `order[j]` is the position in
        `xs` of the example that ended up at sorted position j.
        """
        ids = [ ch.Variable(np.array(
                  [ self.vocab.idx(token) for token in x[field] ],
                dtype=np.int32), volatile=volatile)
                for x in xs ]
        # sorted() is stable (also with reverse=True), so equally long
        # sequences keep their relative batch order.
        order = sorted(range(len(ids)), key=lambda i: len(ids[i]), reverse=True)
        return tuple(ids[i] for i in order), tuple(order)

    @staticmethod
    def _restore_order(sorted_items, order):
        """ Invert a sort: put sorted_items[j] back at position order[j]. """
        restored = [None] * len(order)
        for sorted_pos, original_pos in enumerate(order):
            restored[original_pos] = sorted_items[sorted_pos]
        return restored

    def _preprocess(self, xs, volatile):
        """ Convert a list of NLI data into token index arrays for hs and ps.

        These arrays are sorted by descending length, and for each of them the
        sort permutation (original batch position of every sorted element) is
        returned as well.

        This allows for chainer RNNs to handle separate sequence batches
        efficiently while restoring the order before the output.

        Returns:
            (hs_ids, hs_sort, ps_ids, ps_sort)
        """
        hs_ids, hs_sort = self._to_sorted_ids(xs, 'h', volatile)
        ps_ids, ps_sort = self._to_sorted_ids(xs, 'p', volatile)
        return hs_ids, hs_sort, ps_ids, ps_sort

    def __call__(self, batch, volatile='off'):
        """ Score a batch of examples; returns whatever `c_model` returns. """
        # convert to ids and sort by descending length
        hs_ids, hs_sort, ps_ids, ps_sort = self._preprocess(batch, volatile)

        # get sentence representations
        hs_reps = self.h_model(hs_ids)
        # Compare against None explicitly rather than relying on link truthiness.
        p_encoder = self.p_model if self.p_model is not None else self.h_model
        ps_reps = p_encoder(ps_ids)

        # Put them back in original order. The sort permutation has to be
        # inverted here; indexing with it a second time (reps[i] for i in sort)
        # would misalign hypotheses, premises and labels.
        hs_reps = self._restore_order(hs_reps, hs_sort)
        ps_reps = self._restore_order(ps_reps, ps_sort)

        return self.c_model(hs_reps, ps_reps)
    
class MLP(ch.Chain):
    """Stack of affine layers with an activation between consecutive layers.

    `h_sizes` lists the output size of every affine layer. `fs` optionally
    lists the activations applied after every layer except the last; when it
    is empty, ReLU is used throughout.
    """

    def __init__(self, h_sizes, fs=[]):
        super(MLP, self).__init__()

        # One Linear per requested size (input size left as None so it is
        # inferred), each also registered as child link h_0, h_1, ...
        self.hs = [ L.Linear(None, size) for size in h_sizes ]
        for idx, layer in enumerate(self.hs):
            self.add_link('h_{}'.format(idx), layer)

        # Activations are plain callables; there is one fewer than layers.
        n_activations = len(self.hs) - 1
        if not fs:
            self.fs = [F.relu] * n_activations
        else:
            assert len(fs) == n_activations, "Must have one less activation than affine transforms"
            self.fs = fs

    def __call__(self, x):
        """Apply layer/activation pairs in order, then the final layer alone."""
        hidden = x
        for depth, activation in enumerate(self.fs):
            layer = self.hs[depth]
            hidden = activation(layer(hidden))
        output_layer = self.hs[-1]
        return output_layer(hidden)
    
class EntailmentConcatModel(ch.Chain):
    """Classify sentence pairs from the concatenation of their representations."""

    def __init__(self, classifier):
        super(EntailmentConcatModel, self).__init__(classifier=classifier)

    def __call__(self, hs, ps):
        """Stack `hs` and `ps` if given as lists, join them side by side, classify."""
        # Only exact `list` inputs are stacked; anything else is used as-is.
        hs_mat = F.vstack(hs) if type(hs) is list else hs
        ps_mat = F.vstack(ps) if type(ps) is list else ps

        # one row per example: [h representation | p representation]
        joined = F.hstack([hs_mat, ps_mat])
        return self.classifier(joined)
    
class NLILossModel(ch.Chain):
    """ Wraps an NLI predictor with a softmax cross-entropy loss.

    Calling the model on a batch computes the loss against the batch's 'c'
    labels and reports loss, accuracy and the mean of the per-class
    precision / recall / f1 / support to the current chainer reporter.
    """
    def __init__(self, class_vocab, nli_predictor):
        super(NLILossModel, self).__init__(nli_predictor=nli_predictor)
        self.class_vocab = class_vocab
        # Placeholders until the first forward pass. `loss` is a plain None
        # (it used to be written with a trailing comma, which made it the
        # one-element tuple (None,) and defeated `is None` checks).
        self.loss = None
        self.accuracy = -1e5
        self.precision = -1e5
        self.recall = -1e5
        self.f1 = -1e5
        self.support = -1e5

    def __call__(self, batch, volatile='off'):
        """ Compute, report and return the loss for `batch`.

        Args:
            batch: list of examples; each is indexed with 'c' here for the
                class label and passed on unchanged to the predictor.
            volatile: forwarded to the label Variable and to the predictor.

        Returns:
            The softmax cross-entropy loss variable.
        """
        c_true = ch.Variable(np.array(
                    [ self.class_vocab.idx(x['c']) for x in batch ], dtype=np.int32
                 ), volatile=volatile)
        c_pred = self.nli_predictor(batch, volatile=volatile)

        self.loss = F.softmax_cross_entropy(c_pred, c_true)
        ch.reporter.report({'loss':self.loss}, self)

        self.accuracy = F.accuracy(c_pred, c_true)
        ch.reporter.report({'accuracy':self.accuracy}, self)

        # classification_summary yields one value per class; the reported
        # numbers are the unweighted means over classes.
        self.precision, self.recall, self.f1, self.support = F.classification_summary(c_pred, c_true)
        ch.reporter.report({'precision':self.precision.data.mean(),
                            'recall':self.recall.data.mean(),
                            'f1': self.f1.data.mean(),
                            'support':self.support.data.mean()}, self)
        return self.loss

In [None]:
# Sentence encoder: summed token embeddings, shared by both sentences because
# no separate p_model is passed to NLIPredictor.
cbow = CBOW(token_embeddings)

# Classifier over the concatenated sentence pair: MLP with layer sizes 100, 100, 3.
classifier = MLP([100,100,3])
c_model = EntailmentConcatModel(classifier)

# Full pipeline, wrapped with the loss / metric reporting model.
predictor = NLIPredictor(vocab, cbow, c_model)
loss_model = NLILossModel(class_vocab, predictor)

In [None]:
# Adam optimizer; no hyperparameters are overridden here.
optimizer = ch.optimizers.Adam()
# Attach the optimizer to the loss model (the link passed to setup).
optimizer.setup(loss_model)

In [None]:
class PassThroughUpdater(ch.training.StandardUpdater):
    """ Updater that hands each raw batch straight to the loss function.

    The batch taken from the 'main' iterator is not converted in any way
    (the `converter` argument is NOT used); turning it into chainer variables
    is left to the first component of the downstream model.
    """
    def update_core(self):
        """ Run one optimizer update on the next batch of the 'main' iterator. """
        raw_batch = self._iterators['main'].next()
        main_optimizer = self._optimizers['main']
        # Prefer an explicitly configured loss function; otherwise use the
        # optimizer's target.
        if self.loss_func:
            objective = self.loss_func
        else:
            objective = main_optimizer.target
        main_optimizer.update(objective, raw_batch)
        
import copy
class PassThroughEvaluator(ch.training.extensions.Evaluator):
    """ Evaluator that hands each raw batch straight to the evaluation function.

    The batches are not converted in any way (the `converter` argument is NOT
    used); turning them into chainer variables is left to the first component
    of the downstream model.
    """
    def evaluate(self):
        """ Evaluate on the 'main' iterator and return the averaged observations.

        Every batch is passed to the evaluation function with volatile='on'
        inside its own report scope; whatever gets reported there is added to
        a :class:`~chainer.DictSummary`.

        Returns:
            dict: The result of the summary's ``compute_mean()``.

        """
        main_iterator = self._iterators['main']
        main_target = self._targets['main']
        # An explicitly configured eval_func takes precedence over the target.
        if self.eval_func:
            run_batch = self.eval_func
        else:
            run_batch = main_target

        if self.eval_hook:
            self.eval_hook(self)

        # Loop over a shallow copy of the iterator, not the iterator itself.
        batches = copy.copy(main_iterator)
        stats = ch.reporter.DictSummary()

        for raw_batch in batches:
            batch_observation = {}
            with ch.reporter.report_scope(batch_observation):
                run_batch(raw_batch, volatile='on')
            stats.add(batch_observation)

        return stats.compute_mean()

In [None]:
# Number of examples per batch, shared by both iterators.
batch_size = 100

# Training batches: shuffled, repeating.
train_iter = ch.iterators.SerialIterator(
    train_data, batch_size, repeat=True, shuffle=True)

# Dev batches: fixed order, a single pass.
dev_iter = ch.iterators.SerialIterator(
    dev_data, batch_size, repeat=False, shuffle=False)

In [18]:
# Trainer: 10 epochs, output written under 'result'.
updater = PassThroughUpdater(train_iter, optimizer)
trainer = ch.training.Trainer(updater, (10, 'epoch'), out='result')

# Columns printed by PrintReport: epoch, then training and validation metrics.
report_columns = [
    'epoch',
    'main/accuracy', 'main/precision', 'main/recall', 'main/f1', 'main/support',
    'validation/main/accuracy', 'validation/main/precision', 'validation/main/recall',
    'validation/main/f1', 'validation/main/support',
]

# Snapshot trigger driven by the validation f1, checked once per epoch.
best_f1_trigger = ch.training.triggers.MaxValueTrigger('validation/main/f1', (1,'epoch'))

# Registered in the same order as before.
for extension in (
    PassThroughEvaluator(dev_iter, loss_model),
    ch.training.extensions.LogReport((1,'epoch')),
    ch.training.extensions.PrintReport(report_columns),
    ch.training.extensions.ProgressBar(),
    ch.training.extensions.snapshot(trigger=best_f1_trigger),
):
    trainer.extend(extension)

In [None]:
# Start training with the trainer configured in the previous cell.
trainer.run()

epoch       main/accuracy  main/precision  main/recall  main/f1     main/support  validation/main/accuracy  validation/main/precision  validation/main/recall  validation/main/f1  validation/main/support


  (beta_square * precision + recall)).astype(precision.dtype)


[J     total [..................................................]  0.25%
this epoch [#.................................................]  2.55%
       100 iter, 0 epoch / 10 epochs
       inf iters/sec. Estimated time to finish: 0:00:00.
[4A[J     total [..................................................]  0.51%
this epoch [##................................................]  5.09%
       200 iter, 0 epoch / 10 epochs
   0.10501 iters/sec. Estimated time to finish: 4 days, 7:21:05.315332.
[4A

  precision = tp / relevant


[J     total [..................................................]  0.76%
this epoch [###...............................................]  7.64%
       300 iter, 0 epoch / 10 epochs
   0.10707 iters/sec. Estimated time to finish: 4 days, 5:06:04.479383.
[4A[J     total [..................................................]  1.02%
this epoch [#####.............................................] 10.19%
       400 iter, 0 epoch / 10 epochs
   0.10778 iters/sec. Estimated time to finish: 4 days, 4:10:53.296206.
[4A[J     total [..................................................]  1.27%
this epoch [######............................................] 12.73%
       500 iter, 0 epoch / 10 epochs
   0.10817 iters/sec. Estimated time to finish: 4 days, 3:33:30.513343.
[4A[J     total [..................................................]  1.53%
this epoch [#######...........................................] 15.28%
       600 iter, 0 epoch / 10 epochs
   0.10836 iters/sec. Estimated time to fini

# TODO


* Compute micro, macro, and class-wise f1s
* Report training and validation metrics
* Report activation histograms
* Report activation variances
* Report gradient histograms
* Report gradient variances
* Checkpoint model every X min

Need to be able to:
* Run experiments but exchange architectures
* Reproduce all experiments
* Analyze results, diagnose optimization and data errors

In [None]:
cat result/log