In [42]:
import argparse
import pandas as pd

import numpy as np
import torch
from torch.autograd import Variable

import data
import utils
import models
from domain import get_domain
from engines.engine import Criterion


In [100]:
# Command-line style configuration for the notebook. The parser is built here
# and fed an explicit argument list in a later cell.
parser = argparse.ArgumentParser(description='testing script')

# --- data ---
parser.add_argument('--data', type=str, default='data/negotiate',
    help='location of the data corpus')

# --- embedding / hidden sizes ---
parser.add_argument('--nembed_word', type=int, default=256,
    help='size of word embeddings')
parser.add_argument('--nembed_ctx', type=int, default=64,
    help='size of context embeddings')
parser.add_argument('--nhid_lang', type=int, default=256,
    help='size of the hidden state for the language module')
# Help text previously duplicated the language-module description.
parser.add_argument('--nhid_cluster', type=int, default=256,
    help='size of the hidden state for the cluster module')
parser.add_argument('--nhid_ctx', type=int, default=64,
    help='size of the hidden state for the context module')
parser.add_argument('--nhid_strat', type=int, default=64,
    help='size of the hidden state for the strategy module')
parser.add_argument('--nhid_attn', type=int, default=64,
    help='size of the hidden state for the attention module')
parser.add_argument('--nhid_sel', type=int, default=64,
    help='size of the hidden state for the selection module')

# --- optimisation ---
parser.add_argument('--lr', type=float, default=20.0,
    help='initial learning rate')
parser.add_argument('--min_lr', type=float, default=1e-5,
    help='min threshold for learning rate annealing')
parser.add_argument('--decay_rate', type=float, default=9.0,
    help='decrease learning rate by this factor')
parser.add_argument('--decay_every', type=int, default=1,
    help='decrease learning rate after decay_every epochs')
parser.add_argument('--momentum', type=float, default=0.0,
    help='momentum for sgd')
parser.add_argument('--clip', type=float, default=0.2,
    help='gradient clipping')
parser.add_argument('--dropout', type=float, default=0.5,
    help='dropout rate in embedding layer')
parser.add_argument('--init_range', type=float, default=0.1,
    help='initialization range')
parser.add_argument('--max_epoch', type=int, default=30,
    help='max number of epochs')
parser.add_argument('--num_clusters', type=int, default=50,
    help='number of clusters')
parser.add_argument('--bsz', type=int, default=25,
    help='batch size')
parser.add_argument('--unk_threshold', type=int, default=20,
    help='minimum word frequency to be in dictionary')
parser.add_argument('--temperature', type=float, default=0.1,
    help='temperature')
# Help text previously duplicated the --sel_weight description.
parser.add_argument('--partner_ctx_weight', type=float, default=0.0,
    help='partner context weight')
parser.add_argument('--sel_weight', type=float, default=0.6,
    help='selection weight')
parser.add_argument('--seed', type=int, default=1,
    help='random seed')
parser.add_argument('--cuda', action='store_true', default=False,
    help='use CUDA')

# --- model files ---
parser.add_argument('--model_file', type=str, default='',
    help='path to save the final model')
parser.add_argument('--prediction_model_file', type=str, default='',
    help='path to save the prediction model')
parser.add_argument('--selection_model_file', type=str, default='',
    help='path to save the selection model')
parser.add_argument('--cluster_model_file', type=str, default='',
    help='path to save the cluster model')
parser.add_argument('--lang_model_file', type=str, default='',
    help='path to save the language model')

# --- behaviour switches ---
parser.add_argument('--visual', action='store_true', default=False,
    help='plot graphs')
parser.add_argument('--skip_values', action='store_true', default=False,
    help='skip values in ctx encoder')
# Valid model names are supplied by the project's `models` module.
parser.add_argument('--model_type', type=str, default='rnn_model',
    help='model type', choices=models.get_model_names())
parser.add_argument('--domain', type=str, default='object_division',
    help='domain for the dialogue')
parser.add_argument('--clustering', action='store_true', default=False,
    help='use clustering')
parser.add_argument('--sep_sel', action='store_true', default=False,
    help='use separate classifiers for selection')



_StoreTrueAction(option_strings=['--sep_sel'], dest='sep_sel', nargs=0, const=True, default=False, type=None, choices=None, help='use separate classifiers for selection', metavar=None)

In [101]:
# An explicit list is passed so argparse does not read the kernel's own
# sys.argv. Each option appears once; '--selection_model_file' used to be
# listed twice with the same value. Remove '--cuda' to leave that flag False.
args = parser.parse_args([
    '--cuda',
    '--selection_model_file=selection_model.th',
    '--lang_model_file=clustering_language_model.th',
    '--model_file=full_model.th',
    '--bsz=1',
    '--domain=object_division',
    '--sep_sel',
    '--model_type=latent_clustering_prediction_model',
    '--lr=0.001',
    '--nembed_ctx=64',
])

In [102]:
# Configure CUDA usage from the --cuda flag and keep the returned device id
# (it is handed to Criterion in a later cell).
device_id = utils.use_cuda(args.cuda)
# Seed with --seed before the corpus is built (presumably for repeatable runs).
utils.set_seed(args.seed)

# Resolve the domain object and the model class from their string names.
domain = get_domain(args.domain)
model_ty = models.get_model_type(args.model_type)

# Build the corpus with the corpus type declared by the selected model class.
# verbose=True is what produces the dataset statistics printed under this cell.
corpus = model_ty.corpus_ty(domain, args.data, freq_cutoff=args.unk_threshold,
        verbose=True, sep_sel=args.sep_sel)






dataset data/negotiate/train.txt, total 687919, unks 8718, ratio 1.27%
sample input i2w:  ['2', '1', '1', '6', '2', '1']
sample words:  ['YOU:', "i'd", 'like', 'the', 'hat', 'and', '1', 'ball', '.', '<eos>', 'THEM:', 'i', 'need', 'both', 'balls', '<eos>', 'YOU:', 'ok', '.', '<eos>', 'THEM:', '<selection>']
Sample output:  ['item0=2', 'item1=1', 'item2=0', 'item0=0', 'item1=0', 'item2=2']
dataset data/negotiate/val.txt, total 74653, unks 914, ratio 1.22%
sample input i2w:  ['1', '10', '3', '0', '1', '0']
sample words:  ['YOU:', 'i', 'need', 'the', 'book', 'and', 'two', 'hats', '<eos>', 'THEM:', 'i', 'get', 'the', 'ball', 'and', '1', 'hat', '<eos>', 'YOU:', 'actually', 'i', 'just', 'need', 'the', 'book', ',', 'so', 'you', 'can', 'have', 'the', 'rest', 'of', 'it', '<eos>', 'THEM:', 'you', 'get', 'book', 'since', 'its', 'worth', '10', 'to', 'you', 'i', 'get', 'the', 'rest', '.', 'deal', '<eos>', 'YOU:', '<selection>']
Sample output:  ['item0=1', 'item1=0', 'item2=0', 'item0=0', 'item1=3', 

# make a new model

In [103]:
# Instantiate the selected model class from the corpus dictionaries and the parsed args.
model = model_ty(corpus.word_dict, corpus.item_dict_old, corpus.context_dict, corpus.count_dict, args)

# load a model

In [104]:
# Rebinds `model` to whatever utils.load_model returns for --model_file,
# replacing any value `model` held before this cell ran.
model = utils.load_model(args.model_file)

In [47]:
# Wrap the model in the engine type declared by the model class.
engine = model_ty.engine_ty(model, args, verbose=True)
# Build the test batches (and their stats) at the configured batch size.
testset, testset_stats = corpus.test_dataset(args.bsz)
# Keep the first test batch around for ad-hoc inspection.
batch=testset[0]

In [88]:
# Take the third test batch apart; the second field is not needed here.
ctx, _, inpts, lens, tgts, sel_tgt, rev_idxs, hid_idxs, cnt = testset[2]

# Wrap every tensor in a Variable, single tensors first and then the lists.
ctx = Variable(ctx)
cnt = Variable(cnt)
inpts = list(map(Variable, inpts))
tgts = list(map(Variable, tgts))
rev_idxs = list(map(Variable, rev_idxs))
hid_idxs = list(map(Variable, hid_idxs))

# The selection target probabilities are derived by the engine from the
# wrapped inputs, before any padding is appended.
sel_tgt_probs = engine._make_sel_tgt_probs(inpts, lens, rev_idxs, hid_idxs, ctx)
sel_tgt = Variable(sel_tgt)

inpts, tgts, sel_tgt_probs, lens, rev_idxs, hid_idxs = engine._append_pad(
    inpts, tgts, sel_tgt_probs, lens, rev_idxs, hid_idxs)

# Sample input for torchinfo, in the argument order of the model call
#   engine.model(inpts, tgts, sel_tgt_probs, hid_idxs, ctx, cnt)
# The first two entries of each sequence are kept, each wrapped in a
# one-element list.
sample_in = [
    [inpts[:2]],
    [tgts[:2]],
    [sel_tgt_probs[:2]],
    [hid_idxs[:2]],
    ctx,
    cnt,
]

In [105]:
from torchinfo import summary

# verbose=1 makes torchinfo print the parameter table itself. The trailing
# semicolon stops the notebook from also rendering the returned statistics
# object, which previously showed the same table a second time.
summary(model, verbose=1);


Layer (type:depth-idx)                                  Param #
LatentClusteringPredictionModel                         --
├─LatentClusteringLanguageModel: 1-1                    --
│    └─LatentClusteringModel: 2-1                       --
│    │    └─MlpContextEncoder: 3-1                      13,760
│    │    └─Embedding: 3-2                              118,528
│    │    └─Sequential: 3-3                             65,792
│    │    └─Linear: 3-4                                 65,792
│    │    └─GRU: 3-5                                    394,752
│    │    └─Sequential: 3-6                             147,712
│    │    └─GRU: 3-7                                    394,752
│    │    └─GRUCell: 3-8                                394,752
│    │    └─ShardedLatentBottleneckModule: 3-9          718,200
│    │    └─GRUCell: 3-10                               394,752
│    │    └─Dropout: 3-11                               --
│    │    └─SimpleSeparateSelectionModule: 3-12         46,828


Layer (type:depth-idx)                                  Param #
LatentClusteringPredictionModel                         --
├─LatentClusteringLanguageModel: 1-1                    --
│    └─LatentClusteringModel: 2-1                       --
│    │    └─MlpContextEncoder: 3-1                      13,760
│    │    └─Embedding: 3-2                              118,528
│    │    └─Sequential: 3-3                             65,792
│    │    └─Linear: 3-4                                 65,792
│    │    └─GRU: 3-5                                    394,752
│    │    └─Sequential: 3-6                             147,712
│    │    └─GRU: 3-7                                    394,752
│    │    └─GRUCell: 3-8                                394,752
│    │    └─ShardedLatentBottleneckModule: 3-9          718,200
│    │    └─GRUCell: 3-10                               394,752
│    │    └─Dropout: 3-11                               --
│    │    └─SimpleSeparateSelectionModule: 3-12         46,828


In [91]:
# Per-layer output shapes and parameter counts for the sample input.
# verbose=1 already prints the table; the trailing semicolon suppresses the
# cell's own rendering of the returned object so the table is shown once.
summary(model, input_data=sample_in, batch_dim=1,
        col_names=['output_size', 'num_params'], verbose=1);

Layer (type:depth-idx)                        Output Shape              Param #
LatentClusteringModel                         --                        --
├─SimpleSeparateSelectionModule: 1-12         --                        --
│    └─ModuleList: 2-1                        --                        --
├─MlpContextEncoder: 1-1                      [1, 1]                    --
│    └─Sequential: 2-2                        [3, 1, 64]                --
│    │    └─Embedding: 3-1                    [3, 1, 64]                704
│    │    └─Dropout: 3-2                      [3, 1, 64]                --
│    └─Sequential: 2-3                        [3, 1, 64]                --
│    │    └─Embedding: 3-3                    [3, 1, 64]                704
│    │    └─Dropout: 3-4                      [3, 1, 64]                --
│    └─Sequential: 2-4                        [1, 1]                    --
│    │    └─Linear: 3-5                       [1, 1]                    12,352
├─Embedding: 1

Layer (type:depth-idx)                        Output Shape              Param #
LatentClusteringModel                         --                        --
├─SimpleSeparateSelectionModule: 1-12         --                        --
│    └─ModuleList: 2-1                        --                        --
├─MlpContextEncoder: 1-1                      [1, 1]                    --
│    └─Sequential: 2-2                        [3, 1, 64]                --
│    │    └─Embedding: 3-1                    [3, 1, 64]                704
│    │    └─Dropout: 3-2                      [3, 1, 64]                --
│    └─Sequential: 2-3                        [3, 1, 64]                --
│    │    └─Embedding: 3-3                    [3, 1, 64]                704
│    │    └─Dropout: 3-4                      [3, 1, 64]                --
│    └─Sequential: 2-4                        [1, 1]                    --
│    │    └─Linear: 3-5                       [1, 1]                    12,352
├─Embedding: 1

In [110]:
# Show the context dictionary's index-to-word list.
corpus.context_dict.idx2word


['1', '2', '3', '0', '4', '6', '5', '7', '8', '10', '9']

In [None]:
# Criteria over the word and item dictionaries; the selection criterion is
# given '<disconnect>' and '<disagree>' as bad tokens.
crit = Criterion(model.word_dict, device_id=device_id)
sel_crit = Criterion(
    model.item_dict,
    device_id=device_id,
    bad_toks=['<disconnect>', '<disagree>'],
)

# Fresh engine and test batches for the evaluation pass.
engine = model_ty.engine_ty(model, args, verbose=True)
testset, testset_stats = corpus.test_dataset(args.bsz)

# `extra` is indexed with the 'input' and 'output' keys in later cells.
(total_valid_loss,
 total_select_loss,
 total_partner_ctx_loss,
 extra) = engine.test_pass(testset, testset_stats)


In [61]:

# For every utterance tensor, print its hidden-index tensor followed by the
# decoded text. The unused `hids` accumulator was removed and the text is
# now assembled with a single join instead of repeated concatenation.
for inp, hid in zip(inpts, hid_idxs):
    # Each decoded slice is followed by one space, matching the old output.
    dialog = ''.join(' '.join(corpus.word_dict.i2w(step)) + ' ' for step in inp)
    print(hid)
    print(dialog)

tensor([[[15]]])
THEM: if i can have the ball and two books you can have the rest <eos> 
tensor([[[9]]])
YOU: i keep the ball you get <unk> else <eos> 
tensor([[[22]]])
THEM: i can not make that deal . i need the ball as well . are you willing to make another deal <eos> 
tensor([[[9]]])
YOU: you get the ball i get everything else <eos> 
tensor([[[4]]])
THEM: okay deal . <eos> 
tensor([[[1]]])
YOU: <selection> 
tensor([[[0]]])
<pad> 


In [None]:
    def _forward(self, batch):
        ctx, _, inpts, lens, tgts, sel_tgt, rev_idxs, hid_idxs, cnt = batch
        ctx = Variable(ctx)
        cnt = Variable(cnt)
        inpts = [Variable(inpt) for inpt in inpts]
        tgts = [Variable(tgt) for tgt in tgts]
        rev_idxs = [Variable(idx) for idx in rev_idxs]
        hid_idxs = [Variable(idx) for idx in hid_idxs]

        losses, stats = self.model.forward(inpts, tgts, hid_idxs, ctx, cnt)

        return losses, stats, lens

In [None]:
# Shape of the first entry stored under extra['output'].
extra['output'][0].shape

In [None]:
def input_map(batch, sep_sel=True):
    """Turn one raw batch into the inputs and selection target used for inspection.

    Args:
        batch: 9-tuple laid out as
            ``(ctx, partner_ctx, inpts, lens, tgts, sel_tgt, rev_idxs, hid_idxs, cnt)``;
            ``partner_ctx``, ``tgts`` and ``cnt`` are ignored.
        sep_sel: when true ``sel_tgt`` is wrapped as a single tensor, otherwise
            it is treated as a sequence and every element is wrapped.

    Returns:
        ``(inpts, lens, rev_idxs, hid_idxs, ctx, sel_tgt)`` where the first four
        sequences have had their last element dropped.
    """
    (context, _partner_ctx, utterances, lengths, _targets,
     selection, reverse_idxs, hidden_idxs, _counts) = batch

    context = Variable(context)
    utterances = list(map(Variable, utterances))
    reverse_idxs = list(map(Variable, reverse_idxs))
    hidden_idxs = list(map(Variable, hidden_idxs))
    selection = Variable(selection) if sep_sel else [Variable(t) for t in selection]

    # Remove YOU:/THEM: from the end: the final entry of each sequence is cut.
    return (
        utterances[:-1],
        lengths[:-1],
        reverse_idxs[:-1],
        hidden_idxs[:-1],
        context,
        selection,
    )

In [None]:
# Decode every evaluated batch into (dialogue text, target items, predicted
# items). The inner loop used to reuse `i`, shadowing the batch index; the
# two loops now use distinct names.
sentences = []
target = []
output = []
for batch_idx in range(len(extra['input'])):
    # in1..in5 keep their names because later cells inspect the values left
    # over from the final iteration.
    in1, in2, in3, in4, in5, tgt = input_map(extra['input'][batch_idx])
    out1 = extra['output'][batch_idx]
    # Each decoded slice is followed by one space, matching the old output.
    dialog = ''.join(' '.join(corpus.word_dict.i2w(step)) + ' ' for step in in1)
    sentences.append(dialog)
    target.append(corpus.item_dict_old.i2w(tgt))
    # Predicted items are the argmax over dim 1 of the model output.
    output.append(corpus.item_dict_old.i2w(torch.argmax(out1, dim=1)))

In [None]:
# One row per evaluated batch: dialogue text, target items, predicted items.
df = pd.DataFrame(dict(sentences=sentences, targets=target, outputs=output))
df

In [None]:
# Fraction of rows whose predicted items equal the target items.
(df['targets'] == df['outputs']).mean()

In [None]:
# Decode and print each element of in1 (the inputs left over from the last
# batch handled by the collection loop).
for i in in1:
    print(corpus.word_dict.i2w(i))

In [None]:
# Display in4, the fourth value returned by input_map (its hid_idxs slice).
in4

In [None]:
# Decode column 0 of the first element of in1 with the word dictionary.
corpus.word_dict.i2w(in1[0][:,0])

In [None]:
# Decode column 0 of in5 (the ctx value returned by input_map) with the context dictionary.
corpus.context_dict.i2w(in5[:,0])

In [None]:
# Decode column 2 of the first element of in1 with the word dictionary.
# NOTE(review): the notebook sets --bsz=1; if dim 1 is the batch dimension,
# index 2 is out of range — confirm.
corpus.word_dict.i2w(in1[0][:,2])

In [None]:
# Decode column 0 of in5 with the old item dictionary.
# NOTE(review): in5 is the ctx value, which another cell decodes with
# context_dict — confirm that item_dict_old is the intended dictionary here.
corpus.item_dict_old.i2w(in5[:,0])

In [None]:
#             batches.append((ctx, partner_ctx, inpts, lens, tgts, sel_tgt, rev_idxs, hid_idxs, cnt))

In [None]:
def make_readable(corpus, batch):
    """Placeholder for a helper that renders ``batch`` in readable form via ``corpus``.

    The cell previously contained only the signature, which is a syntax error.
    The stub keeps the notebook parseable until the helper is written.

    Args:
        corpus: corpus object providing the dictionaries used for decoding.
        batch: one batch tuple as produced by the corpus dataset.

    Raises:
        NotImplementedError: always; the helper has not been implemented yet.
    """
    # TODO: implement the decoding.
    raise NotImplementedError('make_readable is not implemented yet')

In [None]:
# Show the concrete class of the corpus object.
type(corpus)