In [1]:
import argparse
import pandas as pd

import numpy as np
import torch
from torch.autograd import Variable

import data
import utils
import models
from domain import get_domain
from engines.engine import Criterion


In [2]:
# Command-line style options for the evaluation run. Nothing is read from
# sys.argv here: the next cell feeds an explicit argument list to
# parser.parse_args().
parser = argparse.ArgumentParser(description='testing script')
# Corpus location, vocabulary cutoff and the checkpoint to evaluate.
parser.add_argument('--data', type=str, default='data/negotiate', help='location of the data corpus')
parser.add_argument('--unk_threshold', type=int, default=20, help='minimum word frequency to be in dictionary')
parser.add_argument('--model_file', type=str, help='pretrained model file')
parser.add_argument('--seed', type=int, default=1, help='random seed')
parser.add_argument('--hierarchical', action='store_true', default=False, help='use hierarchical model')
parser.add_argument('--bsz', type=int, default=16, help='batch size')
parser.add_argument('--cuda', action='store_true', default=False, help='use CUDA')
parser.add_argument('--domain', type=str, default='object_division', help='domain for the dialogue')
parser.add_argument('--sep_sel', action='store_true', default=False, help='use separate classifiers for selection')
# Valid model types are whatever the project's `models` module reports.
parser.add_argument('--model_type', type=str, default='rnn_model', help='model type', choices=models.get_model_names())
# Optimisation-related options. No visible cell reads them directly, but the
# whole `args` namespace is handed to the engine constructor further down.
parser.add_argument('--lr', type=float, default=20.0, help='initial learning rate')
parser.add_argument('--min_lr', type=float, default=1e-5, help='min threshold for learning rate annealing')
parser.add_argument('--decay_rate', type=float,  default=9.0, help='decrease learning rate by this factor')
parser.add_argument('--decay_every', type=int,  default=1, help='decrease learning rate after decay_every epochs')
parser.add_argument('--momentum', type=float, default=0.0, help='momentum for sgd')
parser.add_argument('--clip', type=float, default=0.2, help='gradient clipping')
parser.add_argument('--visual', action='store_true', default=False, help='plot graphs')
# NOTE(review): this help text is identical to --model_file's and looks like a
# copy-paste; no visible cell reads `args.sample_file` -- confirm what the
# option is for. Being the last expression of the cell, the returned
# _StoreAction is also echoed as the cell output.
parser.add_argument('--sample_file', type=str, help='pretrained model file')



_StoreAction(option_strings=['--sample_file'], dest='sample_file', nargs=None, const=None, default=None, type=<class 'str'>, choices=None, help='pretrained model file', metavar=None)

In [3]:
# Emulate the command line for this notebook run, one flag per line so that
# individual settings are easy to spot and change.
args = parser.parse_args([
    '--cuda',
    '--model_file=selection_model.th',
    '--bsz=1',
    '--domain=object_division',
    '--sep_sel',
    '--model_type=selection_model',
    '--lr=0.001',
])

In [4]:
# Evaluate the pretrained model on the corpus and report losses/perplexities.
# Statement order is kept as-is: the device and the seed are configured
# before the corpus and the model are created.
device_id = utils.use_cuda(args.cuda)
utils.set_seed(args.seed)

# Resolve the dialogue domain and the model family named in `args`.
domain = get_domain(args.domain)
model_ty = models.get_model_type(args.model_type)

# The corpus class comes from the model type; verbose=True produces the
# "dataset ... / sample ..." lines visible in this cell's output.
corpus = model_ty.corpus_ty(domain, args.data, freq_cutoff=args.unk_threshold,
        verbose=True, sep_sel=args.sep_sel)

model = utils.load_model(args.model_file)

# Word-level and selection-level criteria. In the visible cells they are only
# referenced by the commented-out manual loop below.
crit = Criterion(model.word_dict, device_id=device_id)
sel_crit = Criterion(model.item_dict, device_id=device_id,
        bad_toks=['<disconnect>', '<disagree>'])

# engine.test() returns both losses plus `extra`; later cells index `extra`
# with the keys 'input' and 'output'.
engine = model_ty.engine_ty(model, args, verbose=True)
test_loss, test_select_loss, extra = engine.test(corpus)

# --- Commented-out manual evaluation loop, kept for reference. ---
# It computes the same two quantities that engine.test() returns above
# (presumably superseded by that call -- confirm before deleting).
#testset, testset_stats = corpus.test_dataset(args.bsz)
#test_loss, test_select_loss = 0, 0

#N = len(corpus.word_dict)
#for batch in testset:
    # run forward on the batch, produces output, hidden, target,
    # selection output and selection target
    #out, hid, tgt, sel_out, sel_tgt = engine.forward(model, batch)
    #engine.valid_batch(batch)
    # compute LM and selection losses
    #test_loss += tgt.size(0) * crit(out.view(-1, N), tgt).data[0]
    #test_select_loss += sel_crit(sel_out, sel_tgt).data[0]

#test_loss /= testset_stats['nonpadn']
#test_select_loss /= len(testset)

# Report each loss together with its exponential (printed as perplexity).
print('testloss %.3f | testppl %.3f' % (test_loss, np.exp(test_loss)))
print('testselectloss %.3f | testselectppl %.3f' % (test_select_loss, np.exp(test_select_loss)))
print()
#print('extra: ', extra)

dataset data/negotiate/train.txt, total 687919, unks 8718, ratio 1.27%
sample input:  [1, 0, 0, 5, 1, 0]
sample input i2w:  ['2', '1', '1', '6', '2', '1']
sample words:  ['YOU:', "i'd", 'like', 'the', 'hat', 'and', '1', 'ball', '.', '<eos>', 'THEM:', 'i', 'need', 'both', 'balls', '<eos>', 'YOU:', 'ok', '.', '<eos>', 'THEM:', '<selection>']
Sample output:  ['item0=2', 'item1=1', 'item2=0', 'item0=0', 'item1=0', 'item2=2']
dataset data/negotiate/val.txt, total 74653, unks 914, ratio 1.22%
sample input:  [0, 9, 2, 3, 0, 3]
sample input i2w:  ['1', '10', '3', '0', '1', '0']
sample words:  ['YOU:', 'i', 'need', 'the', 'book', 'and', 'two', 'hats', '<eos>', 'THEM:', 'i', 'get', 'the', 'ball', 'and', '1', 'hat', '<eos>', 'YOU:', 'actually', 'i', 'just', 'need', 'the', 'book', ',', 'so', 'you', 'can', 'have', 'the', 'rest', 'of', 'it', '<eos>', 'THEM:', 'you', 'get', 'book', 'since', 'its', 'worth', '10', 'to', 'you', 'i', 'get', 'the', 'rest', '.', 'deal', '<eos>', 'YOU:', '<selection>']
Samp

In [5]:
# Shape of the first entry of extra['output'] (the cell output below shows
# torch.Size([6, 18]) for this run).
extra['output'][0].shape

torch.Size([6, 18])

In [6]:
def input_map(batch, sep_sel=True):
    """Pull the model inputs out of a batch tuple and wrap them as Variables.

    Args:
        batch: 9-element sequence. Positions 0, 2, 3, 5, 6 and 7 are used
            (context, inputs, lengths, selection target, reverse indices and
            hidden indices); the remaining positions are ignored.
        sep_sel: when true the selection target is wrapped as a single
            Variable, otherwise it is treated as a sequence and every
            element is wrapped individually.

    Returns:
        Tuple ``(inputs, lengths, rev_idxs, hid_idxs, ctx, sel_tgt)`` in
        which the first four sequences have their last element dropped.
    """
    context, _, turns, turn_lens, _, selection, reverse, hidden, _ = batch

    def wrap_each(tensors):
        # Variable-wrap every tensor of a sequence, keeping the order.
        return [Variable(tensor) for tensor in tensors]

    context = Variable(context)
    turns = wrap_each(turns)
    reverse = wrap_each(reverse)
    hidden = wrap_each(hidden)
    selection = Variable(selection) if sep_sel else wrap_each(selection)

    # remove YOU:/THEM: from the end
    return turns[:-1], turn_lens[:-1], reverse[:-1], hidden[:-1], context, selection

In [10]:
# Decode every evaluated batch into readable form:
#   sentences - the dialogue words of each batch joined into one string
#   target    - the reference selection tokens
#   output    - the predicted selection tokens (argmax over dim 1 of the
#               model output)
# The lists are rebuilt from scratch so the cell can be re-run safely.
sentences = []
target = []
output = []
for batch_idx in range(len(extra['input'])):
    # in1..in5, tgt and out1 deliberately keep their names: the scratch
    # cells further down inspect the values left over from the last batch.
    in1, in2, in3, in4, in5, tgt = input_map(extra['input'][batch_idx])
    out1 = extra['output'][batch_idx]
    # The per-turn loop uses its own variable (`turn`) so it no longer
    # rebinds the batch index. Every turn is followed by a single space,
    # which keeps the produced strings identical to the earlier version.
    dialog = ''.join(' '.join(corpus.word_dict.i2w(turn)) + ' ' for turn in in1)
    sentences.append(dialog)
    target.append(corpus.item_dict_old.i2w(tgt))
    output.append(corpus.item_dict_old.i2w(torch.argmax(out1, dim=1)))

In [13]:
# Side-by-side table of the decoded dialogues, the reference selections and
# the predicted selections, one row per evaluated batch.
pd.DataFrame(dict(sentences=sentences, targets=target, outputs=output))

Unnamed: 0,sentences,targets,outputs
0,THEM: if i can have the ball and two books you...,"[item0=3, item1=1, item2=0, item0=0, item1=0, ...","[item0=3, item1=1, item2=0, item0=0, item1=0, ..."
1,THEM: i would like the hat and one book <eos> ...,"[<no_agreement>, <no_agreement>, <no_agreement...","[<no_agreement>, <no_agreement>, <no_agreement..."
2,"THEM: gimme hat , the rest is yours ! <eos> YO...","[item0=2, item1=0, item2=3, item0=0, item1=1, ...","[item0=2, item1=0, item2=3, item0=0, item1=1, ..."
3,"YOU: i'd like the book , one hat , and one bal...","[item0=1, item1=0, item2=3, item0=0, item1=2, ...","[item0=0, item1=2, item2=0, item0=1, item1=0, ..."
4,"THEM: i just want the ball , you can have the ...","[<no_agreement>, <no_agreement>, <no_agreement...","[<no_agreement>, <no_agreement>, <no_agreement..."
...,...,...,...
1047,YOU: hi i would like the books and balls and y...,"[item0=1, item1=0, item2=3, item0=1, item1=2, ...","[item0=1, item1=2, item2=0, item0=1, item1=0, ..."
1048,THEM: how about i take the books and you can h...,"[item0=1, item1=0, item2=2, item0=1, item1=2, ...","[item0=1, item1=0, item2=2, item0=1, item1=2, ..."
1049,"YOU: i'd like to keep two books , you can have...","[<disagree>, <disagree>, <disagree>, <disagree...","[item0=3, item1=0, item2=0, item0=0, item1=2, ..."
1050,YOU: could i have two books and a ball please ...,"[item0=1, item1=1, item2=1, item0=1, item1=1, ...","[item0=1, item1=0, item2=1, item0=1, item1=2, ..."


In [None]:
# Scratch: print the decoded words of every turn tensor in `in1`, i.e. the
# inputs of the last batch processed by the decoding loop above.
for turn in in1:
    print(corpus.word_dict.i2w(turn))

In [None]:
# Scratch: `in4` is the fourth value returned by input_map (hid_idxs[:-1])
# for the last batch processed by the decoding loop.
in4

In [None]:
# Scratch: decode index 0 along the second axis of the first input tensor.
corpus.word_dict.i2w(in1[0][:,0])

In [None]:
# Scratch: `in5` is the context tensor returned by input_map (fifth value);
# decode index 0 along its second axis with the context dictionary.
corpus.context_dict.i2w(in5[:,0])

In [None]:
# Scratch: decode index 2 along the second axis of the first input tensor.
# NOTE(review): if the second axis is the batch axis, the `--bsz=1` passed in
# the args cell leaves only index 0 -- confirm that index 2 is valid here.
corpus.word_dict.i2w(in1[0][:,2])

In [None]:
# Scratch: decode index 0 along the second axis of `in5` with the item
# dictionary.
# NOTE(review): `in5` is the context tensor (fifth value returned by
# input_map), while the selection target is `tgt`; decoding the context with
# item_dict_old looks unintended -- confirm.
corpus.item_dict_old.i2w(in5[:,0])

In [None]:
# Reference: layout of one batch tuple, as built by a `batches.append((...))` call:
#   (ctx, partner_ctx, inpts, lens, tgts, sel_tgt, rev_idxs, hid_idxs, cnt)

In [None]:
def make_readable(corpus, batch):
    """Decode one batch into human-readable text.

    Mirrors the decoding done by the loop that fills `sentences` and
    `target`, packaged as a reusable function that works on a raw batch.

    Args:
        corpus: object exposing `word_dict.i2w(...)` and
            `item_dict_old.i2w(...)`, as used elsewhere in this notebook.
        batch: 9-element sequence laid out as
            (ctx, partner_ctx, inpts, lens, tgts, sel_tgt, rev_idxs,
            hid_idxs, cnt); only `inpts` and `sel_tgt` are read.

    Returns:
        Tuple ``(dialog, selection)``: `dialog` is one string with all
        decoded words separated by single spaces, `selection` is whatever
        `corpus.item_dict_old.i2w(sel_tgt)` returns.

    Raises:
        ValueError: if `batch` does not contain exactly nine elements.
    """
    _, _, inpts, _, _, sel_tgt, _, _, _ = batch
    # input_map drops the last input ("remove YOU:/THEM: from the end");
    # do the same so both decoders cover the same turns.
    words = [word for turn in inpts[:-1] for word in corpus.word_dict.i2w(turn)]
    return ' '.join(words), corpus.item_dict_old.i2w(sel_tgt)

In [None]:
# Scratch: show the concrete class of the corpus object built by
# model_ty.corpus_ty(...) in the evaluation cell.
type(corpus)