Load the two models and the vocabulary dictionary.

In [3]:
import theano
import theano.tensor as T
import numpy as np
import cPickle
import random
from utils import *
from state import *
from title_model import TitleModel
from whenst_hour_model import WhenstHourModel

# Make float32 Theano's default floating-point type.
theano.config.floatX='float32'

title_model_name = 'model/title_emb256_h256_f32_tok10_model.npz'

# NOTE(review): this rebinds `title_state` from the star-imported factory to the
# object it returns, so re-running this cell without restarting the kernel will
# try to call that object instead of the factory. Later cells index
# `title_state[...]`, so the name is kept as is.
title_state = title_state()
title_model = TitleModel(title_state, test_mode=True)
title_model.load(title_model_name)

whenst_hour_model_name = 'model/whenst_hour_emb256_h256_f32_model.npz'

# NOTE(review): same factory-shadowing caveat as `title_state` above.
whenst_hour_state = whenst_hour_state()
whenst_hour_model = WhenstHourModel(whenst_hour_state, test_mode=True)
whenst_hour_model.load(whenst_hour_model_name)

# Load the vocabulary mappings; the remaining three pickled entries are unused here.
# The file is opened in a `with` block so the handle is closed deterministically.
# SECURITY: unpickling can execute arbitrary code -- only load a dict.pkl that
# comes from a trusted source.
with open('data/dict.pkl') as dict_file:
    (ind2word, word2ind, _, _, _) = cPickle.load(dict_file)

print('Data loaded.')

Data loaded.


Now we manually provide input sentences (if a word is out of vocabulary, we replace it with a randomly chosen `<TOKn>` placeholder token).

In [7]:
def restoreW(ind_lst, ind2word, tmp_map):
    """Turn a sequence of word indices back into a space-separated string.

    Args:
        ind_lst: iterable of word indices to decode.
        ind2word: index -> word lookup used for every index not in `tmp_map`.
        tmp_map: index -> word overrides; an index present here takes
            precedence over `ind2word`.

    Returns:
        The decoded words joined by single spaces ('' for an empty input).
    """
    # Consult the override map first; only fall back to the vocabulary when the
    # index has no override, so `ind2word` is never touched for overridden ids.
    decoded = [tmp_map[idx] if idx in tmp_map else ind2word[idx] for idx in ind_lst]
    return ' '.join(decoded)
    

test_sents = ['i would like to go hiking at three pm tomorrow', 'i want to sleep all day', 'i want to play video games', \
              'i want to visit my dental', 'i will wash my car this afternoon', 'i want to play chess this afternoon']

for (k, test_sent) in enumerate(test_sents):
    print "Test sent:", test_sent
    
    # Process title
    words = test_sent.split()
    nat_coded = [1]
    tmp_map = {}
    tok_set = range(10)
    for w in words:
        if w in word2ind:
            nat_coded.append(word2ind[w])
        else:
            tok_ind = random.choice(tok_set)
            tok_s = '<TOK%d>' % tok_ind
            tok_set.remove(tok_ind)
            nat_coded.append(word2ind[tok_s])
            tmp_map[word2ind[tok_s]] = w
            print '  out of vocab: %s, replaced with %s' % (w, tok_s)
    nat_coded.append(0)
    print 'Coded input:', nat_coded
    print restoreW(nat_coded, ind2word, tmp_map)

    m = title_state['seq_len_in']
    nat_coded_mat = numpy.zeros((m, 2), dtype='int32')
    nat_mask = numpy.zeros((m, 2), dtype='float32')
    sent_len = len(nat_coded)
    nat_coded_mat[:sent_len, 0] = nat_coded
    nat_mask[:sent_len, 0] = 1
    nat_coded_mat[:sent_len, 1] = nat_coded
    nat_mask[:sent_len, 1] = 1
    pred_fn = title_model.build_gen_function()
    
    res = [1]
    abs_in = 1
    title_model.gen_reset()
    while True:
        abs_in_mat = np.zeros((2, ), dtype='int32') + abs_in
        #print 'abs_in', abs_in_mat
        [p_t, o_t, alpha_t] = pred_fn(nat_coded_mat, nat_mask, abs_in_mat)
        #print "ot", o_t, ind2word[o_t[0]]
        pt_col = p_t[0]
        alpha_t = alpha_t[:, 0]
        #print alpha_t
        alpha_s = alpha_t.argsort()[::-1]
        #print sum(pt_col)
        pt_norm = [1.0 * a / sum(pt_col) for a in pt_col]
        #print pt_norm
        ind = np.asarray(pt_norm).argmax()
        abs_in = ind
        res.append(ind)
        if ind == 0 or len(res) > 10:        
            break
        
        #print 'Explanation of: %s' % restoreW([ind], ind2word, tmp_map)
        #for k in alpha_s[:len(nat_coded)]:
        #    print "    %s: %.4f" % (restoreW([nat_coded[k]], ind2word, tmp_map), alpha_t[k])
    print
    print restoreW(res, ind2word, tmp_map)
    print
    
    # Whenst hour
    whenst_hour_pred = theano.function([whenst_hour_model.x_data, whenst_hour_model.xmask], \
                                      whenst_hour_model.ot)
    whenst_hour_ot = whenst_hour_pred(nat_coded_mat, nat_mask)
    print whenst_hour_ot

Test sent: i would like to go hiking at three pm tomorrow
  out of vocab: hiking, replaced with <TOK8>
  out of vocab: tomorrow, replaced with <TOK5>
Coded input: [1, 2227, 109, 2268, 1186, 1800, 10, 1139, 404, 1578, 7, 0]
<START> i would like to go hiking at three pm tomorrow <END>

<START> go hiking <END>

[15 15]
Test sent: i want to sleep all day
  out of vocab: sleep, replaced with <TOK5>
Coded input: [1, 2227, 152, 1186, 7, 1587, 1630, 0]
<START> i want to sleep all day <END>

<START> sleep out <END>

[24 24]
Test sent: i want to play video games
  out of vocab: play, replaced with <TOK3>
  out of vocab: video, replaced with <TOK2>
  out of vocab: games, replaced with <TOK6>
Coded input: [1, 2227, 152, 1186, 5, 4, 8, 0]
<START> i want to play video games <END>

<START> video play <END>

[24 24]
Test sent: i want to visit my dental
Coded input: [1, 2227, 152, 1186, 689, 137, 534, 0]
<START> i want to visit my dental <END>

<START> visit my dental <END>

[24 24]
Test sent: i will w