Load model and embedding data.

In [1]:
import theano
import theano.tensor as T
import numpy as np
import cPickle
import random
from utils import *
from state import *
from emb_model import EmbModel

theano.config.floatX='float32'

model_name = 'model/trainemb_emb100_h100'
nat_path = model_name + '_natural_model.npz'
abs_path = model_name + '_abstract_model.npz'
emb_path = 'model/word_emb.npz'

state = simple_state()
embModel = EmbModel(state)
nat_model = embModel.natural_encoder
abs_model = embModel.abstract_encoder

nat_model.load(nat_path)
abs_model.load(abs_path)
W_emb = embModel.W_emb
W_emb.set_value(numpy.load(emb_path)[W_emb.name])

print('Model loaded.')

emb_data = cPickle.load(open('tmp/emb_train.pkl'))
(word2ind, ind2word) = cPickle.load(open('tmp/dict.pkl'))
acttype = cPickle.load(open('tmp/acttype_cnt.pkl')).keys()
print acttype

print('Data loaded.')

print('Embedding examples:')
for (abs_coded, nat_coded, abs_emb, nat_emb) in emb_data[:1]:
    print "abstract act:", abs_translate(abs_coded, acttype)
    print "abstract embeding (first 10 dims):", abs_emb[:10]
    print "natural act:", nat_translate(nat_coded, ind2word)
    print "natural embeding (first 10 dims):", nat_emb[:10]

Model loaded.
[u'requestaddr', u'informpricerange', u'requestarea', u'negate', u'requestpricerange', u'reqmore', u'informname', u'denyfood', u'confirmpricerange', u'requestsignature', u'requestphone', u'informarea', u'reqalts', u'informfood', u'denyname', u'repeat', u'confirmarea', u'requestfood', u'informthis', u'requestname', u'restart', u'affirm', u'bye', u'thankyou', u'ack', u'confirmfood', u'hello', u'requestpostcode']
Data loaded.
Embedding examples:
abstract act: informpricerange informfood
abstract embeding (first 10 dims): [-1.07515895 -1.07764149  1.10352218 -1.07548642  1.10030317 -1.10138834
 -1.1110394  -1.08907247 -1.10309875 -1.10277915]
natural act: <START> moderately priced swedish food <END>
natural embeding (first 10 dims): [-1.18781614 -1.23067629  1.2489723  -1.19667137  1.25797606 -1.23147237
 -1.25716376 -1.21695387 -1.25545728 -1.24380517]


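Randomly select a sentence from the training data and use its natural-sentence embedding to find the k nearest neighbours (k-NN) among both the natural sentences and the abstract actions in the embedding space.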

In [2]:
def kNN_natural(emb_data, nat_emb, k):
    """Return indices of the k rows whose natural embedding is closest to nat_emb.

    Args:
        emb_data: sequence of rows; element 3 of each row is compared against
            the query (the notebook unpacks rows as
            (abs_coded, nat_coded, abs_emb, nat_emb)).
        nat_emb: query embedding, passed as the second argument to emb_dist.
        k: number of neighbours to keep.

    Returns:
        A list of row indices into emb_data, ordered by increasing emb_dist;
        ties keep their original row order because the sort is stable.
    """
    # sorted() accepts any iterable, so this does not depend on range()
    # returning a list (which range(...).sort() required).
    ranked = sorted(range(len(emb_data)),
                    key=lambda x: emb_dist(emb_data[x][3], nat_emb))
    return ranked[:k]
    
def kNN_abstract(emb_data, abs_emb, k):
    """Return indices of the k rows whose abstract embedding is closest to abs_emb.

    Args:
        emb_data: sequence of rows; element 2 of each row is compared against
            the query (the notebook unpacks rows as
            (abs_coded, nat_coded, abs_emb, nat_emb)).
        abs_emb: query embedding, passed as the second argument to emb_dist.
        k: number of neighbours to keep.

    Returns:
        A list of row indices into emb_data, ordered by increasing emb_dist;
        ties keep their original row order because the sort is stable.
    """
    # sorted() accepts any iterable, so this does not depend on range()
    # returning a list (which range(...).sort() required).
    ranked = sorted(range(len(emb_data)),
                    key=lambda x: emb_dist(emb_data[x][2], abs_emb))
    return ranked[:k]
    
n_test = 5
k = 5
for n in range(n_test):
    print 'Test %d:' % n
    r = random.randint(0, len(emb_data)-1)
    print 'natural sentence: %s' % nat_translate(emb_data[r][1], ind2word)
    knn_nat = kNN_natural(emb_data, emb_data[r][3], k)
    print '(Ground truth) abstract action: %s' % abs_translate(emb_data[r][0], acttype)
    print 'kNN natural sentences (k = %d):' % k
    for x in knn_nat:
        print nat_translate(emb_data[x][1], ind2word)
    knn_abs = kNN_abstract(emb_data, emb_data[r][3], k)
    print 'kNN abstract actions (k = %d):' % k
    for x in knn_abs:
        print abs_translate(emb_data[x][0], acttype)
    print

Test 0:
natural sentence: <START> anything else <END>
(Ground truth) abstract action: reqalts
kNN natural sentences (k = 5):
<START> is there anything else <END>
<START> is there anything else <END>
<START> is there anything else <END>
<START> do you have another option anything else <END>
<START> is there anything else <END>
kNN abstract actions (k = 5):
reqalts
requestpricerange
requestpricerange
requestpricerange
requestpricerange

Test 1:
natural sentence: <START> unintelligible <END>
(Ground truth) abstract action: None
kNN natural sentences (k = 5):
<START> noise <END>
<START> hello welcome to the cambridge system <END>
<START> system <END>
<START> unintelligible <END>
<START> unintelligible <END>
kNN abstract actions (k = 5):
requestpostcode
None
None
None
None

Test 2:
natural sentence: <START> cheap <END>
(Ground truth) abstract action: informpricerange
kNN natural sentences (k = 5):
<START> moderate <END>
<START> i dont care about the price range <END>
<START> cheap <END>
<ST

Now we manually provide an input sentence (any word that is out of vocabulary is simply dropped).

In [3]:
test_sent = 'i want cheap chinese food at town center'
words = test_sent.split()
nat_coded = [1]
for w in words:
    if w in word2ind:
        nat_coded.append(word2ind[w])
    else:
        print '  out of vocab: %s' % w
m = state['seqlen']
nat_coded_mat = numpy.zeros((m, 1), dtype='int32')
sent_len = len(nat_coded)
nat_coded_mat[:sent_len, 0] = nat_coded
nat_output_fn = theano.function([embModel.natural_input],
                                embModel.nat_output)
nat_emb = nat_output_fn(nat_coded_mat)[0]

print 'The embedding of the given sentence is (first 10 dims):'
print nat_emb[:10]

knn_nat = kNN_natural(emb_data, nat_emb, k)
print 'kNN natural sentences (k = %d):' % k
for x in knn_nat:
    print nat_translate(emb_data[x][1], ind2word)
knn_abs = kNN_abstract(emb_data, nat_emb, k)
print 'kNN abstract actions (k = %d):' % k
for x in knn_abs:
    print abs_translate(emb_data[x][0], acttype)
print

The embedding of the given sentence is (first 10 dims):
[-1.22900498 -1.23601556  1.26215184 -1.23137248  1.25976586 -1.2596302
 -1.27010119 -1.24573755 -1.26133811 -1.26154995]
kNN natural sentences (k = 5):
<START> restaurant in the south part of town that serves mexican food <END>
<START> um im looking for a restaurant in the center part of town that serves lebanese food <END>
<START> i want to find a restaurant in the center and it should serve lebanese food <END>
<START> i need a restaurant serving crossover food in the west part of town <END>
<START> im looking for a restaurant in the north part of town that serves european food <END>
kNN abstract actions (k = 5):
informarea informfood
informarea informfood
informpricerange informfood
informarea informfood
informarea informfood

