In [1]:
import pandas as pd
import re
import random
import sys
import re

import nltk
from nltk.tag import StanfordNERTagger
from nltk.tokenize import word_tokenize, sent_tokenize
import itertools
from collections import defaultdict

import numpy as np

import pickle

from __future__ import division

In [2]:
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *

In [3]:
# Read the preprocessing metadata (vocabulary tables and model sizes) saved
# alongside the model.
# NOTE: pickle.load can execute arbitrary code while deserialising, so this
# file must come from a trusted source.
with open('../models/metadata_punc.pkl', 'rb') as metadata_file:
    metadata = pickle.load(metadata_file)

In [6]:
# Sizes used when building the network: vocabulary size and embedding width.
xvocab_size, emb_dim = metadata["xvocab_size"], metadata["emb_dim"]

# Lookup tables: word -> id (used to encode queries) and id -> word (used to
# turn sampled ids back into text).
w2idx, idx2w = metadata["w2idx"], metadata["idx2w"]

# Ids of the special tokens that open and close a decoded reply.
start_id, end_id = w2idx["start_id"], w2idx["end_id"]

#### Model

In [7]:
def model(encode_seqs, decode_seqs, is_train=True, reuse=False):
    """Build the seq2seq network under the "model" variable scope.

    The encoder and decoder inputs share a single embedding table, which
    feeds a 3-layer BasicLSTMCell Seq2Seq layer followed by a dense
    projection onto the vocabulary (identity activation, i.e. no softmax).

    Reads the module-level ``xvocab_size`` and ``emb_dim``.

    Args:
        encode_seqs: id tensor for the encoder input; callers in this
            notebook pass an int64 placeholder of shape [batch, None].
        decode_seqs: id tensor for the decoder input, same layout.
        is_train: when True the Seq2Seq layer is built with dropout 0.5,
            otherwise with no dropout.
        reuse: forwarded to ``tf.variable_scope`` so that a second call can
            share the variables created by the first.

    Returns:
        A ``(net_out, net_rnn)`` tuple: the output DenseLayer and the
        Seq2Seq layer it is stacked on.
    """
    # Dropout is only applied to the training graph.
    dropout_rate = 0.5 if is_train else None

    with tf.variable_scope("model", reuse=reuse):
        # One embedding table serves both sides (fine for a chatbot; a
        # translation model would typically use two separate tables).
        with tf.variable_scope("embedding") as embed_scope:
            embed_kwargs = dict(
                vocabulary_size=xvocab_size,
                embedding_size=emb_dim,
                name='seq_embedding',
            )
            net_encode = EmbeddingInputlayer(inputs=encode_seqs, **embed_kwargs)

            # Allow the decoder embedding to reuse both the TF variables and
            # the TensorLayer layer name created just above.
            embed_scope.reuse_variables()
            tl.layers.set_name_reuse(True)

            net_decode = EmbeddingInputlayer(inputs=decode_seqs, **embed_kwargs)

        uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
        encode_lengths = retrieve_seq_length_op2(encode_seqs)
        decode_lengths = retrieve_seq_length_op2(decode_seqs)
        net_rnn = Seq2Seq(
            net_encode,
            net_decode,
            cell_fn=tf.contrib.rnn.BasicLSTMCell,
            n_hidden=emb_dim,
            initializer=uniform_init,
            encode_sequence_length=encode_lengths,
            decode_sequence_length=decode_lengths,
            initial_state_encode=None,
            dropout=dropout_rate,
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq',
        )

        # Project every RNN output onto the vocabulary.
        net_out = DenseLayer(net_rnn, n_units=xvocab_size, act=tf.identity, name='output')

    return net_out, net_rnn

In [8]:
# model for training
batch_size = 32

# Four int64 placeholders of shape [batch_size, None], created in this order.
# target_mask is annotated in the original notebook as coming from
# tl.prepro.sequences_get_mask().
encode_seqs, decode_seqs, target_seqs, target_mask = (
    tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name=placeholder_name)
    for placeholder_name in ("encode_seqs", "decode_seqs", "target_seqs", "target_mask")
)

# First call to model(): reuse=False, so this is the call that creates the
# variables under the "model" scope.
net_out, _ = model(encode_seqs, decode_seqs, is_train=True, reuse=False)

  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [**] Seq2Seq model/seq2seq: n_hidden:1024 cell_fn:BasicLSTMCell dropout:0.5 n_layer:3
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_encode: n_hidden:1024, in_dim:3 in_shape:(32, ?, 1024) cell_fn:BasicLSTMCell dropout:0.5 n_layer:3
       batch_size (concurrent processes): 32
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_decode: n_hidden:1024, in_dim:3 in_shape:(32, ?, 1024) cell_fn:BasicLSTMCell dropout:0.5 n_layer:3
       batch_size (concurrent processes): 32
  [TL] DenseLayer  model/output: 6004 identity


In [9]:
# model for inferencing
# Batch size is fixed to 1 here: one query (or one decoder step) per run.
encode_seqs2 = tf.placeholder(tf.int64, [1, None], "encode_seqs")
decode_seqs2 = tf.placeholder(tf.int64, [1, None], "decode_seqs")

# reuse=True shares the variables created by the training graph;
# is_train=False builds the Seq2Seq layer without dropout.
net, net_rnn = model(
    encode_seqs=encode_seqs2,
    decode_seqs=decode_seqs2,
    reuse=True,
    is_train=False,
)

# Turn the dense layer's raw scores into a probability distribution.
y = tf.nn.softmax(net.outputs)

  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [**] Seq2Seq model/seq2seq: n_hidden:1024 cell_fn:BasicLSTMCell dropout:None n_layer:3
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_encode: n_hidden:1024, in_dim:3 in_shape:(1, ?, 1024) cell_fn:BasicLSTMCell dropout:None n_layer:3
       batch_size (concurrent processes): 1
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_decode: n_hidden:1024, in_dim:3 in_shape:(1, ?, 1024) cell_fn:BasicLSTMCell dropout:None n_layer:3
       batch_size (concurrent processes): 1
  [TL] DenseLayer  model/output: 6004 identity


In [10]:
# Let TF fall back to another device when an op cannot run on the requested
# one, and keep device-placement logging off.
session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
sess = tf.Session(config=session_config)

# Initialise every variable first, then load the saved parameters from the
# .npz file into the network.
tl.layers.initialize_global_variables(sess)
tl.files.load_and_assign_npz(sess=sess, name='n_withPunc.npz', network=net)

[*] Load n_withPunc.npz SUCCESS!


<tensorlayer.layers.DenseLayer at 0x7f0caeb62890>

In [16]:
 # Example customer queries fed to the model in the next cell.
 seeds = [
     "i can not find my luggage",
     "im stuck in yvr",
     "i want to rebook my flight",
     "where is the restroom",
     "hi",
 ]

In [17]:
# Decoding settings for the demo below.
n_replies = 5        # replies sampled per query
max_reply_len = 500  # cap on decode steps after the first word

for seed in seeds:
    print("Query >" + seed)
    # Map the query to vocabulary ids; words missing from w2idx are dropped.
    seed_id = [w2idx[w] for w in seed.split(" ") if w in w2idx]

    # 1. encode, get state.
    # The encoder output depends only on the query, and the inference graph
    # is built with is_train=False (no dropout), so it is computed once and
    # reused for every reply instead of being re-run per reply.
    encoder_state = sess.run(net_rnn.final_state_encode, {encode_seqs2: [seed_id]})

    for _reply in range(n_replies):  # 1 Query --> n_replies Reply
        # 2. decode, feed start_id, get first word
        #   ref https://github.com/zsdonghao/tensorlayer/blob/master/example/tutorial_ptb_lstm_state_is_tuple.py
        o, state = sess.run([y, net_rnn.final_state_decode],
                            {net_rnn.initial_state_decode: encoder_state,
                             decode_seqs2: [[start_id]]})
        w_id = tl.nlp.sample_top(o[0], top_k=3)
        w = idx2w[w_id]
        sentence = [w]

        # One probability is recorded per decode step.
        # NOTE(review): the first entry is the probability of the *sampled*
        # word, whereas every later entry is the step's *maximum*
        # probability, which need not belong to the sampled word (the later
        # steps sample with top_k=2). This mirrors the original behaviour -
        # confirm which of the two is intended.
        step_probs = [o[0][w_id]]

        # 3. decode, feed state iteratively
        for _step in range(max_reply_len):
            o, state = sess.run([y, net_rnn.final_state_decode],
                                {net_rnn.initial_state_decode: state,
                                 decode_seqs2: [[w_id]]})
            w_id = tl.nlp.sample_top(o[0], top_k=2)
            w = idx2w[w_id]

            # Highest probability at this step; max() replaces sorting the
            # whole distribution just to read its first element.
            step_probs.append(o[0].max())

            # The probability is recorded before this check, so a reply that
            # ends on end_id has one more probability than printed words.
            if w_id == end_id:
                break
            sentence.append(w)

        # float64 matches the dtype the original np.append-based code produced.
        probs = np.array(step_probs, dtype=np.float64)
        print(" >" + ' '.join(sentence))
        print(probs)
    print("")


Query >i can not find my luggage
 >we are sorry to hear that your bag is delayed , have you filed a claim with baggage service ?
[ 0.18993774  0.65783632  0.87407374  0.43616086  0.98127925  0.34152246
  0.31070092  0.56916988  0.4902693   0.93209684  0.6693821   0.17464805
  0.9933477   0.84235668  0.99366623  0.64778697  0.89538014  0.76933485
  0.62730265  0.98704588  0.96256745]
 >please dm us your bag file number . we will take a look .
[ 0.10109677  0.27507511  0.60650462  0.90097743  0.46034697  0.38147968
  0.46390492  0.56463665  0.91043484  0.66198343  0.39469746  0.99943811
  0.93408662  0.60682893  0.98128271]
 >hi unk , we are sorry to hear this . can you dm us your baggage reference , email address and contact number and we can check 1 / 2 for an update for you . thanks . 2 / 2
[ 0.15161397  0.23477286  0.84240943  0.20274828  0.67203593  0.7384606
  0.60005528  0.92334145  0.35418338  0.93596154  0.20327432  0.98452318
  0.50128043  0.89411527  0.83794165  0.79068768  0.