In [1]:
import daily_dialogue
import glob
import numpy as np
import pickle
from tqdm import tqdm
import tensorflow as tf
import tensorflow.contrib.legacy_seq2seq as seq2seq
import random
import json
import os
import time
from IPython.display import HTML
from time import gmtime, strftime
from nltk.translate.bleu_score import sentence_bleu

# Ask TensorFlow's native (C++) layer to keep its log output quiet.
# NOTE(review): this assignment runs after `import tensorflow` above; the variable
# is presumably only honoured if set before the native logger is first used --
# confirm, and consider moving it above the TensorFlow import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

  from ._conv import register_converters as _register_converters


In [2]:
# Build the aligned source/target token lists with the project-local helper.
(data_X, data_Y, max_dialogue_len) = daily_dialogue.create_dataset()

# Quick sanity check: number of pairs, then one source utterance and its reply.
print(len(data_X), data_X[1], data_Y[1], sep="\n")

150946
['of', 'course', ',', 'this', 'is', 'my', 'first', 'time', 'here', '.', 'my', 'mom', 'has', 'a', 'membership', 'here', '.', 'here', 'is', 'the', 'card', '.', '<EOS>']
['<START>', 'okay', 'then', '.', 'you', 'are', 'the', 'younger', 'customer', 'here', 'ever', '.', '<EOS>']


In [3]:
def get_unique_words(data_X, data_Y):
    """Collect the vocabulary of two index-aligned corpora of token lists.

    Args:
        data_X: sequence of token lists (source side).
        data_Y: sequence of token lists (target side); must have at least
            ``len(data_X)`` entries.

    Returns:
        A list whose first element is ``"<PAD>"`` (so padding always maps to
        index 0), followed by every other distinct token in sorted order.

    The ordering is deterministic so that a vocabulary rebuilt in a new
    interpreter session assigns the same index to every word. Note that a
    model trained against a differently ordered vocabulary file cannot be
    reused with a freshly generated one.
    """
    vocabulary = set()

    # Accumulate directly into one set instead of materialising a list that
    # holds every per-dialogue token set.
    for index in range(len(data_X)):
        vocabulary.update(data_X[index])
        vocabulary.update(data_Y[index])

    # "<PAD>" is re-inserted at position 0 below; drop any occurrence found in
    # the data so it cannot appear twice.
    vocabulary.discard("<PAD>")

    return ["<PAD>"] + sorted(vocabulary)

In [4]:
def build_vocabs(unique_words):
    """Create the two lookup tables between words and integer ids.

    Args:
        unique_words: list of tokens; a token's position is its id.

    Returns:
        ``(word2idx, idx2word, vocab_size)`` where ``vocab_size`` is the
        number of entries in ``word2idx``.
    """
    idx2word = dict(enumerate(unique_words))

    word2idx = {}
    for position, token in idx2word.items():
        # A repeated token keeps the id of its last occurrence.
        word2idx[token] = position

    return word2idx, idx2word, len(word2idx)

In [5]:
# unique_words = get_unique_words(data_X, data_Y)
# with open("aug_unique_words.p", "wb") as pickle_d:
#     pickle.dump(unique_words, pickle_d)

In [6]:
# Load the cached vocabulary list and derive the word <-> id lookup tables.
# The context manager closes the file handle, which a bare open() left dangling.
# NOTE: pickle.load can execute arbitrary code; only load a vocabulary file
# that you produced yourself or otherwise trust.
with open('aug_unique_words.p', 'rb') as vocab_file:
    unique_words = pickle.load(vocab_file)
word2idx, idx2word, vocab_size = build_vocabs(unique_words)
# Vocabulary size followed by the ids of the special tokens.
print (vocab_size, word2idx["<START>"], word2idx["<PAD>"], word2idx["<EOS>"], word2idx["<unk>"])

24043 906 0 11676 17782


In [7]:
# The first 145000 pairs are used for training; everything after is held out.
split_at = 145000
train_X, test_X = data_X[:split_at], data_X[split_at:]
train_Y, test_Y = data_Y[:split_at], data_Y[split_at:]

In [8]:
def pad_sequences(sequences, lengths, batch_size):
    """Right-pad id sequences with the ``<PAD>`` id to a common length.

    Args:
        sequences: list of lists of token ids.
        lengths: ``lengths[i]`` is the unpadded length of ``sequences[i]``;
            the target length is ``max(lengths)``.
        batch_size: number of leading sequences to pad.

    Returns:
        ``np.ndarray`` built from the padded sequences.

    The caller's lists are left untouched: padding is applied to copies, so
    calling this twice on the same data (e.g. when re-running a cell) cannot
    accumulate padding in the input.
    """
    pad_id = word2idx["<PAD>"]
    max_len = max(lengths)

    padded = [list(sequence) for sequence in sequences]
    for i in range(batch_size):
        padded[i].extend([pad_id] * (max_len - lengths[i]))

    return np.asarray(padded)

In [9]:
def batch_data(X, Y, batch_size):
    """Yield consecutive, padded mini-batches of vocabulary ids.

    Args:
        X: list of source token lists.
        Y: list of target token lists, index-aligned with ``X``.
        batch_size: number of pairs per batch. A trailing group of fewer than
            ``batch_size`` pairs is not yielded.

    Yields:
        A 6-tuple of arrays:
        encoder inputs, decoder inputs (target without its last token),
        decoder targets (target without its first token), encoder lengths,
        decoder lengths, and 1/0 weights marking real vs. padded decoder
        positions.
    """
    start = 0
    while start + batch_size <= len(X):
        stop = start + batch_size

        enc_ids, dec_in_ids, dec_out_ids = [], [], []
        for sample in range(start, stop):
            enc_ids.append([word2idx[token] for token in X[sample]])
            response = Y[sample]
            # Teacher forcing: the input is shifted one step behind the target.
            dec_in_ids.append([word2idx[token] for token in response[:-1]])
            dec_out_ids.append([word2idx[token] for token in response[1:]])

        enc_lens = [len(ids) for ids in enc_ids]
        dec_lens = [len(ids) for ids in dec_in_ids]

        # 1 for every real decoder step, 0 for every padded one.
        weights = [[1] * n + [0] * (max(dec_lens) - n) for n in dec_lens]

        yield (pad_sequences(enc_ids, enc_lens, batch_size),
               pad_sequences(dec_in_ids, dec_lens, batch_size),
               pad_sequences(dec_out_ids, dec_lens, batch_size),
               np.asarray(enc_lens),
               np.asarray(dec_lens),
               np.asarray(weights))

        start = stop

In [10]:
# Hidden size of each encoder direction and of the decoder LSTM.
input_num_units = 128
decoder_num_units = 256

# The decoder's initial state is the concatenation of the forward and backward
# encoder states, so it has to be exactly twice as wide as one direction.
assert decoder_num_units == 2 * input_num_units

# Word-embedding width and the keep probability handed to the dropout wrappers.
embedding_size = 300
keep_prob = 0.75

In [11]:
# don't run for demo
# Builds the training graph: shared embedding -> bidirectional LSTM encoder ->
# LSTM decoder with Luong attention -> masked cross-entropy -> clipped Adam step.
tf.reset_default_graph()
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True))

# Graph inputs. Sequence tensors are batch-major (time_major=False is used below);
# the placeholder names are what test_sample() later feeds by string.
encoder_inputs = tf.placeholder(tf.int32, [None, None], 'encoder_inputs')
decoder_inputs = tf.placeholder(tf.int32, [None, None], 'decoder_inputs')
decoder_targets = tf.placeholder(tf.int32, [None, None], 'decoder_targets')
encoder_lengths = tf.placeholder(tf.int32, [None], 'encoder_lengths')
decoder_lengths = tf.placeholder(tf.int32, [None], 'decoder_lengths')
target_weights = tf.placeholder(tf.float32, [None, None], 'target_weights')
learning_rate = tf.placeholder(tf.float32, [], 'learning_rate')
batch_size = tf.placeholder(tf.int32, [], 'batch_size')

# Embedding (one table shared by encoder and decoder inputs)
with tf.variable_scope("embeddings"):
    embedding_encoder = tf.get_variable(
        "embedding_encoder", [vocab_size, embedding_size])

    encoder_emb_inp = tf.nn.embedding_lookup(
        embedding_encoder, encoder_inputs)

    decoder_emb_inp = tf.nn.embedding_lookup(
        embedding_encoder, decoder_inputs)

# Encoder (dynamic, bi-directional network)
# NOTE(review): keep_prob is a Python constant, so input dropout is baked into the
# graph and stays active when the saved graph is re-imported for inference.
with tf.variable_scope('encoder_lstm'):
    enc_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(
        input_num_units, state_is_tuple=True, name="enc_fw")

    enc_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(
        input_num_units, state_is_tuple=True, name="enc_bw")

    enc_fw_cell = tf.contrib.rnn.DropoutWrapper(
        enc_fw_cell, input_keep_prob = keep_prob)

    enc_bw_cell = tf.contrib.rnn.DropoutWrapper(
        enc_bw_cell, input_keep_prob = keep_prob)

    enc_bi_outputs, encoder_state = tf.nn.bidirectional_dynamic_rnn(
        enc_fw_cell, enc_bw_cell,
        encoder_emb_inp,
        sequence_length=encoder_lengths,
        time_major=False, dtype=tf.float32)
    # Concatenate forward and backward outputs along the feature axis.
    encoder_outputs = tf.concat(enc_bi_outputs, -1)

# Decoder with attention mechanism
with tf.variable_scope('decoder_lstm'):
    # Seed the decoder with the concatenated final states of both encoder
    # directions (hence decoder_num_units == 2 * input_num_units).
    total_c_state = tf.concat(axis=1,values=[encoder_state[0].c, encoder_state[1].c])
    total_h_state = tf.concat(axis=1,values=[encoder_state[0].h, encoder_state[1].h])

    total_state = tf.contrib.rnn.LSTMStateTuple(total_c_state, total_h_state)

    decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_num_units, state_is_tuple=True, name="dec_lstm")

    decoder_cell = tf.contrib.rnn.DropoutWrapper(
        decoder_cell, input_keep_prob = keep_prob)

    # Maps decoder outputs to one logit per vocabulary entry.
    projection_layer = tf.layers.Dense(
        vocab_size, use_bias=False)

    attention_states = encoder_outputs

    # The attention memory is the encoder output sequence, so the mask that hides
    # padded memory positions must be derived from the *encoder* lengths.
    # (Previously decoder_lengths was passed, which masked the wrong positions.)
    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
        decoder_num_units, attention_states,
        memory_sequence_length=encoder_lengths)

    decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
        decoder_cell, attention_mechanism,
        attention_layer_size=decoder_num_units)

    # Start from the wrapper's zero state, then swap in the encoder-derived LSTM state.
    initial_state = decoder_cell.zero_state(dtype = tf.float32, batch_size=batch_size)
    initial_state = initial_state.clone(cell_state=total_state)

    # Teacher forcing: the ground-truth previous token is fed at every step.
    helper = tf.contrib.seq2seq.TrainingHelper(
        decoder_emb_inp, decoder_lengths)

    decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell, helper, initial_state,
        output_layer=projection_layer)

    decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder)

    logits = decoder_outputs.rnn_output

with tf.variable_scope('loss'):
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=decoder_targets, logits=logits)

    # target_weights is 1 on real tokens and 0 on padding; the loss is the sum
    # (not the mean) over the whole batch.
    train_loss = tf.reduce_sum(loss * target_weights)

with tf.variable_scope('optimization'):
    max_gradient_norm = 1
    params = tf.trainable_variables()
    gradients = tf.gradients(train_loss, params)
    # Rescale all gradients together so their global norm is at most max_gradient_norm.
    clipped_gradients, _ = tf.clip_by_global_norm(
        gradients, max_gradient_norm)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_step = optimizer.apply_gradients(
        zip(clipped_gradients, params))

# Keep only the three most recent checkpoints on disk.
saver = tf.train.Saver(max_to_keep=3)

In [12]:
# don't run for demo
# Give every variable of the freshly built graph its initial value in the
# interactive session `sess`.
sess.run(tf.global_variables_initializer())

In [13]:
# don't run for demo
# Running count of completed training epochs. The training loop increments it
# after each epoch and embeds it in the checkpoint file name, so it keeps
# counting if the training cell is executed more than once.
num_epochs = 0

In [15]:
# don't run for demo
# Training
epochs = 20
bs = 50

# Fetch only the arg-max token ids for the accuracy report instead of copying the
# full (batch, time, vocab) logits tensor to the host on every training step.
predicted_ids = tf.argmax(logits, axis=-1)

# The checkpoint directory may not exist on a fresh checkout.
os.makedirs('./mod_checkpoints', exist_ok=True)

for epoch_i in range(epochs):
    # Step-wise learning-rate decay. The previous chain had no branch for epochs
    # 6, 10 and 14 and silently reused the value left over from the epoch before;
    # the boundaries below encode that same effective schedule explicitly.
    if epoch_i < 3:
        lr = 0.01
    elif epoch_i < 7:
        lr = 0.005
    elif epoch_i < 11:
        lr = 0.001
    elif epoch_i < 15:
        lr = 0.0005
    else:
        lr = 0.0001
    start_time = time.time()
    for batch_i, (enc_batch_inputs, dec_batch_inputs, dec_batch_targets, enc_inp_lens, dec_inp_lens, target_w) \
                in enumerate(tqdm(batch_data(train_X, train_Y, bs))):

        _, batch_loss, batch_pred = sess.run([update_step, train_loss, predicted_ids],
            feed_dict = {encoder_inputs: enc_batch_inputs,
             decoder_inputs: dec_batch_inputs,
             decoder_targets: dec_batch_targets,
             encoder_lengths: enc_inp_lens,
             decoder_lengths: dec_inp_lens,
             target_weights: target_w,
             learning_rate: lr,
             batch_size: bs})

    num_epochs += 1
    # Loss and accuracy describe the LAST batch of the epoch only. Accuracy is
    # restricted to real (non-padded) target positions via target_w; counting
    # padded positions would distort the figure.
    correct = (batch_pred == dec_batch_targets) * target_w
    accuracy = correct.sum() / target_w.sum()
    print('Epoch:', epoch_i+1, 'Loss:', batch_loss/bs, 'Accuracy:', accuracy, 'Epoch duration:', (time.time() - start_time), 's')
    saver.save(sess, './mod_checkpoints/epoch'+str(num_epochs)+"_"+str(strftime("%Y-%m-%d_%H:%M:%S")))


0it [00:00, ?it/s][A
1it [00:00,  3.35it/s][A
2it [00:00,  2.58it/s][A
1870it [14:27,  2.15it/s]

ResourceExhaustedError: OOM when allocating tensor with shape[50,205,24043] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: decoder_lstm/decoder/transpose = Transpose[T=DT_FLOAT, Tperm=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](decoder_lstm/decoder/TensorArrayStack/TensorArrayGatherV3, decoder_lstm/decoder/concat_2)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: optimization/gradients/decoder_lstm/decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/Merge_grad/cond_grad/_173 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1063_.../cond_grad", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](^_cloopoptimization/gradients/Switch_8/_1)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'decoder_lstm/decoder/transpose', defined at:
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 112, in start
    self.asyncio_loop.run_forever()
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 102, in _handle_events
    handler_func(fileobj, events)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-3de848bfc45d>", line 79, in <module>
    decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 324, in dynamic_decode
    final_outputs = nest.map_structure(_transpose_batch_time, final_outputs)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/util/nest.py", line 459, in map_structure
    structure[0], [func(*x) for x in entries])
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/util/nest.py", line 459, in <listcomp>
    structure[0], [func(*x) for x in entries])
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 74, in _transpose_batch_time
    ([1, 0], math_ops.range(2, x_rank)), axis=0))
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1453, in transpose
    ret = transpose_fn(a, perm, name=name)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 5658, in transpose
    "Transpose", x=x, perm=perm, name=name)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3271, in create_op
    op_def=op_def)
  File "/home/pritish/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1650, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[50,205,24043] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: decoder_lstm/decoder/transpose = Transpose[T=DT_FLOAT, Tperm=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](decoder_lstm/decoder/TensorArrayStack/TensorArrayGatherV3, decoder_lstm/decoder/concat_2)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: optimization/gradients/decoder_lstm/decoder/while/BasicDecoderStep/TrainingHelperNextInputs/cond/Merge_grad/cond_grad/_173 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1063_.../cond_grad", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](^_cloopoptimization/gradients/Switch_8/_1)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [11]:
def num2sent(pred, mode, seq_len=None):
    """Turn a sequence of vocabulary ids back into text.

    Args:
        pred: 1-D sequence/array of token ids.
        mode: which kind of sequence ``pred`` is:
            ``"i"`` - a right-padded encoder input ending in ``<EOS>``;
                      ``seq_len`` (length including ``<EOS>``) is required.
            ``"r"`` - a generated response; the first and last ids
                      (``<START>`` / ``<EOS>``) are dropped.
            ``"t"`` - a decoder target ending in ``<EOS>``. When ``seq_len``
                      is given the sequence is cut to its real length first,
                      so padding does not leak into the text; without it only
                      the final id is dropped.
        seq_len: unpadded length of ``pred`` (see ``mode``).

    Returns:
        ``(text, ids)``: the words joined by spaces (with a trailing space) and
        the trimmed id sequence. An unrecognised ``mode`` yields ``("", pred)``.

    Raises:
        TypeError: if ``mode`` is ``"i"`` and ``seq_len`` is ``None``.
    """
    if mode == "i":
        if seq_len is None:
            raise TypeError('num2sent(mode="i") requires seq_len')
        # Batches are padded on the right and not reversed, so the real words
        # are the leading seq_len - 1 ids (everything before <EOS>).
        pred = pred[:max(seq_len - 1, 0)]
    elif mode == "r":
        pred = pred[1:-1]
    elif mode == "t":
        if seq_len is None:
            pred = pred[:-1]
        else:
            pred = pred[:max(seq_len - 1, 0)]
    else:
        return "", pred

    res = "".join(idx2word[idx] + " " for idx in pred)
    return res, pred

In [12]:
def test_sample(test_sess, ques, ques_len, bs=1):
    dec_input = np.zeros((1, 1)) + word2idx['<START>']
    dec_len = [1]
    while dec_input[0, -1] != word2idx['<EOS>']:
        batch_logits = test_sess.run("decoder_lstm/decoder/transpose:0",
                       feed_dict = {"encoder_inputs:0": [ques],
                                    "decoder_inputs:0": dec_input,
                                    "encoder_lengths:0": [ques_len],
                                    "decoder_lengths:0": dec_len,
                                    "batch_size:0": bs})
        prediction = batch_logits[:,-1].argmax(axis=-1)
        dec_len[0] += 1

        dec_input = np.hstack([dec_input, prediction[:,None]])

    return dec_input[0]

In [2]:
# don't run for demo
# Testing
test_batch_size = 40
model = "epoch20_2018-04-26_06:48:02"
checkpoint_prefix = './mod_checkpoints/' + model

# Import into a private graph so the tensor names test_sample() feeds cannot
# collide with a graph that is already present in this kernel.
eval_graph = tf.Graph()
with eval_graph.as_default(), tf.Session(graph=eval_graph) as sess:
    saver = tf.train.import_meta_graph(checkpoint_prefix + '.meta')
    # Restore the weights of the SAME checkpoint whose graph was imported;
    # "latest checkpoint in the directory" can be a different model.
    saver.restore(sess, checkpoint_prefix)
    # NOTE(review): bleu_scores is never filled in this cell -- the decoded
    # strings below are computed and then discarded.
    bleu_scores = []
    for i, (source_batch, dec_batch_inputs, target_batch, enc_inp_lens, dec_inp_lens, target_w) in enumerate(tqdm(batch_data(test_X, test_Y, test_batch_size))):
        for index, sample in enumerate(source_batch):
            pred = test_sample(sess, sample, enc_inp_lens[index])

            ip_str, ip_list = num2sent(sample, mode="i", seq_len=enc_inp_lens[index])
            target_str, target_list = num2sent(target_batch[index], mode="t", seq_len=dec_inp_lens[index])
            pred_str, pred_list = num2sent(pred, mode="r")

In [52]:
# Demo
model = "epoch20_2018-04-26_06:48:02" # model trained for 20 epochs
checkpoint_prefix = './mod_checkpoints/' + model

# Import into a private graph so the tensor names test_sample() feeds cannot
# collide with a graph that is already present in this kernel.
demo_graph = tf.Graph()
with demo_graph.as_default(), tf.Session(graph=demo_graph) as sess:
    saver = tf.train.import_meta_graph(checkpoint_prefix + '.meta')
    # Restore the weights of the SAME checkpoint whose graph was imported;
    # "latest checkpoint in the directory" can be a different model.
    saver.restore(sess, checkpoint_prefix)

    unk_id = word2idx["<unk>"]
    print ("Enter '!q' to quit")
    t = input("User:\t")
    while t != "!q":
        # Lower-case, split on whitespace and map out-of-vocabulary words to <unk>.
        ip = [word2idx.get(word, unk_id) for word in t.lower().strip().split()]
        ip += [word2idx['<EOS>']]

        predict = test_sample(sess, ip, len(ip))
        prediction, _ = num2sent(predict, mode="r")

        print ("Jarvis:\t", prediction)

        t = input("User:\t")

INFO:tensorflow:Restoring parameters from ./mod_checkpoints/epoch20_2018-04-26_06:48:02
Enter '!q' to quit
User:	Who are you ?
Jarvis:	 nobody for now . : ) 
User:	Are you a human or a robot ?
Jarvis:	 i am a robot , but i have been programmed and trained to be anthropomorphic . 
User:	What 's your name , Jarvis ?
Jarvis:	 i am a robot , and i do n't have a family name . 
User:	what are your hobbies ?
Jarvis:	 i like drawing and painting . 
User:	What kind of music do you enjoy listening to ?
Jarvis:	 i enjoy listening to interpret . 
User:	Is it raining outside ?
Jarvis:	 yes , it 's raining cats and dogs . 
User:	Who taught you all these interesting stuff ?
Jarvis:	 i am happy to hear what he is my father who teaches me every day . he is a software engineer who works all the time . 
User:	who is your creator ?
Jarvis:	 my father is a secret for now . : ) 
User:	bye .
Jarvis:	 thank you . 
User:	!q
