In [13]:
#hyperparameters.py

import tensorflow as tf
from collections import namedtuple


HParams = namedtuple(
  "HParams",
  [
    "batch_size",
    "embedding_dim",
    "eval_batch_size",
    "learning_rate",
    "max_context_len",
    "max_utterance_len",
    "optimizer",
    "rnn_dim",
    "vocab_size",
    "glove_path",
    "vocab_path"
  ])

def create_hyperparameters():
    return HParams(
    batch_size=128,
    eval_batch_size=8,
    vocab_size=91620,
    optimizer="Adam",
    learning_rate=0.001,
    embedding_dim=100,
    max_context_len=160,
    max_utterance_len=80,
    glove_path=None,
    vocab_path=None,
    rnn_dim=128)

In [14]:
#metrics.py

import functools
from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec


def create_evaluation_metrics():
    evaluation_metrics = {}
    for k in [1, 2, 5, 10]:
        evaluation_metrics["recall_@_%d" % k] = MetricSpec(metric_fn=functools.partial(
            tf.contrib.metrics.streaming_sparse_recall_at_k,
            k=k))
    return evaluation_metrics


In [54]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# dual_encoder.py

import numpy as np


def dual_encoder_model(
    hparams,
    mode,
    context,
    context_len,
    utterance,
    utterance_len,
    targets,
    ):

    #print ('targets', targets)

  # initialize word embeddings with random initialization

    tf.logging.info('No glove/vocab path specificed, starting with random embeddings.'
                    )
    init = tf.random_uniform_initializer(-0.25, 0.25)

    word_embeddings = tf.get_variable('word_embeddings',
            shape=[hparams.vocab_size, hparams.embedding_dim],
            initializer=init)

  # Embedding context and utterance

    context_embedded = tf.nn.embedding_lookup(word_embeddings, context,
            name='contect_embedding')
    utterance_embedded = tf.nn.embedding_lookup(word_embeddings,
            utterance, name='utterrance_embedding')

  # Build the RNN

    with tf.variable_scope('rnn') as vs:

    # LSTm cell usage

        lstm_cell = tf.contrib.rnn.LSTMCell(hparams.rnn_dim,
                forget_bias=2.0, use_peepholes=True,
                state_is_tuple=True)

    # pass context and utterrances to network

        (rnn__lstm_outputs, rnn_lstm_states) = \
            tf.nn.dynamic_rnn(lstm_cell, tf.concat([context_embedded,
                              utterance_embedded], 0),
                              sequence_length=tf.concat([context_len,
                              utterance_len], 0), dtype=tf.float32)
        (context_encoded, response_encoded) = \
            tf.split(rnn_lstm_states.h, 2, 0)

    with tf.variable_scope('prediction') as vs:
        W = tf.get_variable('W', shape=[hparams.rnn_dim,
                            hparams.rnn_dim],
                            initializer=tf.truncated_normal_initializer())

    # perferming elementary prediction operation by multiplying c * W

        r_dash = tf.matmul(context_encoded, W)
        r_dash = tf.expand_dims(r_dash, 2)
        response_encoded = tf.expand_dims(response_encoded, 2)

    # Dot product between generated response and actual response
    # (c * W) * r

        logits = tf.matmul(r_dash, response_encoded, True)
        logits = tf.squeeze(logits, [2])

    # logit to probablities

        probabilities = tf.sigmoid(logits)

        if mode == tf.contrib.learn.ModeKeys.INFER:
            return (probabilities, None)

    # Calculate the binary cross-entropy loss

        loss = \
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.to_float(targets),
                logits=logits)

  # Mean loss across the batch of examples

    mean_loss = tf.reduce_mean(loss, name='mean_loss')
    return (probabilities, mean_loss)


In [53]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# model.py

import sys


def get_feature_ids(
    features,
    key,
    key_leng,
    max_len,
    ):
    ids = features[key]
    len_of_id = tf.squeeze(features[key_leng], [1])
    len_of_id = tf.minimum(len_of_id, tf.constant(max_len,
                           dtype=tf.int64))
    return (ids, len_of_id)


def start_training_operation(loss, hyperparams):
    training_operation = tf.contrib.layers.optimize_loss(loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=hyperparams.learning_rate,
            clip_gradients=10.0, optimizer=hyperparams.optimizer)
    return training_operation


def generate_model(hparams, model_impl):

    def model_function(features, targets, mode):
        #print ('===========Chatbot-RNN & LSTM===============')
        (context, context_len) = get_feature_ids(features, 'context',
                'context_len', hparams.max_context_len)

        (utterance, utterance_len) = get_feature_ids(features,
                'utterance', 'utterance_len', hparams.max_utterance_len)
        
        
        if targets!= None:
            batch_size = targets.get_shape().as_list()[0]

        if mode == tf.contrib.learn.ModeKeys.TRAIN:
            (probs, loss) = model_impl(
                hparams,
                mode,
                context,
                context_len,
                utterance,
                utterance_len,
                targets,
                )
            train_op = start_training_operation(loss, hparams)
            return (probs, loss, train_op)

        if mode == tf.contrib.learn.ModeKeys.INFER:
            (probs, loss) = model_impl(
                hparams,
                mode,
                context,
                context_len,
                utterance,
                utterance_len,
                None,
                )
            return (probs, 0.0, None)

        if mode == tf.contrib.learn.ModeKeys.EVAL:

      # We have 10 exampels per record, so we accumulate them

            all_contexts = [context]
            all_context_lens = [context_len]
            all_utterances = [utterance]
            all_utterance_lens = [utterance_len]
            all_targets = [tf.ones([batch_size, 1], dtype=tf.int64)]

            for i in range(9):
                (distractor, distractor_len) = \
                    get_feature_ids(features,
                                    'distractor_{}'.format(i),
                                    'distractor_{}_len'.format(i),
                                    hparams.max_utterance_len)
                all_contexts.append(context)
                all_context_lens.append(context_len)
                all_utterances.append(distractor)
                all_utterance_lens.append(distractor_len)
                all_targets.append(tf.zeros([batch_size, 1],
                                   dtype=tf.int64))

            (probs, loss) = model_impl(
                hparams,
                mode,
                tf.concat(all_contexts, 0),
                tf.concat(all_context_lens, 0),
                tf.concat(all_utterances, 0),
                tf.concat(all_utterance_lens, 0),
                tf.concat(all_targets, 0),
                )

            split_probs = tf.split(probs, 10, 0)
            shaped_probs = tf.concat(split_probs, 1)

      # Add summaries

            tf.summary.histogram('eval_correct_probs_hist',
                                 split_probs[0])
            tf.summary.scalar('eval_correct_probs_average',
                              tf.reduce_mean(split_probs[0]))
            tf.summary.histogram('eval_incorrect_probs_hist',
                                 split_probs[1])
            tf.summary.scalar('eval_incorrect_probs_average',
                              tf.reduce_mean(split_probs[1]))

            return (shaped_probs, loss, None)

    return model_function


In [17]:
#inputs.py

#!/usr/bin/python
# -*- coding: utf-8 -*-


TEXT_FEATURE_SIZE = 160


def create_input_fn(
    mode,
    input_files,
    batch_size,
    num_epochs,
    ):

    def input_fn():
        columns_attribs = []
        columns_attribs.append(tf.contrib.layers.real_valued_column(column_name='context'
                               , dimension=TEXT_FEATURE_SIZE,
                               dtype=tf.int64))
        columns_attribs.append(tf.contrib.layers.real_valued_column(column_name='context_len'
                               , dimension=1, dtype=tf.int64))
        columns_attribs.append(tf.contrib.layers.real_valued_column(column_name='utterance'
                               , dimension=TEXT_FEATURE_SIZE,
                               dtype=tf.int64))
        columns_attribs.append(tf.contrib.layers.real_valued_column(column_name='utterance_len'
                               , dimension=1, dtype=tf.int64))

        if mode == tf.contrib.learn.ModeKeys.TRAIN:

      # During training we have a label feature

            columns_attribs.append(tf.contrib.layers.real_valued_column(column_name='label'
                                   , dimension=1, dtype=tf.int64))

        if mode == tf.contrib.learn.ModeKeys.EVAL:

      # During evaluation we have distractors

            for i in range(9):
                columns_attribs.append(tf.contrib.layers.real_valued_column(column_name='distractor_{}'.format(i),
                        dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
                columns_attribs.append(tf.contrib.layers.real_valued_column(column_name='distractor_{}_len'.format(i),
                        dimension=1, dtype=tf.int64))

        features = \
            tf.contrib.layers.create_feature_spec_for_parsing(set(columns_attribs))

        feature_map = tf.contrib.learn.io.read_batch_features(
            file_pattern=input_files,
            batch_size=batch_size,
            features=features,
            reader=tf.TFRecordReader,
            randomize_input=True,
            num_epochs=num_epochs,
            queue_capacity=200000 + batch_size * 10,
            name='read_batch_features_{}'.format(mode),
            )

        if mode == tf.contrib.learn.ModeKeys.TRAIN:
            target = feature_map.pop('label')
        else:

      # In evaluation we have 10 classes (utterances).
      # The first one (index 0) is always the correct one

            target = tf.zeros([batch_size, 1], dtype=tf.int64)
        return (feature_map, target)

    return input_fn


In [18]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# train

import os
import time
import itertools


TIMESTAMP = int(time.time())

MODEL_DIR = os.path.abspath(os.path.join('./runs', str(TIMESTAMP)))

TRAIN_FILE = os.path.abspath(os.path.join('./data', 'train.tfrecords'))
VALIDATION_FILE = os.path.abspath(os.path.join('./data',
                                  'validation.tfrecords'))

tf.logging.set_verbosity(20)

hyperparams = create_hyperparameters()

tf_model_function = generate_model(hyperparams,
            model_impl=dual_encoder_model)

tf_estimator = \
        tf.contrib.learn.Estimator(model_fn=tf_model_function,
                                   model_dir=MODEL_DIR,
                                   config=tf.contrib.learn.RunConfig())

input_training_function = \
        create_input_fn(mode=tf.contrib.learn.ModeKeys.TRAIN,
                                  input_files=[TRAIN_FILE],
                                  batch_size=hyperparams.batch_size,
                                  num_epochs=None)

input_evaluation_function = \
        create_input_fn(mode=tf.contrib.learn.ModeKeys.EVAL,
                                  input_files=[VALIDATION_FILE],
                                  batch_size=hyperparams.eval_batch_size,
                                  num_epochs=1)

recallAtk_evaluation_metric = create_evaluation_metrics()

monitor_recall = \
        tf.contrib.learn.monitors.ValidationMonitor(input_fn=input_evaluation_function,
            every_n_steps=2000, metrics=recallAtk_evaluation_metric)

tf_estimator.fit(input_fn=input_training_function, steps=30000,
                     monitors=[])

tf.app.run()

sys.exit(0)



INFO:tensorflow:Using config: {'_save_checkpoints_steps': None, '_task_type': None, '_session_config': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f46646d1828>, '_model_dir': '/home/babyeagle/MachineLearning/Project/chatbot-rnn and lstm/runs/1525381531', '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_is_chief': True, '_task_id': 0, '_evaluation_master': '', '_environment': 'local', '_save_summary_steps': 100, '_num_worker_replicas': 0, '_keep_checkpoint_every_n_hours': 10000, '_master': '', '_log_step_count_steps': 100, '_keep_checkpoint_max': 5, '_num_ps_replicas': 0}
targets Tensor("read_batch_features_train/fifo_queue_Dequeue:2", shape=(128, 1), dtype=int64)
INFO:tensorflow:No glove/vocab path specificed, starting with random embeddings.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO

INFO:tensorflow:loss = 0.550617, step = 6401 (50.036 sec)
INFO:tensorflow:global_step/sec: 1.99779
INFO:tensorflow:loss = 0.508454, step = 6501 (50.055 sec)
INFO:tensorflow:global_step/sec: 2.00425
INFO:tensorflow:loss = 0.58267, step = 6601 (49.894 sec)
INFO:tensorflow:global_step/sec: 2.00047
INFO:tensorflow:loss = 0.553195, step = 6701 (49.988 sec)
INFO:tensorflow:global_step/sec: 2.00475
INFO:tensorflow:loss = 0.494295, step = 6801 (49.882 sec)
INFO:tensorflow:global_step/sec: 2.00276
INFO:tensorflow:loss = 0.453959, step = 6901 (49.932 sec)
INFO:tensorflow:global_step/sec: 1.99996
INFO:tensorflow:loss = 0.483793, step = 7001 (50.001 sec)
INFO:tensorflow:global_step/sec: 2.00153
INFO:tensorflow:loss = 0.510505, step = 7101 (49.962 sec)
INFO:tensorflow:global_step/sec: 2.00335
INFO:tensorflow:loss = 0.457296, step = 7201 (49.917 sec)
INFO:tensorflow:Saving checkpoints for 7205 into /home/babyeagle/MachineLearning/Project/chatbot-rnn and lstm/runs/1525381531/model.ckpt.
INFO:tensorfl

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [55]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import time
import itertools
import sys
import numpy as np
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)

def tokenizer_fn(iterator):
    return (x.split(' ') for x in iterator)


# Create vocabulary ourselves or load saved one
if not "./data/vocab_processor.bin":
    vp = tf.contrib.learn.preprocessing.VocabularyProcessor(100000)
    vp.fit(open("./data/vocab_processor.bin"))
    vp.save('./data/vocab_processor.bin')
else:
    vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
    "./data/vocab_processor.bin")


# Load your own data here

INPUT_CONTEXT = 'hi'
POTENTIAL_RESPONSES = ['hello', 'goodbye', 'maybe']


def get_features(context, utterance):
    context_matrix = np.array(list(vp.transform([context])))
    utterance_matrix = np.array(list(vp.transform([utterance])))
    context_len = len(context.split(" "))
    utterance_len = len(utterance.split(" "))
    features = {
     "context": tf.convert_to_tensor(context_matrix, dtype=tf.int64),
     "context_len": tf.constant(context_len, shape=[1,1], dtype=tf.int64),
     "utterance": tf.convert_to_tensor(utterance_matrix, dtype=tf.int64),
     "utterance_len": tf.constant(utterance_len, shape=[1,1], dtype=tf.int64),
   }
    return features, None


if __name__ == "__main__":
    hparams = create_hyperparameters()
    model_fn = generate_model(hparams, model_impl=dual_encoder_model)
    estimator = tf.contrib.learn.Estimator(model_fn=model_fn, model_dir='./runs/1525381531')
    print("Context: {}".format(INPUT_CONTEXT))
    dict_final = {}
    for r in POTENTIAL_RESPONSES:
        prob = estimator.predict(input_fn=lambda: get_features(INPUT_CONTEXT, r))
        dict_final[r] = next(prob)
        #print("{}: {}".format(r, next(prob)))
    for k in dict_final:
        print(k, dict_final[k])

Context: hi
hello [ 0.54939836]
maybe [ 0.36490214]
goodbye [ 0.39229482]
