In [1]:
import tensorflow as tf
import numpy as np

from sklearn.model_selection import train_test_split

import json

In [2]:
# Files read by the loading cell: three .npy arrays (the two question inputs
# and the labels) and one JSON file with preprocessing metadata.
Q1_TRAINING_DATA_FILE = 'q1_train.npy'
Q2_TRAINING_DATA_FILE = 'q2_train.npy'
LABEL_TRAINING_DATA_FILE = 'label_train.npy'
NB_WORDS_DATA_FILE = 'nb_words.json'

# Fraction of the examples held out by train_test_split (passed as test_size).
TEST_SPLIT = 0.1
# Seed passed to train_test_split as random_state so the split is repeatable.
RNG_SEED = 13371447

In [3]:
# Give np.load the path rather than an already-open file object: np.load then
# opens and closes the file itself, so no handle is left open in the kernel.
q1_data = np.load(Q1_TRAINING_DATA_FILE)
q2_data = np.load(Q2_TRAINING_DATA_FILE)
labels = np.load(LABEL_TRAINING_DATA_FILE)

# Preprocessing metadata as a dict; 'vocab_size' is read from it further down.
with open(NB_WORDS_DATA_FILE, 'r') as f:
    prepro_configs = json.load(f)

In [4]:
# Stack the two question arrays along a new axis 1 so that a single
# train_test_split call keeps every (q1, q2, label) triple together.
X = np.stack((q1_data, q2_data), axis=1)
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SPLIT, random_state=RNG_SEED)

# Undo the stacking: index 0 on axis 1 came from q1_data, index 1 from q2_data.
Q1_train = X_train[:,0]
Q2_train = X_train[:,1]
Q1_test = X_test[:,0]
Q2_test = X_test[:,1]

In [22]:
# Display the held-out labels produced by the split above.
y_test

array([0, 0, 0, ..., 0, 1, 0])

In [5]:
def rearrange(base, hypothesis, label):
    """Pack the two inputs into a feature dict and pair it with the label.

    Returns:
        A ``(features, label)`` tuple, where ``features`` maps ``"base"`` and
        ``"hypothesis"`` to the corresponding arguments and ``label`` is
        passed through unchanged.
    """
    return dict(base=base, hypothesis=hypothesis), label

def train_input_fn():
    """Build the training input pipeline over the training split.

    The pipeline slices ``(Q1_train, Q2_train, y_train)`` example by example,
    shuffles, groups into batches of 64 and reshapes each batch with
    ``rearrange``. There is no ``repeat()``, so the iterator is exhausted
    after one pass over the data.

    Returns:
        The ``(features, label)`` tensors yielded by a one-shot iterator,
        where ``features`` is the dict built by ``rearrange``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((Q1_train, Q2_train, y_train))
    # A 100-element buffer only perturbs the order locally, so every pass
    # replays nearly the same sequence of batches. Buffering the whole split
    # gives a uniform shuffle on each call.
    dataset = dataset.shuffle(buffer_size=len(y_train))
    dataset = dataset.batch(64)
    dataset = dataset.map(rearrange)
    iterator = dataset.make_one_shot_iterator()

    return iterator.get_next()

def test_input_fn():
    """Build the evaluation input pipeline over the held-out split.

    Slices ``(Q1_test, Q2_test, y_test)``, shuffles with a 100-element
    buffer, batches by 64 and maps ``rearrange`` over the batches. There is
    no ``repeat()``, so the iterator ends after one pass.

    Returns:
        The ``(features, label)`` tensors yielded by a one-shot iterator.
    """
    held_out = (Q1_test, Q2_test, y_test)
    batches = (
        tf.data.Dataset.from_tensor_slices(held_out)
        .shuffle(buffer_size=100)
        .batch(64)
        .map(rearrange)
    )
    return batches.make_one_shot_iterator().get_next()

In [6]:
# Vocabulary size recorded in the preprocessing metadata; used as the number
# of rows of the word-embedding table in model_fn.
VOCAB_SIZE = prepro_configs['vocab_size']

# Number of columns of the word-embedding table.
WORD_EMBEDDING_DIM = 300
# Number of output filters of each conv1d layer.
CONV_FEATURE_DIM = 50
# conv1d kernel width; also the window of the first average pooling.
CONV_WINDOW_SIZE = 3
# Window of the final average pooling on each side, which model_fn relies on
# to collapse the sequence axis — presumably the padded length of the inputs;
# confirm against the preprocessing step.
BASE_SEQ_LIMIT_LEN = 25
HYPOTHESIS_SEQ_LIMIT_LEN = 25

In [7]:
def euclidian_distance(a, b):
    """Euclidean distance between ``a`` and ``b`` along the last axis.

    Args:
        a: tensor whose last axis holds the vector components.
        b: tensor broadcast-compatible with ``a``.

    Returns:
        A tensor with the last axis reduced away. The squared distance is
        clamped to ``1e-12`` before the square root, so identical vectors
        yield ``1e-6`` rather than exactly ``0``.
    """
    squared_distance = tf.reduce_sum(tf.squared_difference(a, b), axis=-1)
    # The derivative of sqrt is infinite at 0; combined with the zero gradient
    # of the squared difference this back-propagates NaN whenever a == b
    # (e.g. the same token on both sides). Clamping keeps the gradient finite.
    return tf.sqrt(tf.maximum(squared_distance, 1e-12))

In [8]:
def match_score(a, b):
    """Similarity score ``1 / (1 + euclidian_distance(a, b))``.

    The score equals 1 when the distance is 0 and falls towards 0 as the
    distance grows.
    """
    distance = euclidian_distance(a, b)
    return 1. / (1. + distance)

In [9]:
# Loop bounds read by create_attention_matrix_old, taken from the config constants.
base_sequence_limit_len = BASE_SEQ_LIMIT_LEN
hypothesis_sequence_limit_len = HYPOTHESIS_SEQ_LIMIT_LEN

def create_attention_matrix_old(base, hypothesis):
    """Loop-based attention matrix built one position pair at a time.

    Entry ``[:, i, j]`` of the result is
    ``match_score(base[:, i, :], hypothesis[:, j, :])`` for
    ``i < base_sequence_limit_len`` and ``j < hypothesis_sequence_limit_len``
    (both read from module-level variables).

    Args:
        base: rank-3 tensor indexed as ``[batch, position, feature]``.
        hypothesis: rank-3 tensor indexed the same way.

    Returns:
        Tensor whose last two axes are (base position, hypothesis position).
    """
    rows = []
    for base_pos in range(base_sequence_limit_len):
        scores_for_row = [
            match_score(base[:, base_pos, :], hypothesis[:, hyp_pos, :])
            for hyp_pos in range(hypothesis_sequence_limit_len)
        ]
        # Scores of one base position against every hypothesis position.
        rows.append(tf.stack(scores_for_row, axis=-1))

    return tf.stack(rows, axis=-2)

In [10]:
def create_attention_matrix(base, hypothesis):
    """Vectorised attention matrix of pairwise match scores.

    Entry ``[:, i, j]`` is ``1 / (1 + ||base[:, i, :] - hypothesis[:, j, :]||)``,
    i.e. the same layout as ``create_attention_matrix_old``: axis 1 indexes
    base positions and axis 2 indexes hypothesis positions.

    Args:
        base: tensor assumed to be ``(batch, base_len, dim)``.
        hypothesis: tensor assumed to be ``(batch, hyp_len, dim)``.

    Returns:
        Tensor of shape ``(batch, base_len, hyp_len)``.
    """
    # Insert the broadcast axes so that base varies along axis 1 and
    # hypothesis along axis 2. With the axes the other way round the result
    # is the transpose, which goes unnoticed while both lengths are equal.
    base = tf.expand_dims(base, 2)              # (batch, base_len, 1, dim)
    hypothesis = tf.expand_dims(hypothesis, 1)  # (batch, 1, hyp_len, dim)

    squared_distance = tf.reduce_sum(tf.square(base - hypothesis), axis=-1)
    # sqrt has an infinite derivative at 0, which turns into NaN gradients
    # whenever two positions hold identical vectors; clamp to keep it finite.
    euclidian = tf.sqrt(tf.maximum(squared_distance, 1e-12))

    return 1 / (1 + euclidian)

In [11]:
def abcnn_1_attention(base, hypothesis):
    """Project the attention matrix into per-position attention features.

    The attention matrix from ``create_attention_matrix`` and its transpose
    (last two axes swapped) are each passed through their own bias-free
    100-unit dense layer, which acts on the last axis.

    Returns:
        ``(base_attention_matrix, hypothesis_attention_matrix)``: the dense
        projection of the attention matrix and of its transpose, respectively.
    """
    with tf.variable_scope('abcnn_1_attention'):
        scores = create_attention_matrix(base, hypothesis)
        scores_transposed = tf.transpose(scores, perm=[0, 2, 1])

        # The dense layers are unnamed, so their variable names depend on
        # creation order: keep the base projection first.
        base_projection = tf.layers.dense(scores, 100, use_bias=False)
        hypothesis_projection = tf.layers.dense(scores_transposed, 100, use_bias=False)

    return base_projection, hypothesis_projection

In [12]:
def abcnn_2_attention(base, hypothesis):
    """Re-weight every position of both inputs by its summed attention score.

    The attention matrix is summed over axis 2 to get one weight per axis-1
    position (applied to ``base``) and over axis 1 to get one weight per
    axis-2 position (applied to ``hypothesis``).

    NOTE(review): this presumes the attention matrix is laid out as
    ``(batch, base_len, hyp_len)`` — confirm against ``create_attention_matrix``.

    Returns:
        ``(base_attention_matrix, hypothesis_attention_matrix)``, each with
        the same shape as the corresponding input.
    """
    with tf.variable_scope('abcnn_2_attention'):
        attention_matrix = create_attention_matrix(base, hypothesis)

        # Keep a trailing axis of size 1 and let broadcasting spread each
        # weight over the feature axis. This gives the same values as tiling
        # the weights to the feature width, without building the tiled copy
        # or needing a statically known last dimension.
        base_attention = tf.expand_dims(tf.reduce_sum(attention_matrix, axis=2), axis=-1)
        hypothesis_attention = tf.expand_dims(tf.reduce_sum(attention_matrix, axis=1), axis=-1)

        base_attention_matrix = tf.multiply(base_attention, base)
        hypothesis_attention_matrix = tf.multiply(hypothesis_attention, hypothesis)

    return base_attention_matrix, hypothesis_attention_matrix

In [14]:
# Lower-case aliases of the configuration constants; model_fn reads these
# module-level names rather than receiving them as arguments.
vocabulary_size, embedding_size = VOCAB_SIZE, WORD_EMBEDDING_DIM
conv_channel_size, conv_window_size = CONV_FEATURE_DIM, CONV_WINDOW_SIZE

base_sequence_limit_len = BASE_SEQ_LIMIT_LEN
hypothesis_sequence_limit_len = HYPOTHESIS_SEQ_LIMIT_LEN

def model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` for the convolutional sentence-pair classifier.

    Both inputs are embedded with one shared table, sent through their own
    ``conv1d`` layer and two average poolings, concatenated, and fed to a
    single-unit dense layer that produces one logit per example.

    The attention steps (``abcnn_1_attention`` on the embedded inputs and
    ``abcnn_2_attention`` on the convolution outputs) are not wired into
    this graph, so the only prediction exposed is the probability.

    Args:
        features: dict with ``'base'`` and ``'hypothesis'`` entries, used as
            row indices into the embedding table.
        labels: binary targets; not used in PREDICT mode.
        mode: one of ``tf.estimator.ModeKeys``.
        params: unused.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    word_embeddings = tf.get_variable('word_embeddings', [vocabulary_size, embedding_size])

    base_embedded_matrix = tf.nn.embedding_lookup(word_embeddings, features['base'])
    hypothesis_embedded_matrix = tf.nn.embedding_lookup(word_embeddings, features['hypothesis'])

    # abcnn_1_attention would be applied here, to the embedded inputs.
    base_conv_layer = tf.layers.conv1d(base_embedded_matrix, conv_channel_size,
                                       conv_window_size, name='base_conv',
                                       activation=tf.nn.tanh,
                                       padding='same')
    hypothesis_conv_layer = tf.layers.conv1d(hypothesis_embedded_matrix, conv_channel_size,
                                             conv_window_size, name='hypothesis_conv',
                                             activation=tf.nn.tanh,
                                             padding='same')

    # abcnn_2_attention would be applied here, to the convolution outputs.
    base_pooled = tf.layers.average_pooling1d(base_conv_layer, conv_window_size, 1,
                                              padding='same')
    hypothesis_pooled = tf.layers.average_pooling1d(hypothesis_conv_layer, conv_window_size, 1,
                                                    padding='same')

    # Pooling with a window as long as the sequence limit: the squeeze below
    # relies on this leaving a sequence axis of length 1, which presumes the
    # inputs are exactly that long.
    base_feature_vector = tf.layers.average_pooling1d(base_pooled, base_sequence_limit_len, 1)
    hypothesis_feature_vector = tf.layers.average_pooling1d(hypothesis_pooled, hypothesis_sequence_limit_len, 1)

    feature_vector = tf.concat([base_feature_vector, hypothesis_feature_vector], -1)
    feature_vector = tf.squeeze(feature_vector, 1)

    logits = tf.layers.dense(feature_vector, 1)
    logits = tf.squeeze(logits, 1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Only tensors that exist in this graph may be returned. Referencing
        # the attention tensors here raised NameError, because the code that
        # builds them is disabled.
        return tf.estimator.EstimatorSpec(
                  mode=mode,
                  predictions={
                      'prob': tf.nn.sigmoid(logits)
                  })
    elif mode == tf.estimator.ModeKeys.EVAL:
        log_loss = tf.losses.log_loss(labels, tf.nn.sigmoid(logits))
        accuracy = tf.metrics.accuracy(labels, tf.round(tf.nn.sigmoid(logits)))
        eval_metric_ops = {'acc': accuracy}

        return tf.estimator.EstimatorSpec(mode, loss=log_loss, eval_metric_ops=eval_metric_ops)
    else:
        global_step = tf.train.get_global_step()
        loss = tf.losses.sigmoid_cross_entropy(labels, logits)
        train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step)

        return tf.estimator.EstimatorSpec(
                  mode=mode,
                  train_op=train_op,
                  loss=loss)

In [15]:
# Estimator around model_fn; checkpoints are written to and restored from
# model_dir. No config or params are passed, so the default config is used.
est = tf.estimator.Estimator(model_fn, model_dir='models_abcnn_1_1')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models_abcnn_1_1', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd7380cb7b8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [17]:
# Alternate training and evaluation 20 times. est.train is called without
# `steps`, so each call runs until train_input_fn's iterator is exhausted.
# The dict returned by est.evaluate is discarded; the metrics only show up
# in the TensorFlow log output.
for epoch in range(20):
    print('epoch ', epoch)
    est.train(train_input_fn)
    est.evaluate(test_input_fn)
    print('epoch done!')

epoch  0
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into models_abcnn_1/model.ckpt.
INFO:tensorflow:loss = 0.69347996, step = 0
INFO:tensorflow:global_step/sec: 53.2119
INFO:tensorflow:loss = 0.5869912, step = 100 (1.881 sec)
INFO:tensorflow:global_step/sec: 109.409
INFO:tensorflow:loss = 0.58358836, step = 200 (0.914 sec)
INFO:tensorflow:global_step/sec: 99.5171
INFO:tensorflow:loss = 0.53418756, step = 300 (1.005 sec)
INFO:tensorflow:global_step/sec: 96.2335
INFO:tensorflow:loss = 0.662761, step = 400 (1.040 sec)
INFO:tensorflow:global_step/sec: 116.562
INFO:tensorflow:loss = 0.5485172, step = 500 (0.856 sec)
INFO:tensorflow:global_step/sec: 107.842
INFO:tensorflow:loss = 0.5382327, step = 600 (0.929 sec)
INFO:tensorflow:global_step/sec: 102.698
INFO:te

INFO:tensorflow:loss = 0.40862942, step = 6986 (1.021 sec)
INFO:tensorflow:global_step/sec: 95.3336
INFO:tensorflow:loss = 0.46914595, step = 7086 (1.047 sec)
INFO:tensorflow:global_step/sec: 116.778
INFO:tensorflow:loss = 0.39647844, step = 7186 (0.857 sec)
INFO:tensorflow:global_step/sec: 100.219
INFO:tensorflow:loss = 0.4376062, step = 7286 (0.999 sec)
INFO:tensorflow:global_step/sec: 92.5907
INFO:tensorflow:loss = 0.49982262, step = 7386 (1.081 sec)
INFO:tensorflow:global_step/sec: 93.4792
INFO:tensorflow:loss = 0.46074325, step = 7486 (1.069 sec)
INFO:tensorflow:global_step/sec: 105.366
INFO:tensorflow:loss = 0.47911277, step = 7586 (0.949 sec)
INFO:tensorflow:global_step/sec: 99.3959
INFO:tensorflow:loss = 0.52391374, step = 7686 (1.006 sec)
INFO:tensorflow:global_step/sec: 99.2813
INFO:tensorflow:loss = 0.4892831, step = 7786 (1.007 sec)
INFO:tensorflow:global_step/sec: 99.4896
INFO:tensorflow:loss = 0.48959446, step = 7886 (1.005 sec)
INFO:tensorflow:global_step/sec: 101.182
IN

INFO:tensorflow:global_step/sec: 105.278
INFO:tensorflow:loss = 0.40703383, step = 14272 (0.948 sec)
INFO:tensorflow:global_step/sec: 104.08
INFO:tensorflow:loss = 0.26121306, step = 14372 (0.962 sec)
INFO:tensorflow:global_step/sec: 102.346
INFO:tensorflow:loss = 0.35393947, step = 14472 (0.977 sec)
INFO:tensorflow:global_step/sec: 106.588
INFO:tensorflow:loss = 0.52221256, step = 14572 (0.938 sec)
INFO:tensorflow:global_step/sec: 111.539
INFO:tensorflow:loss = 0.33705243, step = 14672 (0.897 sec)
INFO:tensorflow:global_step/sec: 105.576
INFO:tensorflow:loss = 0.39056885, step = 14772 (0.948 sec)
INFO:tensorflow:global_step/sec: 99.4
INFO:tensorflow:loss = 0.38839823, step = 14872 (1.006 sec)
INFO:tensorflow:global_step/sec: 105.416
INFO:tensorflow:loss = 0.27858114, step = 14972 (0.948 sec)
INFO:tensorflow:global_step/sec: 105.532
INFO:tensorflow:loss = 0.32415596, step = 15072 (0.948 sec)
INFO:tensorflow:global_step/sec: 105.399
INFO:tensorflow:loss = 0.3394801, step = 15172 (0.948 

INFO:tensorflow:global_step/sec: 92.3212
INFO:tensorflow:loss = 0.40429854, step = 21458 (1.083 sec)
INFO:tensorflow:global_step/sec: 94.5737
INFO:tensorflow:loss = 0.30795816, step = 21558 (1.057 sec)
INFO:tensorflow:global_step/sec: 100.9
INFO:tensorflow:loss = 0.24074855, step = 21658 (0.992 sec)
INFO:tensorflow:global_step/sec: 96.7708
INFO:tensorflow:loss = 0.34519956, step = 21758 (1.033 sec)
INFO:tensorflow:global_step/sec: 101.05
INFO:tensorflow:loss = 0.22860833, step = 21858 (0.989 sec)
INFO:tensorflow:global_step/sec: 102.436
INFO:tensorflow:loss = 0.26817578, step = 21958 (0.977 sec)
INFO:tensorflow:global_step/sec: 108.236
INFO:tensorflow:loss = 0.2494629, step = 22058 (0.923 sec)
INFO:tensorflow:global_step/sec: 102.296
INFO:tensorflow:loss = 0.20721658, step = 22158 (0.978 sec)
INFO:tensorflow:global_step/sec: 102.28
INFO:tensorflow:loss = 0.3356899, step = 22258 (0.978 sec)
INFO:tensorflow:global_step/sec: 102.763
INFO:tensorflow:loss = 0.3838889, step = 22358 (0.972 se

INFO:tensorflow:Starting evaluation at 2018-06-30-14:51:21
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models_abcnn_1/model.ckpt-28430
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-06-30-14:51:22
INFO:tensorflow:Saving dict for global step 28430: acc = 0.7354869, global_step = 28430, loss = 0.67761374
epoch done!
epoch  5
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models_abcnn_1/model.ckpt-28430
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 28431 into models_abcnn_1/model.ckpt.
INFO:tensorflow:loss = 0.37696654, step = 28430
INFO:tensorflow:global_step/sec: 55.1198
INFO:tensorflow:loss = 0.2677336, step = 28530 (1.816 sec)
INFO:tensorflow:global_s

INFO:tensorflow:global_step/sec: 94.2536
INFO:tensorflow:loss = 0.16510499, step = 34816 (1.063 sec)
INFO:tensorflow:global_step/sec: 101.363
INFO:tensorflow:loss = 0.244077, step = 34916 (0.988 sec)
INFO:tensorflow:global_step/sec: 99.0552
INFO:tensorflow:loss = 0.19933233, step = 35016 (1.007 sec)
INFO:tensorflow:global_step/sec: 104.071
INFO:tensorflow:loss = 0.25358167, step = 35116 (0.959 sec)
INFO:tensorflow:global_step/sec: 108.193
INFO:tensorflow:loss = 0.3493779, step = 35216 (0.924 sec)
INFO:tensorflow:global_step/sec: 114.71
INFO:tensorflow:loss = 0.31109113, step = 35316 (0.873 sec)
INFO:tensorflow:global_step/sec: 102.085
INFO:tensorflow:loss = 0.16680914, step = 35416 (0.978 sec)
INFO:tensorflow:global_step/sec: 108.524
INFO:tensorflow:loss = 0.2854645, step = 35516 (0.923 sec)
INFO:tensorflow:global_step/sec: 102.073
INFO:tensorflow:loss = 0.11573992, step = 35616 (0.980 sec)
INFO:tensorflow:global_step/sec: 106.678
INFO:tensorflow:loss = 0.26188603, step = 35716 (0.937 

INFO:tensorflow:global_step/sec: 106.396
INFO:tensorflow:loss = 0.13801679, step = 42002 (0.939 sec)
INFO:tensorflow:global_step/sec: 102.201
INFO:tensorflow:loss = 0.18367356, step = 42102 (0.979 sec)
INFO:tensorflow:global_step/sec: 103.763
INFO:tensorflow:loss = 0.3441192, step = 42202 (0.963 sec)
INFO:tensorflow:global_step/sec: 104.932
INFO:tensorflow:loss = 0.18269461, step = 42302 (0.953 sec)
INFO:tensorflow:global_step/sec: 97.7584
INFO:tensorflow:loss = 0.18341906, step = 42402 (1.023 sec)
INFO:tensorflow:global_step/sec: 99.585
INFO:tensorflow:loss = 0.1549523, step = 42502 (1.004 sec)
INFO:tensorflow:global_step/sec: 96.7764
INFO:tensorflow:loss = 0.18527082, step = 42602 (1.034 sec)
INFO:tensorflow:global_step/sec: 101.938
INFO:tensorflow:loss = 0.21719992, step = 42702 (0.980 sec)
INFO:tensorflow:global_step/sec: 105.29
INFO:tensorflow:loss = 0.24377835, step = 42802 (0.950 sec)
INFO:tensorflow:global_step/sec: 101.384
INFO:tensorflow:loss = 0.36179626, step = 42902 (0.986

KeyboardInterrupt: 

In [16]:
# One optimisation step followed by an evaluation.
# NOTE(review): the evaluation reads train_input_fn, not test_input_fn, so it
# scores the training split — confirm this is intended.
est.train(train_input_fn, steps=1)
est.evaluate(train_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from models_abcnn_1_1/model.ckpt-1


KeyboardInterrupt: 