In [1]:
import tensorflow as tf
import numpy as np

from sklearn.model_selection import train_test_split

import json

In [2]:
Q1_TRAINING_DATA_FILE = 'q1_train.npy'
Q2_TRAINING_DATA_FILE = 'q2_train.npy'
LABEL_TRAINING_DATA_FILE = 'label_train.npy'
NB_WORDS_DATA_FILE = 'nb_words.json'

TEST_SPLIT = 0.1
RNG_SEED = 13371447

In [3]:
q1_data = np.load(open(Q1_TRAINING_DATA_FILE, 'rb'))
q2_data = np.load(open(Q2_TRAINING_DATA_FILE, 'rb'))
labels = np.load(open(LABEL_TRAINING_DATA_FILE, 'rb'))
prepro_configs = None

with open(NB_WORDS_DATA_FILE, 'r') as f:
    prepro_configs = json.load(f)

In [4]:
X = np.stack((q1_data, q2_data), axis=1)
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SPLIT, random_state=RNG_SEED)

Q1_train = X_train[:,0]
Q2_train = X_train[:,1]
Q1_test = X_test[:,0]
Q2_test = X_test[:,1]

In [5]:
def rearrange(base, hypothesis, label):
    features = {"base": base, "hypothesis": hypothesis}
    return features, label

def train_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((Q1_train, Q2_train, y_train))
    dataset = dataset.shuffle(buffer_size=100)
    dataset = dataset.batch(16)
    dataset = dataset.map(rearrange)
    dataset = dataset.repeat()
    iterator = dataset.make_one_shot_iterator()
    
    return iterator.get_next()

def test_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((Q1_test, Q2_test, y_test))
    dataset = dataset.shuffle(buffer_size=100)
    dataset = dataset.batch(16)
    dataset = dataset.map(rearrange)
    dataset = dataset.repeat()
    iterator = dataset.make_one_shot_iterator()
    
    return iterator.get_next()

In [6]:
VOCAB_SIZE = prepro_configs['vocab_size']

WORD_EMBEDDING_DIM = 100
CONV_FEATURE_DIM = 300
CONV_OUTPUT_DIM = 128
CONV_WINDOW_SIZE = 3

SIMILARITY_DENSE_FEATURE_DIM = 200

In [7]:
vocabulary_size = VOCAB_SIZE
embedding_size = WORD_EMBEDDING_DIM
conv_channel_size = CONV_FEATURE_DIM
conv_window_size = CONV_WINDOW_SIZE
conv_output_feature_size = CONV_OUTPUT_DIM

similairiry_dense_dim = SIMILARITY_DENSE_FEATURE_DIM

def model_fn(features, labels, mode, params):
    word_embeddings = tf.get_variable('word_embeddings', [vocabulary_size, embedding_size])
    #Embedding
    
    def conv_sementic_network(inputs, reuse=tf.AUTO_REUSE):
        conv_layers = [tf.layers.conv1d(inputs, conv_channel_size, conv_window_size, 
                                        activation=tf.nn.relu, 
                                        name='conv1d_'+ str(i),
                                        reuse=reuse)
                       for i in range(5)]
        pool_layers = tf.concat([tf.layers.max_pooling1d(conv_layer, int(conv_layer.shape[1]), 1)
                            for conv_layer in conv_layers],
                            2)
        output_layer = tf.layers.dense(pool_layers, conv_output_feature_size, 
                                        activation=tf.nn.relu,
                                        reuse=reuse,
                                        name='conv_sementic_output')
        output_layer = tf.squeeze(output_layer, 1)
        
        return output_layer
    
    base_embedded_matrix = tf.nn.embedding_lookup(word_embeddings, features['base'])
    hypothesis_embedded_matrix = tf.nn.embedding_lookup(word_embeddings, features['hypothesis'])
    
    base_sementic_matrix = conv_sementic_network(base_embedded_matrix)
    hypothesis_sementic_matrix = conv_sementic_network(hypothesis_embedded_matrix)
    
    merged_matrix = tf.concat([base_sementic_matrix, hypothesis_sementic_matrix], -1)
    #norm_merged_matrix = tf.layers.batch_normalization(merged_matrix)
    similarity_dense_layer = tf.layers.dense(merged_matrix, similairiry_dense_dim,
                                             activation=tf.nn.relu)
    
    logit_layer = tf.layers.dense(similarity_dense_layer, 1,
                    activation=tf.nn.sigmoid)
    logit_layer = tf.squeeze(logit_layer, 1)
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
                  mode=mode,
                  predictions={
                      'prob':tf.nn.sigmoid(logit_layer)
                  })
    else:
        global_step = tf.train.get_global_step()
        loss = tf.losses.mean_squared_error(labels, logit_layer)
        train_op = tf.train.AdamOptimizer(1e-4).minimize(loss, global_step)
        
        accuracy = tf.metrics.accuracy(labels, tf.nn.sigmoid(logit_layer))
        eval_metric_ops = {'acc': accuracy}
    
        return tf.estimator.EstimatorSpec(
                  mode=mode,
                  train_op=train_op,
                  loss=loss,
                  eval_metric_ops=eval_metric_ops)

In [8]:
est = tf.estimator.Estimator(model_fn, model_dir='models_3')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models_3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x118291048>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [9]:
est.train(train_input_fn)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into models_3/model.ckpt.
INFO:tensorflow:loss = 0.2502206, step = 1
INFO:tensorflow:global_step/sec: 10.5334
INFO:tensorflow:loss = 0.2021451, step = 101 (9.495 sec)
INFO:tensorflow:global_step/sec: 12.6659
INFO:tensorflow:loss = 0.2220306, step = 201 (7.895 sec)
INFO:tensorflow:global_step/sec: 12.5941
INFO:tensorflow:loss = 0.15368058, step = 301 (7.940 sec)
INFO:tensorflow:global_step/sec: 12.5916
INFO:tensorflow:loss = 0.19491744, step = 401 (7.942 sec)
INFO:tensorflow:global_step/sec: 12.569
INFO:tensorflow:loss = 0.17726651, step = 501 (7.956 sec)
INFO:tensorflow:global_step/sec: 12.6257
INFO:tensorflow:loss = 0.20290086, st

KeyboardInterrupt: 