In [28]:
import tensorflow as tf

from tensorflow.python.keras._impl.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras._impl.keras.preprocessing.sequence import pad_sequences
from tensorflow.python.keras._impl.keras.utils.data_utils import get_file

import os
from datetime import datetime

# from tensorflow.python.keras._impl.keras.preprocessing import sequence
from tensorflow.python.keras._impl.keras.layers import Embedding
# from tensorflow.python.keras._impl.keras.layers import Reshape, Flatten, Dropout, Concatenate, dot, add

# from tensorflow.python.keras._impl.keras.optimizers import Adam
# from tensorflow.python.keras._impl.keras.models import Model
# from tensorflow.python.keras._impl.keras.layers import LSTM
# from tensorflow.python import keras
# from tensorflow.python.keras._impl.keras.layers.wrappers import TimeDistributed, Bidirectional

import numpy as np
import json
from sklearn.model_selection import train_test_split

# Initial global var

In [16]:
Q1_TRAINING_DATA_FILE = 'q1_train.npy'
Q2_TRAINING_DATA_FILE = 'q2_train.npy'
LABEL_TRAINING_DATA_FILE = 'label_train.npy'
WORD_EMBEDDING_MATRIX_FILE = 'word_embedding_matrix.npy'
NB_WORDS_DATA_FILE = 'nb_words.json'
MODEL_WEIGHTS_FILE = 'question_pairs_weights.h5'
SENT_EMB_DIM = 50
MAX_SEQ_LEN = 25
EMBEDDING_DIM = 50
VALIDATION_SPLIT = 0.1
TEST_SPLIT = 0.1
RNG_SEED = 13371447
NB_EPOCHS = 25
DROPOUT = 0.1
BATCH_SIZE = 32
DATA_PATH = './data/'

# Load Dataset

In [3]:
q1_data = np.load(open(DATA_PATH + Q1_TRAINING_DATA_FILE, 'rb'))
q2_data = np.load(open(DATA_PATH + Q2_TRAINING_DATA_FILE, 'rb'))
labels = np.load(open(DATA_PATH + LABEL_TRAINING_DATA_FILE, 'rb'))
word_embedding_matrix = np.load(open(DATA_PATH+WORD_EMBEDDING_MATRIX_FILE, 'rb'))
with open(DATA_PATH+NB_WORDS_DATA_FILE, 'r') as f:
    nb_words = json.load(f)['nb_words']

# Split train and test dataset

In [7]:
X = np.stack((q1_data, q2_data), axis=1)
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SPLIT, random_state=RNG_SEED)
Q1_train = X_train[:,0]
Q2_train = X_train[:,1]
Q1_test = X_test[:,0]
Q2_test = X_test[:,1]

In [17]:
def rearrange(q, sim_q, labels):
    features = {"q": q, "sim_q": sim_q}
    return features, labels

def train_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((Q1_train, Q2_train, y_train))
    dataset = dataset.shuffle(buffer_size=len(Q1_train))
    dataset = dataset.batch(16)
    dataset = dataset.map(rearrange)
    dataset = dataset.repeat()
    iterator = dataset.make_one_shot_iterator()
    
    return iterator.get_next()

def test_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((Q1_test, Q2_test, y_test))
    dataset = dataset.shuffle(buffer_size=100)
    dataset = dataset.batch(16)
    dataset = dataset.map(rearrange)
    dataset = dataset.repeat()
    iterator = dataset.make_one_shot_iterator()
    
    return iterator.get_next()

In [26]:
def RNN_attention(features, labels, mode):
    
    TRAIN = mode == tf.estimator.ModeKeys.TRAIN
    EVAL = mode == tf.estimator.ModeKeys.EVAL
    PREDICT = mode == tf.estimator.ModeKeys.PREDICT

    def q_emb(sent_input):
        q_emb_layer = Embedding(nb_words + 1,
                                EMBEDDING_DIM,
                                weights=[word_embedding_matrix],
                                input_length=MAX_SEQ_LEN,
                                trainable=False,
                                name='query_embedding')
        
        return q_emb_layer(features['q'])

    def sim_q_emb(sent_input):
        q_emb_layer = Embedding(nb_words + 1,
                                EMBEDDING_DIM,
                                weights=[word_embedding_matrix],
                                input_length=MAX_SEQ_LEN,
                                trainable=False,
                                name='sim_query_embedding')
        
        return q_emb_layer(features['sim_q'])
    
    q_encoded = q_emb(features['q'])
    sim_q_encoded = sim_q_emb(features['sim_q'])
    
    with tf.variable_scope('q_bi_gru'):
        gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = 64, activation = tf.nn.tanh)
        gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = 64, activation = tf.nn.tanh)
        _, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell,
                                                          cell_bw = gru_bw_cell,
                                                          inputs = q_encoded,
                                                          dtype = tf.float32)

        q_final_state = tf.concat([output_states[0], output_states[1]], axis=1)
        q_dense_layer = tf.layers.dense(q_final_state, 128, name='q_dense')
        
    with tf.variable_scope('sim_q_bi_gru'):
        gru_fw_cell = tf.contrib.rnn.GRUCell(num_units = 64, activation = tf.nn.tanh)
        gru_bw_cell = tf.contrib.rnn.GRUCell(num_units = 64, activation = tf.nn.tanh)
        _, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw = gru_fw_cell,
                                                          cell_bw = gru_bw_cell,
                                                          inputs = sim_q_encoded,
                                                          dtype = tf.float32)
        
        sim_q_final_state = tf.concat([output_states[0], output_states[1]], axis=1)
        sim_q_dense_layer = tf.layers.dense(sim_q_final_state, 128, name='sim_q_dense')
        
    with tf.variable_scope('output_layer'):
        
        merged_matrix = tf.concat([q_dense_layer, sim_q_dense_layer], -1)
        dense_layer = tf.layers.dense(merged_matrix, 64, activation=tf.nn.tanh, name='dense_concat')
        output = tf.layers.dense(inputs=dense_layer, units=1, activation=tf.nn.sigmoid, name='dense_final')
        print(output)
        logit_layer = tf.squeeze(output, 1)
        print(logit_layer)
    
    if TRAIN:
    
        global_step = tf.train.get_global_step()        
        loss = tf.losses.mean_squared_error(labels, output)
        tf.summary.scalar('loss', loss)

        train_op = tf.train.AdamOptimizer(1e-4).minimize(loss, global_step)

        return tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss = loss)
    
    elif EVAL:
        loss = tf.losses.mean_squared_error(labels, output)
        pred = tf.nn.sigmoid(output)
        accuracy = tf.metrics.accuracy(labels, tf.round(pred))
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops={'acc': accuracy})
        
    elif PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                'prob': output,
                'q_sem': q_dense_layer,
                'sim_q_sem': sim_q_dense_layer
            }
        )

In [24]:
model_dir = os.path.join(os.getcwd(), "data/checkpoint/")
os.makedirs(model_dir, exist_ok=True)

config_tf = tf.estimator.RunConfig()
config_tf._save_checkpoints_steps = 100
config_tf._save_checkpoints_secs = None
config_tf._keep_checkpoint_max =  2
config_tf._log_step_count_steps = 100

In [29]:
time_start = datetime.now()

# validation_monitor = tf.contrib

print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................") 

tf.logging.set_verbosity(tf.logging.INFO)
print(tf.__version__)

train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=1000)
# eval_spec = tf.estimator.EvalSpec(eval_input_fn, steps=100)

dssm_est = tf.estimator.Estimator(RNN_attention, model_dir=model_dir, config=config_tf)
dssm_est.train(train_input_fn, steps=10)

# tf.estimator.train_and_evaluate(dssm_est, train_spec, eval_spec)

# dssm_est.train(train_input_fn, steps=30000)
# For prediction or extract values
# prediction = est.predict(eval_input_fn)

# for i, p in enumerate(prediction):
#     print(i, p['sim_q_sem'])

time_end = datetime.now()

print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))

Experiment started at 19:17:33
.......................................
1.8.0
INFO:tensorflow:Using config: {'_model_dir': '/Users/user/git/DeepNLP/Kaggle/QuoraQuestionPairs/data/checkpoint/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 100, '_save_checkpoints_secs': None, '_session_config': None, '_keep_checkpoint_max': 2, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x102dd5b70>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
Tensor("output_layer/dense_final/Sigmoid:0", shape=(?, 1), dtype=float32)
Tensor("output_layer/Squeeze:0", shape=(?,), dtype=float32)


ValueError: Shapes (?, 1) and (?,) are incompatible

In [None]:
https://github.com/bradleypallen/keras-quora-question-pairs/blob/master/quora-question-pairs-data-prep.ipynb
https://github.com/Smerity/keras_snli
https://github.com/bradleypallen/keras-quora-question-pairs/blob/master/quora-question-pairs-training-attention.ipynb
https://github.com/bradleypallen/keras-quora-question-pairs/blob/master/quora-question-pairs-training.ipynb
http://www.wildml.com/2016/01/attention-and-memory-in-deep-learning-and-nlp/
