## Identification of Quora Duplicates using LSTMN
(Experimatental) Continuation of DL methods explored in encode_attend.ipynb. This method attempts to improve on the attention based methods by using LSTMN to incorporate both intra-sentence and inter-sentence attention in the process of finding text similarity

### Method 3 - Long Short-Term Memory-Networks
(Code implemented based on paper Long Short-Term Memory-Networks for Machine Reading by Cheng et al. https://arxiv.org/pdf/1601.06733.pdf). 

#### load previously pickled word vectors and train and test token IDs

In [1]:
import spacy
import numpy as np

In [2]:
import pandas as pd
import numpy as np
import pickle

In [3]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [4]:
data_folder = '/media/siri/78C6823EC681FD1E/minio/data/dq-data/dl/'
input_folder = '/media/siri/78C6823EC681FD1E/minio/data/dq-data/'
logs_folder = '/media/siri/78C6823EC681FD1E/tf-logs/dq/'

In [6]:
X_train_f = pickle.load(open(data_folder+'X_train_f.p', 'rb'))
X_test = pickle.load(open(input_folder+'X_test.p', 'rb'))
X_valid = pickle.load(open(data_folder+'X_valid.p', 'rb'))

In [7]:
y_train_f = pickle.load(open(data_folder+'y_train_f.p', 'rb'))
y_test = pickle.load(open(input_folder+'y_test.p', 'rb'))
y_valid = pickle.load(open(data_folder+'y_valid.p', 'rb'))

In [8]:
w2v = pickle.load(open(data_folder+'w2v.p', 'rb'))
q1_train_w2v = pickle.load(open(data_folder+'q1_train_w2v.p', 'rb'))
q2_train_w2v = pickle.load(open(data_folder+'q2_train_w2v.p', 'rb'))
q1_test_w2v = pickle.load(open(data_folder+'q1_test_w2v.p', 'rb'))
q2_test_w2v = pickle.load(open(data_folder+'q2_test_w2v.p', 'rb'))
q1_valid_w2v = pickle.load(open(data_folder+'q1_valid_w2v.p', 'rb'))
q2_valid_w2v = pickle.load(open(data_folder+'q2_valid_w2v.p', 'rb'))

In [9]:
import tensorflow as tf
config = tf.ConfigProto(allow_soft_placement = True)
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)

In [10]:
def create_embedding(vectors, max_length, projected_dim):
    return tf.keras.models.Sequential([
        tf.keras.layers.Embedding(
            vectors.shape[0],
            vectors.shape[1],
            input_length=max_length,
            weights=[vectors],
            trainable=False),
        
        tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(projected_dim,
                         activation=None,
                         use_bias=False))
    ])

In [11]:
from __future__ import print_function, division
import numpy as np
from tensorflow.python import debug as tf_debug
from tensorflow.python.client import timeline

In [12]:
num_epochs = 5
state_size = 200
num_classes = 1
echo_step = 3
max_length = 50
# batch_size must be an exact factor of total_series_length
train_batch_size = 62
valid_batch_size = 16
truncated_backprop_length = max_length
train_series_length = q1_train_w2v.shape[0] - q1_train_w2v.shape[0] % train_batch_size
valid_series_length = q1_valid_w2v.shape[0] - q1_valid_w2v.shape[0] % valid_batch_size
num_train_batches = train_series_length//train_batch_size
num_valid_batches = valid_series_length//valid_batch_size

vocab_size = w2v.shape[0]
embedding_dim = w2v.shape[1]
beta = 0.01
epsilon = 1e-3

In [13]:
def generateData(series_length, num_batches, is_train):
    if is_train:
        q1 = q1_train_w2v
        q2 = q2_train_w2v
        y = y_train_f
    else:
        q1 = q1_valid_w2v
        q2 = q2_valid_w2v
        y = y_valid
        
    x1 = np.array_split(q1[:series_length], num_batches)
    x2 = np.array_split(q2[:series_length], num_batches)
    y_split = np.array_split(y.values[:series_length], num_batches)

    return (x1, x2, y_split)

In [14]:
def red_dims(projected_dim):
     return tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(projected_dim,
                         activation=None,
                         use_bias=False))
        
def preprocess_input(X1, X2, projected_dim):
    with tf.device('/cpu:0'):
        with tf.variable_scope('embedding', reuse=tf.AUTO_REUSE):
            embed_vectors = tf.get_variable('embed_vectors', initializer=tf.constant(w2v))
            x1 = red_dims(projected_dim)(tf.nn.embedding_lookup(embed_vectors, X1))
            x2 = red_dims(projected_dim)(tf.nn.embedding_lookup(embed_vectors, X2))
            x1_b = [tf.squeeze(x, [0]) for x in 
                    tf.split(tf.transpose(x1, [1,0,2]), max_length,)]
            x2_b = [tf.squeeze(x, [0]) for x in 
                    tf.split(tf.transpose(x2, [1,0,2]), max_length,)]    
        return x1_b, x2_b

In [15]:
def batch_normalize_layer(x_t, scale, beta, epsilon):
    batch_mean, batch_var = tf.nn.moments(x_t, [0])
    return tf.nn.batch_normalization(x_t, batch_mean, batch_var, scale, beta, epsilon)

In [16]:
def split_sum_join_batches(x, axis=0):
    batch_size = tf.shape(x)[0]
    return tf.concat([tf.reduce_sum(batch, axis=0, keepdims=True) 
                      for batch in tf.split(x, batch_size,)], axis=0)

In [17]:
def att_adapted_encoder_layer(x_t, adapted_hidden_prev, hidden, compute, W, v, W_h, time_step):
    b_t = tf.concat([x_t, adapted_hidden_prev], 1)
    b_t_dot_W = tf.matmul(b_t, W)
    #b = tf.expand_dims(b_t_dot_W, 0)
    #W_h_exp = tf.expand_dims(W_h, 0)
    #v_exp = tf.expand_dims(v, 0)
    num_repeats = time_step
    if num_repeats == 0:
        num_repeats = 1
    b_arr = []
    W_h_arr = []
    v_arr = []
    for i in range(num_repeats):
        b_arr.append(b_t_dot_W)
        W_h_arr.append(W_h)
        v_arr.append(v)
    #b = tf.contrib.seq2seq.tile_batch(b, time_step)
    #W_h_exp = tf.contrib.seq2seq.tile_batch(W_h_exp, time_step)
    #v_exp = tf.contrib.seq2seq.tile_batch(v_exp, time_step)
    b_exp = tf.stack(b_arr)
    W_h_exp = tf.stack(W_h_arr)
    v_exp = tf.stack(v_arr)
    #b_t = tf.concat([b, hidden], 1)
    
    #b_t_dot_W = tf.matmul(b_t, W)

    v_t = tf.tanh(tf.matmul(tf.concat([b_exp, hidden], -1), W_h_exp))

    a_t = tf.matmul(v_t, v_exp)

    s_t = tf.nn.softmax(a_t)
    ## attention adapted hidden and compute memory for input1
    adapted_hidden_t = tf.reduce_sum(hidden*s_t, 0)

    adapted_compute_t = tf.reduce_sum(compute*s_t, 0)

    return adapted_hidden_t, adapted_compute_t

In [18]:
def inter_attention(x_t, src_adapted_hidden_t, W_inter):
    b_t = tf.concat([x_t, src_adapted_hidden_t], 1)
    b_t_dot_W = tf.matmul(b_t, W_inter)
    return tf.sigmoid(b_t_dot_W)

def compute_LSTM_gates(x_t, adapted_hidden_t, W):
    b_t = tf.concat([x_t, adapted_hidden_t], 1)
    b_t_dot_W = tf.matmul(b_t, W)
    i_t = tf.sigmoid(b_t_dot_W)
    f_t = i_t
    o_t = i_t
    compute_hat_t = tf.tanh(b_t_dot_W)
    return i_t, f_t, o_t, compute_hat_t
    
def LSTMN_encoder_layer(x_t, adapted_hidden_t, adapted_compute_t , W):
    i_t, f_t, o_t, compute_hat_t = compute_LSTM_gates(x_t, adapted_hidden_t, W)

    ## final hidden and compute memory for input1
    compute_t = f_t*adapted_compute_t + i_t*compute_hat_t
    
    hidden_t = o_t*tf.tanh(compute_t)
    
    return hidden_t, compute_t
    
def LSTMN_inter_att_encoder_layer(x_t, adapted_hidden_t, adapted_compute_t , W, 
                        src_adapted_hidden_t, src_adapted_compute_t, W_inter):
    i_t, f_t, o_t, compute_hat_t = compute_LSTM_gates(x_t, adapted_hidden_t, W)

    ## final hidden and compute memory for input1
    compute_t = f_t*adapted_compute_t + i_t*compute_hat_t
    ## inter attention between input1 and input2
    compute_t = compute_t+inter_attention(x_t, src_adapted_hidden_t, W_inter)*src_adapted_compute_t
    
    hidden_t = o_t*tf.tanh(compute_t)
    
    return hidden_t, compute_t

In [19]:
def extract_ith_state(states, i, total):
    states = states[i-1:]
    ts = tf.concat([tf.expand_dims(state, 0) for j,state in enumerate(states) if j%total == 0], axis=0)
    return ts

In [20]:
def get_prev_states(current_state, states_series):
        gamma_tilde_t1 = current_state[0]
        gamma = extract_ith_state(states=states_series, i=2, total=6)
        alpha = extract_ith_state(states=states_series, i=3, total=6)
        h_tilde_t1 = current_state[3]
        h = extract_ith_state(states=states_series, i=5, total=6)
        c = extract_ith_state(states=states_series, i=6, total=6)
        return gamma_tilde_t1, gamma, alpha, h_tilde_t1, h, c

In [21]:
def deep_attention_lstmn(x1_batch, x2_batch, epsilon, scale1, beta1, W1, v1, W1_h, W2,
                              scale2, beta2, W3, v3, W3_h, W4, W5):
        #current_state = init_state
        states_series = [] 
        for time_step, current_x in enumerate(zip(x1_batch, x2_batch)):
            current_x1, current_x2 = current_x

            # get encoded outputs of previous time-step (or initilaize if this is the first time-step)
            if len(states_series) == 0:
                init_state = tf.zeros_like(current_x1)
                init_h = tf.expand_dims(init_state, 0)
                gamma_tilde_t1 = init_state
                h_tilde_t1 = init_state
                gamma = init_h
                alpha = init_h
                h = init_h
                c = init_h
            else:
                gamma_tilde_t1, gamma, alpha, h_tilde_t1, h, c = get_prev_states(current_state, states_series)


            #### input1 attention and encoding

            ## batch normalize input1  
            current_x1_hat = batch_normalize_layer(current_x1, scale1, beta1, epsilon)

            ## input1 attention layer
            ## attention adapted hidden and compute memory for input1
            gamma_tilde_t, alpha_tilde_t = att_adapted_encoder_layer(current_x1_hat, gamma_tilde_t1, gamma, 
                                                                        alpha, W1, v1, W1_h, time_step)

            ## LSTM gates for input1
            gamma_t, alpha_t = LSTMN_encoder_layer(current_x1_hat, gamma_tilde_t, alpha_tilde_t , W2) 



            #### input2 attention and encoding (including inter-attention with input1)

            ## batch normalize input2
            current_x2_hat = batch_normalize_layer(current_x2, scale2, beta2, epsilon)

            ## input2 attention layer
            ## attention adapted hidden and compute memory for input2 
            h_tilde_t, c_tilde_t = att_adapted_encoder_layer(current_x2_hat, h_tilde_t1, h, 
                                                                        c, W3, v3, W3_h, time_step)  

            ## LSTM gates for input2 
            h_t, c_t = LSTMN_inter_att_encoder_layer(current_x2_hat, h_tilde_t, c_tilde_t , W4,
                                                         src_adapted_hidden_t=gamma_tilde_t, 
                                                         src_adapted_compute_t=alpha_tilde_t, 
                                                         W_inter=W5)        

            # store states for processing next time-step
            current_state = [gamma_tilde_t, gamma_t, alpha_t, h_tilde_t, h_t, c_t]
            for state in current_state:
                states_series.append(state)
                
        return h_t

In [22]:
def lstmn_model(x1_b, x2_b, y_b, epsilon, beta):
    # Forward pass  
    
    with tf.variable_scope("src_batch_norm_layer", reuse=tf.AUTO_REUSE): 
        scale1 = tf.get_variable(name='scale1', shape=[state_size], initializer=tf.ones_initializer(), trainable=True)
        beta1 = tf.get_variable(name='beta1', shape=[state_size], initializer=tf.zeros_initializer(), trainable=True)   
        
    with tf.variable_scope("src_att_adapted_encoder_layer", reuse=tf.AUTO_REUSE):
        W1 = tf.get_variable(name='W1', shape=[2*state_size, state_size], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True)
        v1 = tf.get_variable(name='v1', shape=[state_size, 1], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True)
        W1_h = tf.get_variable(name='W1_h', shape=[2*state_size, state_size], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True)
        regularizer = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W1_h)
        
    with tf.variable_scope("src_LSTMN_encoder_layer", reuse=tf.AUTO_REUSE):
        W2 = tf.get_variable(name='W2', shape=[2*state_size, state_size], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True)
        regularizer = regularizer + tf.nn.l2_loss(W2)
        
    with tf.variable_scope("tgt_batch_norm_layer", reuse=tf.AUTO_REUSE):  
        scale2 = tf.get_variable(name='scale2', shape=[state_size], initializer=tf.ones_initializer(), trainable=True)
        beta2 = tf.get_variable(name='beta2', shape=[state_size], initializer=tf.zeros_initializer(), trainable=True) 
        
    with tf.variable_scope("tgt_att_adapted_encoder_layer", reuse=tf.AUTO_REUSE):
        W3 = tf.get_variable(name='W3', shape=[2*state_size, state_size], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True)
        v3 = tf.get_variable(name='v3', shape=[state_size, 1], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True)
        W3_h = tf.get_variable(name='W3_h', shape=[2*state_size, state_size], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True) 
        regularizer = regularizer + tf.nn.l2_loss(W3) + tf.nn.l2_loss(W3_h)
        
    with tf.variable_scope("tgt_LSTMN_encoder_layer", reuse=tf.AUTO_REUSE):
        W4 = tf.get_variable(name='W4', shape=[2*state_size, state_size], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True) 
        W5 = tf.get_variable(name='W5', shape=[2*state_size, state_size], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True) 
        regularizer = regularizer + tf.nn.l2_loss(W4) + tf.nn.l2_loss(W5)
        
    h_t = deep_attention_lstmn(x1_b, x2_b, epsilon, scale1, beta1, W1, v1, W1_h, W2,
                              scale2, beta2, W3, v3, W3_h, W4, W5)
    
    with tf.variable_scope("prediction_layer", reuse=tf.AUTO_REUSE):  
        W6 = tf.get_variable(name='W6', shape=[state_size, num_classes], dtype=tf.float32, 
                             initializer=tf.random_uniform_initializer(), trainable=True) 
        b6 = tf.get_variable(name='b6', shape=[1, num_classes], dtype=tf.float32, 
                             initializer=tf.zeros_initializer(), trainable=True) 
        logits = tf.matmul(h_t, W6) + b6
        regularizer = regularizer + tf.nn.l2_loss(W6)
        
    predictions = tf.round(tf.nn.sigmoid(logits))
    
    losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y_b)      
        
    return tf.reduce_mean(losses+beta*regularizer), predictions

In [23]:
def input_fn(run):
    if run == 'train':
        ds = tf.data.Dataset.from_tensor_slices((q1_train_w2v, q2_train_w2v, y_train_f.values.reshape(-1,1)))
        ds = ds.shuffle(q1_train_w2v.shape[0]).batch(train_batch_size).prefetch(1)
    elif run == 'test':
        ds = tf.data.Dataset.from_tensor_slices((q1_test_w2v, q2_test_w2v, y_test.values.reshape(-1,1)))
        ds = ds.shuffle(q1_test_w2v.shape[0])
    else:
        ds = tf.data.Dataset.from_tensor_slices((q1_valid_w2v, q2_valid_w2v, y_valid.values.reshape(-1,1)))
        ds = ds.shuffle(q1_valid_w2v.shape[0]).batch(valid_batch_size).prefetch(1)
        
    iterator = ds.make_initializable_iterator()
    
    (q1, q2, labels) = iterator.get_next()
    init_op = iterator.initializer

    # Build and return a dictionnary containing the nodes / ops
    inputs = {
        'q1': q1,
        'q2': q2,
        'labels': labels,
        'iterator_init_op': init_op
    }
    return inputs 

In [24]:
def model_fn(mode, inputs):
    is_training = (mode == 'train')
    #run_metadata = tf.RunMetadata()
    #X1 = tf.placeholder(tf.int32, [None, max_length])
    #X2 = tf.placeholder(tf.int32, [None, max_length])
    #Y = tf.placeholder(tf.float32, [None, 1])
    #init_state = tf.placeholder(tf.float32, [None, state_size])
    #valid_inputs = input_fn('valid')
    #x1_b, x2_b = preprocess_input(X1, X2, state_size)
    x1_b, x2_b = preprocess_input(inputs['q1'], inputs['q2'], state_size)
    Y = tf.cast(inputs['labels'], tf.float32)

    with tf.device('/device:GPU:0'):   

        total_loss, predictions = lstmn_model(x1_b, x2_b, Y, epsilon, beta)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(Y, predictions), tf.float32))

        # Decaying learning rate
        if is_training:
            global_step = tf.Variable(0)  # count the number of steps taken.
            start_learning_rate = 0.05
            learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, 10000, 0.95, staircase=True)

            train_step = tf.train.RMSPropOptimizer(learning_rate=learning_rate, momentum=0.75)\
                        .minimize(total_loss, global_step=global_step) 


        with tf.variable_scope("metrics"):
            metrics = {
                'accuracy': tf.metrics.accuracy(labels=Y, predictions=predictions),
                'loss': tf.metrics.mean(total_loss)
            }

         # Group the update ops for the tf.metrics
        update_metrics_op = tf.group(*[op for _, op in metrics.values()])

        # Get the op to reset the local variables used in tf.metrics
        metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics")
        metrics_init_op = tf.variables_initializer(metric_variables)  

        tf.summary.scalar('loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        #summary = tf.summary.merge_all()
        variable_init_op = tf.group(*[tf.global_variables_initializer(), tf.tables_initializer()])
        model_spec = inputs
        model_spec['metrics_init_op'] = metrics_init_op
        model_spec['metrics'] = metrics
        model_spec['update_metrics'] = update_metrics_op,
        model_spec['summary_op'] = tf.summary.merge_all(),
        model_spec['variable_init_op'] = variable_init_op,
        model_spec['predictions'] = predictions,
        model_spec['loss'] = total_loss,
        model_spec['accuracy'] = accuracy
        
        if is_training:
            model_spec['train_step'] = train_step
        
        return model_spec

In [25]:
tf.reset_default_graph()
train_inputs = input_fn('train')
train_model_spec = model_fn('train', train_inputs)
eval_inputs = input_fn('eval')
eval_model_spec = model_fn('eval', eval_inputs)

In [26]:
train_model_spec

{'q1': <tf.Tensor 'IteratorGetNext:0' shape=(?, 50) dtype=int64>,
 'q2': <tf.Tensor 'IteratorGetNext:1' shape=(?, 50) dtype=int64>,
 'labels': <tf.Tensor 'IteratorGetNext:2' shape=(?, 1) dtype=int64>,
 'iterator_init_op': <tf.Operation 'MakeIterator' type=MakeIterator>,
 'metrics_init_op': <tf.Operation 'init' type=NoOp>,
 'metrics': {'accuracy': (<tf.Tensor 'metrics/accuracy/value:0' shape=() dtype=float32>,
   <tf.Tensor 'metrics/accuracy/update_op:0' shape=() dtype=float32>),
  'loss': (<tf.Tensor 'metrics/mean/value:0' shape=() dtype=float32>,
   <tf.Tensor 'metrics/mean/update_op:0' shape=() dtype=float32>)},
 'update_metrics': (<tf.Operation 'group_deps' type=NoOp>,),
 'summary_op': (<tf.Tensor 'Merge/MergeSummary:0' shape=() dtype=string>,),
 'variable_init_op': (<tf.Operation 'group_deps_1' type=NoOp>,),
 'predictions': (<tf.Tensor 'Round:0' shape=(?, 1) dtype=float32>,),
 'loss': (<tf.Tensor 'Mean:0' shape=() dtype=float32>,),
 'accuracy': <tf.Tensor 'Mean_1:0' shape=() dtype=

In [27]:
eval_model_spec

{'q1': <tf.Tensor 'IteratorGetNext_1:0' shape=(?, 50) dtype=int64>,
 'q2': <tf.Tensor 'IteratorGetNext_1:1' shape=(?, 50) dtype=int64>,
 'labels': <tf.Tensor 'IteratorGetNext_1:2' shape=(?, 1) dtype=int64>,
 'iterator_init_op': <tf.Operation 'MakeIterator_1' type=MakeIterator>,
 'metrics_init_op': <tf.Operation 'init_2' type=NoOp>,
 'metrics': {'accuracy': (<tf.Tensor 'metrics_1/accuracy/value:0' shape=() dtype=float32>,
   <tf.Tensor 'metrics_1/accuracy/update_op:0' shape=() dtype=float32>),
  'loss': (<tf.Tensor 'metrics_1/mean/value:0' shape=() dtype=float32>,
   <tf.Tensor 'metrics_1/mean/update_op:0' shape=() dtype=float32>)},
 'update_metrics': (<tf.Operation 'group_deps_2' type=NoOp>,),
 'summary_op': (<tf.Tensor 'Merge_1/MergeSummary:0' shape=() dtype=string>,),
 'variable_init_op': (<tf.Operation 'group_deps_3' type=NoOp>,),
 'predictions': (<tf.Tensor 'Round_1:0' shape=(?, 1) dtype=float32>,),
 'loss': (<tf.Tensor 'Mean_2:0' shape=() dtype=float32>,),
 'accuracy': <tf.Tensor 

In [28]:
with tf.Session(config=config) as sess:
    #sess.run(tf.initializers.global_variables(), 
    #         options=tf.RunOptions(report_tensor_allocations_upon_oom = True))
    sess.run(train_model_spec['variable_init_op'])
    #initial_state_train = np.zeros((train_batch_size, state_size), dtype=np.float32)
    #initial_state_valid = np.zeros((valid_batch_size, state_size), dtype=np.float32)
    #x1_valid,x2_valid, y_v = generateData(valid_series_length, num_valid_batches, is_train=False)
    #x1,x2, y = generateData(train_series_length, num_train_batches, is_train=True)
    #training_writer = tf.summary.FileWriter(logs_folder+'training', sess.graph)
    #validation_writer = tf.summary.FileWriter(logs_folder+'validation', sess.graph)

    for epoch_idx in range(num_epochs):
        print("New data, epoch", epoch_idx)  
        
        sess.run(train_model_spec['iterator_init_op'])
        print("Train Iterator initiated")
        sess.run(train_model_spec['metrics_init_op'])
        print("Train Metrics initiated")
        _, _ = sess.run(
                [train_model_spec['train_step'], train_model_spec['update_metrics']])
        print("Train Session train_step and update_metrics ops run")
        
        train_metrics_values = {k: v[0] for k, v in train_model_spec['metrics'].items()}
        train_metrics_val = sess.run(train_metrics_values)
        print("Train Session metrics_vals run")
        train_metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in train_metrics_val.items())
        
        
        sess.run(eval_model_spec['iterator_init_op'])
        print("Eval Iterator initiated")
        sess.run(eval_model_spec['metrics_init_op']) 
        print("Eval Metrics initiated")      
        _ = sess.run(
                eval_model_spec['update_metrics'])
        print("Eval Session update_metrics ops run")
        
        eval_metrics_values = {k: v[0] for k, v in eval_model_spec['metrics'].items()}
        eval_metrics_val = sess.run(eval_metrics_values)
        print("Eval Session metrics_vals run")
        eval_metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in eval_metrics_val.items())
        print("epoch:{} - Train: {}   Eval: {}".format(epoch_idx, train_metrics_string, eval_metrics_string))

New data, epoch 0
Train Iterator initiated
Train Metrics initiated
Train Session train_step and update_metrics ops run
Train Session metrics_vals run
Eval Iterator initiated
Eval Metrics initiated


FailedPreconditionError: Error while reading resource variable embedding_1/time_distributed_3/kernel from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/embedding_1/time_distributed_3/kernel/N10tensorflow3VarE does not exist.
	 [[{{node embedding_1/time_distributed_3/MatMul/ReadVariableOp}} = ReadVariableOp[dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_1/time_distributed_3/kernel)]]
	 [[{{node embedding_1/Squeeze_33/_5021}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_5206_embedding_1/Squeeze_33", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'embedding_1/time_distributed_3/MatMul/ReadVariableOp', defined at:
  File "/home/siri/miniconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/siri/miniconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/siri/miniconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/siri/miniconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/siri/miniconda3/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/home/siri/miniconda3/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/home/siri/miniconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/siri/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-25-c62980a4060a>", line 5, in <module>
    eval_model_spec = model_fn('eval', eval_inputs)
  File "<ipython-input-24-34715537bc29>", line 10, in model_fn
    x1_b, x2_b = preprocess_input(inputs['q1'], inputs['q2'], state_size)
  File "<ipython-input-14-55ab30466b22>", line 12, in preprocess_input
    x2 = red_dims(projected_dim)(tf.nn.embedding_lookup(embed_vectors, X2))
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 769, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/keras/layers/wrappers.py", line 269, in call
    y = self.layer.call(inputs, **kwargs)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/keras/layers/core.py", line 947, in call
    outputs = gen_math_ops.mat_mul(inputs, self.kernel)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 4560, in mat_mul
    name=name)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 510, in _apply_op_helper
    preferred_dtype=default_dtype)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1144, in internal_convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1286, in _dense_var_to_tensor
    return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref)  # pylint: disable=protected-access
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1241, in _dense_var_to_tensor
    return self.value()
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 672, in value
    return self._read_variable_op()
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 755, in _read_variable_op
    self._dtype)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_resource_variable_ops.py", line 508, in read_variable_op
    "ReadVariableOp", resource=resource, dtype=dtype, name=name)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
    op_def=op_def)
  File "/home/siri/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1768, in __init__
    self._traceback = tf_stack.extract_stack()

FailedPreconditionError (see above for traceback): Error while reading resource variable embedding_1/time_distributed_3/kernel from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/embedding_1/time_distributed_3/kernel/N10tensorflow3VarE does not exist.
	 [[{{node embedding_1/time_distributed_3/MatMul/ReadVariableOp}} = ReadVariableOp[dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_1/time_distributed_3/kernel)]]
	 [[{{node embedding_1/Squeeze_33/_5021}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_5206_embedding_1/Squeeze_33", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]


In [None]:
        #for batch_idx in range(num_train_batches):
            
        #    _total_loss_train, _predictions_train, _train_step, _training_summary = sess.run(
        #        [total_loss, predictions, train_step, summary], 
        #        feed_dict={
        #            X1:x1[batch_idx],
        #            X2:x2[batch_idx],
        #            Y:y[batch_idx].reshape(-1,1)
        #            #init_state:initial_state_train
        #        })   

        #    if batch_idx%100 == 0:
        #        loss_list = []
        #        preds_list = []
        #        for valid_batch_idx in range(num_valid_batches):
        #            _total_loss_valid, _preds_valid, _valid_summary = sess.run(
        #            [total_loss, predictions, summary], 
        #            feed_dict={
        #                X1:x1_valid[valid_batch_idx],
        #                X2:x2_valid[valid_batch_idx],
        #                Y:y_v[valid_batch_idx].reshape(-1,1)
        #                #init_state:initial_state_valid
        #            }) 
        #            loss_list.append(_total_loss_valid)
        #            preds_list.append(_preds_valid)
        #        valid_loss = np.mean(np.array(loss_list))
        #        valid_preds = np.concatenate(preds_list, axis=None)
        #        print("epoch:{} step:{} - batch-loss:{:.3f}  batch-accuracy:{:.2f} valid-loss:{:.3f} valid-accuracy:{:.2f}".\
        #              format(epoch_idx, batch_idx, 
        #                     _total_loss_train, 
        #                     accuracy_score(y[batch_idx], _predictions_train), 
        #                     valid_loss,
        #                     accuracy_score(y_valid.values, valid_preds)))
        #        training_writer.add_summary(_training_summary, batch_idx)
        #        validation_writer.add_summary(_valid_summary, batch_idx)

New data, epoch 0
