In [1]:
# !wget http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv

In [2]:
import tensorflow as tf
import re
import numpy as np
import pandas as pd
from tqdm import tqdm
import collections
from unidecode import unidecode
from sklearn.cross_validation import train_test_split



In [3]:
def build_dataset(words, n_words):
    count = [['PAD', 0], ['GO', 1], ['EOS', 2], ['UNK', 3]]
    count.extend(collections.Counter(words).most_common(n_words - 1))
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 0)
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary

def str_idx(corpus, dic, maxlen, UNK=3):
    X = np.zeros((len(corpus),maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i][:maxlen][::-1]):
            val = dic[k] if k in dic else UNK
            X[i,-1 - no]= val
    return X

def cleaning(string):
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z\- ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    return string.lower()

In [4]:
df = pd.read_csv('quora_duplicate_questions.tsv', delimiter='\t').dropna()
df.head()

Unnamed: 0,id,qid1,qid2,question1,question2,is_duplicate
0,0,1,2,What is the step by step guide to invest in sh...,What is the step by step guide to invest in sh...,0
1,1,3,4,What is the story of Kohinoor (Koh-i-Noor) Dia...,What would happen if the Indian government sto...,0
2,2,5,6,How can I increase the speed of my internet co...,How can Internet speed be increased by hacking...,0
3,3,7,8,Why am I mentally very lonely? How can I solve...,Find the remainder when [math]23^{24}[/math] i...,0
4,4,9,10,"Which one dissolve in water quikly sugar, salt...",Which fish would survive in salt water?,0


In [5]:
left, right, label = df['question1'].tolist(), df['question2'].tolist(), df['is_duplicate'].tolist()

In [6]:
np.unique(label, return_counts = True)

(array([0, 1]), array([255024, 149263]))

In [7]:
for i in tqdm(range(len(left))):
    left[i] = cleaning(left[i])
    right[i] = cleaning(right[i])

100%|██████████| 404287/404287 [00:07<00:00, 53664.30it/s]


In [8]:
concat = ' '.join(left + right).split()
vocabulary_size = len(list(set(concat)))
data, count, dictionary, rev_dictionary = build_dataset(concat, vocabulary_size)
print('vocab from size: %d'%(vocabulary_size))
print('Most common words', count[4:10])
print('Sample data', data[:10], [rev_dictionary[i] for i in data[:10]])

vocab from size: 87661
Most common words [('the', 377593), ('what', 324635), ('is', 269934), ('i', 223893), ('how', 220876), ('a', 212757)]
Sample data [5, 6, 4, 1285, 62, 1285, 2501, 10, 564, 11] ['what', 'is', 'the', 'step', 'by', 'step', 'guide', 'to', 'invest', 'in']


In [9]:
def position_encoding(inputs):
    T = tf.shape(inputs)[1]
    repr_dim = inputs.get_shape()[-1].value
    pos = tf.reshape(tf.range(0.0, tf.to_float(T), dtype=tf.float32), [-1, 1])
    i = np.arange(0, repr_dim, 2, np.float32)
    denom = np.reshape(np.power(10000.0, i / repr_dim), [1, -1])
    enc = tf.expand_dims(tf.concat([tf.sin(pos / denom), tf.cos(pos / denom)], 1), 0)
    return tf.tile(enc, [tf.shape(inputs)[0], 1, 1])

def layer_norm(inputs, epsilon=1e-8):
    mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
    normalized = (inputs - mean) / (tf.sqrt(variance + epsilon))
    params_shape = inputs.get_shape()[-1:]
    gamma = tf.get_variable('gamma', params_shape, tf.float32, tf.ones_initializer())
    beta = tf.get_variable('beta', params_shape, tf.float32, tf.zeros_initializer())
    return gamma * normalized + beta

def self_attention(inputs, is_training, num_units, num_heads = 8, activation=None):
    T_q = T_k = tf.shape(inputs)[1]
    Q_K_V = tf.layers.dense(inputs, 3*num_units, activation)
    Q, K, V = tf.split(Q_K_V, 3, -1)
    Q_ = tf.concat(tf.split(Q, num_heads, axis=2), 0)
    K_ = tf.concat(tf.split(K, num_heads, axis=2), 0)
    V_ = tf.concat(tf.split(V, num_heads, axis=2), 0)
    align = tf.matmul(Q_, K_, transpose_b=True)
    align *= tf.rsqrt(tf.to_float(K_.get_shape()[-1].value))
    paddings = tf.fill(tf.shape(align), float('-inf'))
    lower_tri = tf.ones([T_q, T_k])
    lower_tri = tf.linalg.LinearOperatorLowerTriangular(lower_tri).to_dense()
    masks = tf.tile(tf.expand_dims(lower_tri,0), [tf.shape(align)[0],1,1])
    align = tf.where(tf.equal(masks, 0), paddings, align)
    align = tf.nn.softmax(align)
    align = tf.layers.dropout(align, 0.1, training=is_training) 
    x = tf.matmul(align, V_)
    x = tf.concat(tf.split(x, num_heads, axis=0), 2)
    x += inputs
    x = layer_norm(x)
    return x

def ffn(inputs, hidden_dim, activation=tf.nn.relu):
    x = tf.layers.conv1d(inputs, 4* hidden_dim, 1, activation=activation) 
    x = tf.layers.conv1d(x, hidden_dim, 1, activation=None)
    x += inputs
    x = layer_norm(x)
    return x

class Model:
    def __init__(self, size_layer, num_layers, embedded_size,
                 dict_size, learning_rate, dropout, kernel_size = 5):
        
        def cnn(x, scope):
            x += position_encoding(x)
            with tf.variable_scope(scope, reuse = tf.AUTO_REUSE):
                for n in range(num_layers):
                    with tf.variable_scope('attn_%d'%i,reuse=tf.AUTO_REUSE):
                        x = self_attention(x, True, size_layer)
                    with tf.variable_scope('ffn_%d'%i, reuse=tf.AUTO_REUSE):
                        x = ffn(x, size_layer)
                
                with tf.variable_scope('logits', reuse=tf.AUTO_REUSE):
                    return tf.layers.dense(x, size_layer)[:, -1]
        
        self.X_left = tf.placeholder(tf.int32, [None, None])
        self.X_right = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.float32, [None])
        self.batch_size = tf.shape(self.X_left)[0]
        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        embedded_left = tf.nn.embedding_lookup(encoder_embeddings, self.X_left)
        embedded_right = tf.nn.embedding_lookup(encoder_embeddings, self.X_right)
        
        def contrastive_loss(y,d):
            tmp= y * tf.square(d)
            tmp2 = (1-y) * tf.square(tf.maximum((1 - d),0))
            return tf.reduce_sum(tmp +tmp2)/tf.cast(self.batch_size,tf.float32)/2
        
        self.output_left = cnn(embedded_left, 'left')
        self.output_right = cnn(embedded_right, 'right')
        print(self.output_left, self.output_right)
        self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.output_left,self.output_right)),
                                              1,keep_dims=True))
        self.distance = tf.div(self.distance, tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.output_left),
                                                                           1,keep_dims=True)),
                                                     tf.sqrt(tf.reduce_sum(tf.square(self.output_right),
                                                                           1,keep_dims=True))))
        self.distance = tf.reshape(self.distance, [-1])
        self.cost = contrastive_loss(self.Y,self.distance)
        
        self.temp_sim = tf.subtract(tf.ones_like(self.distance),
                                    tf.rint(self.distance))
        correct_predictions = tf.equal(self.temp_sim, self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)

In [10]:
size_layer = 128
num_layers = 4
embedded_size = 128
learning_rate = 1e-4
maxlen = 50
batch_size = 128
dropout = 0.8

In [11]:
from sklearn.cross_validation import train_test_split

vectors_left = str_idx(left, dictionary, maxlen)
vectors_right = str_idx(right, dictionary, maxlen)
train_X_left, test_X_left, train_X_right, test_X_right, train_Y, test_Y = train_test_split(vectors_left,
                                                                                           vectors_right,
                                                                                           label,
                                                                                           test_size = 0.2)

In [12]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(size_layer,num_layers,embedded_size,len(dictionary),learning_rate,dropout)
sess.run(tf.global_variables_initializer())

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.conv1d instead.
Tensor("left/logits/strided_slice:0", shape=(?, 128), dtype=float32) Tensor("right/logits/strided_slice:0", shape=(?, 128), dtype=float32)
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
Use tf.cast instead.


In [13]:
import time

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(range(0, len(train_X_left), batch_size), desc='train minibatch loop')
    for i in pbar:
        batch_x_left = train_X_left[i:min(i+batch_size,train_X_left.shape[0])]
        batch_x_right = train_X_right[i:min(i+batch_size,train_X_left.shape[0])]
        batch_y = train_Y[i:min(i+batch_size,train_X_left.shape[0])]
        acc, loss, _ = sess.run([model.accuracy, model.cost, model.optimizer], 
                           feed_dict = {model.X_left : batch_x_left, 
                                        model.X_right: batch_x_right,
                                        model.Y : batch_y})
        assert not np.isnan(loss)
        train_loss += loss
        train_acc += acc
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    pbar = tqdm(range(0, len(test_X_left), batch_size), desc='test minibatch loop')
    for i in pbar:
        batch_x_left = test_X_left[i:min(i+batch_size,test_X_left.shape[0])]
        batch_x_right = test_X_right[i:min(i+batch_size,test_X_left.shape[0])]
        batch_y = test_Y[i:min(i+batch_size,test_X_left.shape[0])]
        acc, loss = sess.run([model.accuracy, model.cost], 
                           feed_dict = {model.X_left : batch_x_left, 
                                        model.X_right: batch_x_right,
                                        model.Y : batch_y})
        test_loss += loss
        test_acc += acc
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    train_loss /= (len(train_X_left) / batch_size)
    train_acc /= (len(train_X_left) / batch_size)
    test_loss /= (len(test_X_left) / batch_size)
    test_acc /= (len(test_X_left) / batch_size)
    
    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
    
    print('time taken:', time.time()-lasttime)
    print('epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'%(EPOCH,train_loss,
                                                                                          train_acc,test_loss,
                                                                                          test_acc))

train minibatch loop: 100%|██████████| 2527/2527 [01:41<00:00, 25.12it/s, accuracy=0.693, cost=0.1]   
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 65.48it/s, accuracy=0.711, cost=0.096] 
train minibatch loop:   0%|          | 3/2527 [00:00<01:40, 25.16it/s, accuracy=0.703, cost=0.101] 

epoch: 0, pass acc: 0.000000, current acc: 0.685201
time taken: 111.32214426994324
epoch: 0, training loss: 0.106726, training acc: 0.669383, valid loss: 0.103184, valid acc: 0.685201



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.08it/s, accuracy=0.733, cost=0.0915]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.03it/s, accuracy=0.722, cost=0.0919]
train minibatch loop:   0%|          | 3/2527 [00:00<01:41, 24.90it/s, accuracy=0.688, cost=0.104] 

epoch: 0, pass acc: 0.685201, current acc: 0.701866
time taken: 110.18735837936401
epoch: 0, training loss: 0.100379, training acc: 0.691623, valid loss: 0.098808, valid acc: 0.701866



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.11it/s, accuracy=0.733, cost=0.0892]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.03it/s, accuracy=0.678, cost=0.095] 
train minibatch loop:   0%|          | 3/2527 [00:00<01:39, 25.28it/s, accuracy=0.711, cost=0.0951]

epoch: 0, pass acc: 0.701866, current acc: 0.712456
time taken: 110.06335616111755
epoch: 0, training loss: 0.096448, training acc: 0.707221, valid loss: 0.096495, valid acc: 0.712456



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.09it/s, accuracy=0.743, cost=0.0927]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 66.97it/s, accuracy=0.644, cost=0.0971]
train minibatch loop:   0%|          | 3/2527 [00:00<01:39, 25.36it/s, accuracy=0.719, cost=0.0931]

epoch: 0, pass acc: 0.712456, current acc: 0.715025
time taken: 110.16492295265198
epoch: 0, training loss: 0.093926, training acc: 0.717781, valid loss: 0.095615, valid acc: 0.715025



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.03it/s, accuracy=0.752, cost=0.0877]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 66.98it/s, accuracy=0.678, cost=0.097] 
train minibatch loop:   0%|          | 3/2527 [00:00<01:41, 24.84it/s, accuracy=0.688, cost=0.0955]

epoch: 0, pass acc: 0.715025, current acc: 0.721843
time taken: 110.38844656944275
epoch: 0, training loss: 0.092020, training acc: 0.726040, valid loss: 0.094243, valid acc: 0.721843



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.11it/s, accuracy=0.723, cost=0.0882]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.09it/s, accuracy=0.667, cost=0.0952]
train minibatch loop:   0%|          | 3/2527 [00:00<01:41, 24.93it/s, accuracy=0.75, cost=0.0906] 

epoch: 0, pass acc: 0.721843, current acc: 0.722270
time taken: 110.06278610229492
epoch: 0, training loss: 0.090355, training acc: 0.733065, valid loss: 0.093710, valid acc: 0.722270



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.03it/s, accuracy=0.752, cost=0.086] 
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 66.96it/s, accuracy=0.7, cost=0.0953]  
train minibatch loop:   0%|          | 3/2527 [00:00<01:41, 24.94it/s, accuracy=0.742, cost=0.0918]

epoch: 0, pass acc: 0.722270, current acc: 0.725934
time taken: 110.40167164802551
epoch: 0, training loss: 0.088796, training acc: 0.739814, valid loss: 0.092955, valid acc: 0.725934



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.15it/s, accuracy=0.762, cost=0.0806]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.26it/s, accuracy=0.689, cost=0.096] 
train minibatch loop:   0%|          | 3/2527 [00:00<01:41, 24.84it/s, accuracy=0.781, cost=0.0892]

time taken: 109.86811327934265
epoch: 0, training loss: 0.087358, training acc: 0.746224, valid loss: 0.092556, valid acc: 0.725335



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.04it/s, accuracy=0.762, cost=0.0808]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.34it/s, accuracy=0.7, cost=0.0938]  
train minibatch loop:   0%|          | 3/2527 [00:00<01:38, 25.63it/s, accuracy=0.805, cost=0.0879]

epoch: 0, pass acc: 0.725934, current acc: 0.729039
time taken: 110.31477642059326
epoch: 0, training loss: 0.085995, training acc: 0.751777, valid loss: 0.091761, valid acc: 0.729039



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.26it/s, accuracy=0.743, cost=0.0775]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.20it/s, accuracy=0.722, cost=0.0949]
train minibatch loop:   0%|          | 3/2527 [00:00<01:40, 25.17it/s, accuracy=0.727, cost=0.0899]

epoch: 0, pass acc: 0.729039, current acc: 0.730447
time taken: 109.4636116027832
epoch: 0, training loss: 0.084593, training acc: 0.756880, valid loss: 0.091620, valid acc: 0.730447



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.15it/s, accuracy=0.792, cost=0.0763]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 66.96it/s, accuracy=0.711, cost=0.0971]
train minibatch loop:   0%|          | 3/2527 [00:00<01:39, 25.33it/s, accuracy=0.781, cost=0.0882]

epoch: 0, pass acc: 0.730447, current acc: 0.732334
time taken: 109.93308997154236
epoch: 0, training loss: 0.083287, training acc: 0.762669, valid loss: 0.091151, valid acc: 0.732334



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.26it/s, accuracy=0.772, cost=0.0729]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.32it/s, accuracy=0.678, cost=0.098] 
train minibatch loop:   0%|          | 3/2527 [00:00<01:39, 25.40it/s, accuracy=0.781, cost=0.0819]

epoch: 0, pass acc: 0.732334, current acc: 0.732491
time taken: 109.41248917579651
epoch: 0, training loss: 0.082038, training acc: 0.767324, valid loss: 0.090638, valid acc: 0.732491



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.21it/s, accuracy=0.772, cost=0.0769]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.24it/s, accuracy=0.711, cost=0.0949]
train minibatch loop:   0%|          | 3/2527 [00:00<01:38, 25.54it/s, accuracy=0.781, cost=0.0809]

epoch: 0, pass acc: 0.732491, current acc: 0.734844
time taken: 109.63890266418457
epoch: 0, training loss: 0.080769, training acc: 0.772957, valid loss: 0.090315, valid acc: 0.734844



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.16it/s, accuracy=0.822, cost=0.0687]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.61it/s, accuracy=0.744, cost=0.0907]
train minibatch loop:   0%|          | 3/2527 [00:00<01:39, 25.38it/s, accuracy=0.781, cost=0.0854]

time taken: 109.79329133033752
epoch: 0, training loss: 0.079631, training acc: 0.777117, valid loss: 0.090068, valid acc: 0.734180



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.25it/s, accuracy=0.822, cost=0.0702]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.38it/s, accuracy=0.722, cost=0.091] 
train minibatch loop:   0%|          | 3/2527 [00:00<01:40, 25.05it/s, accuracy=0.781, cost=0.0819]

epoch: 0, pass acc: 0.734844, current acc: 0.735022
time taken: 109.46223187446594
epoch: 0, training loss: 0.078417, training acc: 0.781514, valid loss: 0.089608, valid acc: 0.735022



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.14it/s, accuracy=0.782, cost=0.0686]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 66.88it/s, accuracy=0.711, cost=0.0945]
train minibatch loop:   0%|          | 3/2527 [00:00<01:40, 25.15it/s, accuracy=0.75, cost=0.0856] 

epoch: 0, pass acc: 0.735022, current acc: 0.737936
time taken: 109.98049426078796
epoch: 0, training loss: 0.077204, training acc: 0.786631, valid loss: 0.089129, valid acc: 0.737936



train minibatch loop: 100%|██████████| 2527/2527 [01:39<00:00, 25.27it/s, accuracy=0.792, cost=0.0682]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.62it/s, accuracy=0.722, cost=0.0938]
train minibatch loop:   0%|          | 3/2527 [00:00<01:38, 25.51it/s, accuracy=0.836, cost=0.0775]

epoch: 0, pass acc: 0.737936, current acc: 0.739277
time taken: 109.33117318153381
epoch: 0, training loss: 0.076121, training acc: 0.790172, valid loss: 0.089027, valid acc: 0.739277



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.23it/s, accuracy=0.832, cost=0.067] 
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.35it/s, accuracy=0.7, cost=0.0949]  
train minibatch loop:   0%|          | 3/2527 [00:00<01:38, 25.66it/s, accuracy=0.82, cost=0.0774] 

epoch: 0, pass acc: 0.739277, current acc: 0.739749
time taken: 109.55670094490051
epoch: 0, training loss: 0.074985, training acc: 0.794015, valid loss: 0.088705, valid acc: 0.739749



train minibatch loop: 100%|██████████| 2527/2527 [01:39<00:00, 25.35it/s, accuracy=0.812, cost=0.0635]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.36it/s, accuracy=0.711, cost=0.0848]
train minibatch loop:   0%|          | 3/2527 [00:00<01:40, 25.13it/s, accuracy=0.797, cost=0.0756]

epoch: 0, pass acc: 0.739749, current acc: 0.740187
time taken: 109.05358052253723
epoch: 0, training loss: 0.074041, training acc: 0.797890, valid loss: 0.088700, valid acc: 0.740187



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.26it/s, accuracy=0.842, cost=0.0616]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.31it/s, accuracy=0.689, cost=0.0933]
train minibatch loop:   0%|          | 3/2527 [00:00<01:39, 25.36it/s, accuracy=0.773, cost=0.0746]

time taken: 109.43666005134583
epoch: 0, training loss: 0.072876, training acc: 0.801452, valid loss: 0.088649, valid acc: 0.739768



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.21it/s, accuracy=0.871, cost=0.0602]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.55it/s, accuracy=0.689, cost=0.0911]
train minibatch loop:   0%|          | 3/2527 [00:00<01:37, 25.84it/s, accuracy=0.812, cost=0.0774]

time taken: 109.58015727996826
epoch: 0, training loss: 0.071968, training acc: 0.804654, valid loss: 0.088769, valid acc: 0.738841



train minibatch loop: 100%|██████████| 2527/2527 [01:40<00:00, 25.22it/s, accuracy=0.822, cost=0.0614]
test minibatch loop: 100%|██████████| 632/632 [00:09<00:00, 67.43it/s, accuracy=0.689, cost=0.0998]

time taken: 109.57378196716309
epoch: 0, training loss: 0.070959, training acc: 0.809158, valid loss: 0.088572, valid acc: 0.739855

break epoch:0






In [14]:
left = str_idx(['a person is outdoors, on a horse.'], dictionary, maxlen)
right = str_idx(['a person on a horse jumps over a broken down airplane.'], dictionary, maxlen)
sess.run([model.temp_sim,1-model.distance], feed_dict = {model.X_left : left, 
                                        model.X_right: right})

[array([0.], dtype=float32), array([0.13981318], dtype=float32)]