In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import tensorflow as tf
import json

In [3]:
with open('contrastive.json') as fopen:
    data = json.load(fopen)

In [4]:
class Model:
    def __init__(self, size_layer, num_layers, embedded_size,
                 dict_size, learning_rate, dropout):
        
        def cells(size, reuse=False):
            cell = tf.nn.rnn_cell.LSTMCell(size,initializer=tf.orthogonal_initializer(),reuse=reuse)
            return tf.contrib.rnn.DropoutWrapper(cell,output_keep_prob=dropout)
        
        def birnn(inputs, scope):
            with tf.variable_scope(scope, reuse = tf.AUTO_REUSE):
                for n in range(num_layers):
                    (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw = cells(size_layer // 2),
                        cell_bw = cells(size_layer // 2),
                        inputs = inputs,
                        dtype = tf.float32,
                        scope = 'bidirectional_rnn_%d'%(n))
                    inputs = tf.concat((out_fw, out_bw), 2)
                return inputs[:,-1]
        
        self.X_left = tf.placeholder(tf.int32, [None, None])
        self.X_right = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.float32, [None])
        self.batch_size = tf.shape(self.X_left)[0]
        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        embedded_left = tf.nn.embedding_lookup(encoder_embeddings, self.X_left)
        embedded_right = tf.nn.embedding_lookup(encoder_embeddings, self.X_right)
        
        def contrastive_loss(y,d):
            tmp= y * tf.square(d)
            tmp2 = (1-y) * tf.square(tf.maximum((1 - d),0))
            return tf.reduce_sum(tmp +tmp2)/tf.cast(self.batch_size,tf.float32)/2
        
        self.output_left = birnn(embedded_left, 'left')
        self.output_right = birnn(embedded_right, 'right')
        self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.output_left,self.output_right)),
                                              1,keep_dims=True))
        self.distance = tf.div(self.distance, tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.output_left),
                                                                           1,keep_dims=True)),
                                                     tf.sqrt(tf.reduce_sum(tf.square(self.output_right),
                                                                           1,keep_dims=True))))
        self.distance = tf.reshape(self.distance, [-1])
        self.cost = contrastive_loss(self.Y,self.distance)
        
        self.temp_sim = tf.subtract(tf.ones_like(self.distance),
                                    tf.rint(self.distance))
        correct_predictions = tf.equal(self.temp_sim, self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)

In [5]:
size_layer = 256
num_layers = 2
embedded_size = 256
learning_rate = 1e-3
batch_size = 128
dropout = 1.0
vocab_size = 30000

In [6]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(size_layer,num_layers,embedded_size,vocab_size,learning_rate,dropout)
sess.run(tf.global_variables_initializer())

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updati

In [7]:
train_X_left = data['left_train']
train_X_right = data['right_train']
train_Y = data['label_train']
test_X_left = data['left_test']
test_X_right = data['right_test']
test_Y = data['label_test']

In [8]:
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

In [9]:
import time
from tqdm import tqdm
import numpy as np

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 1, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = [], [], [], []
    pbar = tqdm(range(0, len(train_X_left), batch_size), desc='train minibatch loop')
    for i in pbar:
        index = min(i+batch_size,len(train_X_left))
        batch_x_left = train_X_left[i:index]
        batch_x_right = train_X_right[i:index]
        batch_y = train_Y[i:index]
        batch_x_left = pad_sequences(batch_x_left, padding='post')
        batch_x_right = pad_sequences(batch_x_right, padding='post')
        acc, loss, _ = sess.run([model.accuracy, model.cost, model.optimizer], 
                           feed_dict = {model.X_left : batch_x_left, 
                                        model.X_right: batch_x_right,
                                        model.Y : batch_y})
        assert not np.isnan(loss)
        train_loss.append(loss)
        train_acc.append(acc)
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    pbar = tqdm(range(0, len(test_X_left), batch_size), desc='test minibatch loop')
    for i in pbar:
        index = min(i+batch_size,len(test_X_left))
        batch_x_left = test_X_left[i:index]
        batch_x_right = test_X_right[i:index]
        batch_y = test_Y[i:index]
        batch_x_left = pad_sequences(batch_x_left, padding='post')
        batch_x_right = pad_sequences(batch_x_right, padding='post')
        acc, loss = sess.run([model.accuracy, model.cost], 
                           feed_dict = {model.X_left : batch_x_left, 
                                        model.X_right: batch_x_right,
                                        model.Y : batch_y})
        
        test_loss.append(loss)
        test_acc.append(acc)
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    train_loss = np.mean(train_loss)
    train_acc = np.mean(train_acc)
    test_loss = np.mean(test_loss)
    test_acc = np.mean(test_acc)
    
    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
    
    print('time taken:', time.time()-lasttime)
    print('epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'%(EPOCH,train_loss,
                                                                                          train_acc,test_loss,
                                                                                          test_acc))

train minibatch loop: 100%|██████████| 2046/2046 [12:20<00:00,  2.76it/s, accuracy=0.619, cost=0.114] 
test minibatch loop: 100%|██████████| 105/105 [00:16<00:00,  6.48it/s, accuracy=0.783, cost=0.0817]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.000000, current acc: 0.705077
time taken: 756.2168247699738
epoch: 0, training loss: 0.104395, training acc: 0.662758, valid loss: 0.096980, valid acc: 0.705077



train minibatch loop: 100%|██████████| 2046/2046 [12:16<00:00,  2.78it/s, accuracy=0.762, cost=0.087] 
test minibatch loop: 100%|██████████| 105/105 [00:15<00:00,  6.77it/s, accuracy=0.723, cost=0.0819]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.705077, current acc: 0.717673
time taken: 751.6032555103302
epoch: 0, training loss: 0.089057, training acc: 0.733014, valid loss: 0.091990, valid acc: 0.717673



train minibatch loop: 100%|██████████| 2046/2046 [12:01<00:00,  2.83it/s, accuracy=0.881, cost=0.047] 
test minibatch loop: 100%|██████████| 105/105 [00:16<00:00,  6.56it/s, accuracy=0.723, cost=0.0881]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.717673, current acc: 0.730322
time taken: 737.9174697399139
epoch: 0, training loss: 0.073122, training acc: 0.788317, valid loss: 0.090357, valid acc: 0.730322



train minibatch loop: 100%|██████████| 2046/2046 [12:04<00:00,  2.83it/s, accuracy=0.952, cost=0.025] 
test minibatch loop: 100%|██████████| 105/105 [00:15<00:00,  6.73it/s, accuracy=0.723, cost=0.0862]

time taken: 739.7751545906067
epoch: 0, training loss: 0.059200, training acc: 0.838007, valid loss: 0.094702, valid acc: 0.726081

break epoch:0




