In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [2]:
import tensorflow as tf
import json

In [3]:
with open('pair.json') as fopen:
    data = json.load(fopen)

In [4]:
class Model:
    def __init__(self, size_layer, num_layers, embedded_size,
                 dict_size, learning_rate, dropout):
        
        def cells(size, reuse=False):
            cell = tf.nn.rnn_cell.LSTMCell(size,initializer=tf.orthogonal_initializer(),reuse=reuse)
            return tf.contrib.rnn.DropoutWrapper(cell,output_keep_prob=dropout)
        
        def birnn(inputs, scope):
            with tf.variable_scope(scope, reuse = tf.AUTO_REUSE):
                for n in range(num_layers):
                    (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw = cells(size_layer // 2),
                        cell_bw = cells(size_layer // 2),
                        inputs = inputs,
                        dtype = tf.float32,
                        scope = 'bidirectional_rnn_%d'%(n))
                    inputs = tf.concat((out_fw, out_bw), 2)
                return inputs[:,-1]
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.batch_size = tf.shape(self.X)[0]
        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        embedded_left = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        
        self.out = birnn(embedded_left, 'left')
        self.out = tf.layers.dense(self.out, size_layer)
        self.out = tf.nn.l2_normalize(self.out, 1)
        self.logits = tf.layers.dense(self.out,2,use_bias=False,
                                      kernel_constraint=tf.keras.constraints.unit_norm())
        print(self.logits)
        
        self.gamma = 64
        self.margin = 0.25
        self.O_p = 1 + self.margin
        self.O_n = -self.margin
        self.Delta_p = 1 - self.margin
        self.Delta_n = self.margin
        
        self.batch_idxs = tf.expand_dims(
          tf.range(0, self.batch_size, dtype=tf.int32), 1)  # shape [batch,1]
        idxs = tf.concat([self.batch_idxs, tf.cast(self.Y, tf.int32)], 1)
        sp = tf.expand_dims(tf.gather_nd(self.logits, idxs), 1)
        mask = tf.logical_not(
            tf.scatter_nd(idxs, tf.ones(tf.shape(idxs)[0], tf.bool),
                          tf.shape(self.logits)))

        sn = tf.reshape(tf.boolean_mask(self.logits, mask), (self.batch_size, -1))

        alpha_p = tf.nn.relu(self.O_p - tf.stop_gradient(sp))
        alpha_n = tf.nn.relu(tf.stop_gradient(sn) - self.O_n)

        r_sp_m = alpha_p * (sp - self.Delta_p)
        r_sn_m = alpha_n * (sn - self.Delta_n)
        _Z = tf.concat([r_sn_m, r_sp_m], 1)
        _Z = _Z * self.gamma
        # sum all similarity
        logZ = tf.math.reduce_logsumexp(_Z, 1, keepdims=True)
        # remove sn_p from all sum similarity
        self.cost = -r_sp_m * self.gamma + logZ
        self.cost = tf.reduce_mean(self.cost[:,0])
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(
            tf.argmax(self.logits, 1, output_type = tf.int32), self.Y[:,0]
        )
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [5]:
size_layer = 256
num_layers = 2
embedded_size = 256
learning_rate = 1e-3
batch_size = 128
dropout = 1.0
vocab_size = 30000

In [6]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(size_layer,num_layers,embedded_size,vocab_size,learning_rate,dropout)
sess.run(tf.global_variables_initializer())

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use

In [7]:
data.keys()

dict_keys(['left_train', 'label_train', 'left_test', 'label_test'])

In [8]:
train_X_left = data['left_train']
train_Y = data['label_train']
test_X_left = data['left_test']
test_Y = data['label_test']

In [9]:
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

In [10]:
import time
from tqdm import tqdm
import numpy as np

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 1, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = [], [], [], []
    pbar = tqdm(range(0, len(train_X_left), batch_size), desc='train minibatch loop')
    for i in pbar:
        index = min(i+batch_size,len(train_X_left))
        batch_x_left = train_X_left[i:index]
        batch_y = train_Y[i:index]
        batch_y = np.expand_dims(batch_y,1)
        batch_x_left = pad_sequences(batch_x_left, padding='post')
        acc, loss, _ = sess.run([model.accuracy, model.cost, model.optimizer], 
                           feed_dict = {model.X : batch_x_left, 
                                        model.Y : batch_y})
        assert not np.isnan(loss)
        train_loss.append(loss)
        train_acc.append(acc)
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    pbar = tqdm(range(0, len(test_X_left), batch_size), desc='test minibatch loop')
    for i in pbar:
        index = min(i+batch_size,len(test_X_left))
        batch_x_left = test_X_left[i:index]
        batch_y = test_Y[i:index]
        batch_y = np.expand_dims(batch_y,1)
        batch_x_left = pad_sequences(batch_x_left, padding='post')
        acc, loss = sess.run([model.accuracy, model.cost], 
                           feed_dict = {model.X : batch_x_left,
                                        model.Y : batch_y})
        
        test_loss.append(loss)
        test_acc.append(acc)
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    train_loss = np.mean(train_loss)
    train_acc = np.mean(train_acc)
    test_loss = np.mean(test_loss)
    test_acc = np.mean(test_acc)
    
    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
    
    print('time taken:', time.time()-lasttime)
    print('epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'%(EPOCH,train_loss,
                                                                                          train_acc,test_loss,
                                                                                          test_acc))

train minibatch loop: 100%|██████████| 2046/2046 [11:42<00:00,  2.91it/s, accuracy=0.524, cost=23.9]
test minibatch loop: 100%|██████████| 105/105 [00:12<00:00,  8.33it/s, accuracy=0.542, cost=23.8]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.000000, current acc: 0.483661
time taken: 715.2289781570435
epoch: 0, training loss: 24.061218, training acc: 0.506327, valid loss: 24.190355, valid acc: 0.483661



train minibatch loop: 100%|██████████| 2046/2046 [11:05<00:00,  3.07it/s, accuracy=0.714, cost=17.2]
test minibatch loop: 100%|██████████| 105/105 [00:13<00:00,  7.83it/s, accuracy=0.807, cost=12]  
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.483661, current acc: 0.744072
time taken: 678.8547568321228
epoch: 0, training loss: 18.518274, training acc: 0.677692, valid loss: 15.591663, valid acc: 0.744072



train minibatch loop: 100%|██████████| 2046/2046 [11:01<00:00,  3.09it/s, accuracy=0.786, cost=12.4]
test minibatch loop: 100%|██████████| 105/105 [00:12<00:00,  8.32it/s, accuracy=0.843, cost=11.4]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.744072, current acc: 0.756990
time taken: 673.8234732151031
epoch: 0, training loss: 14.537312, training acc: 0.764722, valid loss: 15.128952, valid acc: 0.756990



train minibatch loop: 100%|██████████| 2046/2046 [11:16<00:00,  3.02it/s, accuracy=0.857, cost=9]   
test minibatch loop: 100%|██████████| 105/105 [00:12<00:00,  8.10it/s, accuracy=0.795, cost=12.8]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.756990, current acc: 0.758466
time taken: 689.6119740009308
epoch: 0, training loss: 12.765995, training acc: 0.799474, valid loss: 15.391677, valid acc: 0.758466



train minibatch loop: 100%|██████████| 2046/2046 [11:24<00:00,  2.99it/s, accuracy=0.881, cost=7.72]
test minibatch loop: 100%|██████████| 105/105 [00:13<00:00,  7.76it/s, accuracy=0.783, cost=13.6]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.758466, current acc: 0.758574
time taken: 697.922758102417
epoch: 0, training loss: 11.443329, training acc: 0.824237, valid loss: 15.448023, valid acc: 0.758574



train minibatch loop:  61%|██████    | 1252/2046 [06:51<03:24,  3.88it/s, accuracy=0.812, cost=12.5]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [11]:
test_loss, test_acc = [], []
pbar = tqdm(range(0, len(test_X_left), batch_size), desc='test minibatch loop')
for i in pbar:
    index = min(i+batch_size,len(test_X_left))
    batch_x_left = test_X_left[i:index]
    batch_y = test_Y[i:index]
    batch_y = np.expand_dims(batch_y,1)
    batch_x_left = pad_sequences(batch_x_left, padding='post')
    acc, loss = sess.run([model.accuracy, model.cost], 
                       feed_dict = {model.X : batch_x_left,
                                    model.Y : batch_y})

    test_loss.append(loss)
    test_acc.append(acc)
    pbar.set_postfix(cost=loss, accuracy = acc)

test minibatch loop: 100%|██████████| 105/105 [00:12<00:00,  8.20it/s, accuracy=0.783, cost=13.9]


In [12]:
np.mean(test_acc)

0.75812805