In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import tensorflow as tf
import json

In [3]:
with open('pair.json') as fopen:
    data = json.load(fopen)

In [4]:
class Model:
    def __init__(self, size_layer, num_layers, embedded_size,
                 dict_size, learning_rate, dropout):
        
        def cells(size, reuse=False):
            cell = tf.nn.rnn_cell.LSTMCell(size,initializer=tf.orthogonal_initializer(),reuse=reuse)
            return tf.contrib.rnn.DropoutWrapper(cell,output_keep_prob=dropout)
        
        def birnn(inputs, scope):
            with tf.variable_scope(scope, reuse = tf.AUTO_REUSE):
                for n in range(num_layers):
                    (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw = cells(size_layer // 2),
                        cell_bw = cells(size_layer // 2),
                        inputs = inputs,
                        dtype = tf.float32,
                        scope = 'bidirectional_rnn_%d'%(n))
                    inputs = tf.concat((out_fw, out_bw), 2)
                return inputs[:,-1]
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None])
        Y = tf.one_hot(self.Y, 2)
        self.batch_size = tf.shape(self.X)[0]
        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        embedded_left = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        
        self.out = birnn(embedded_left, 'left')
        self.out = tf.layers.dense(self.out, size_layer)
        self.out = tf.nn.l2_normalize(self.out, 1)
        self.logits = tf.layers.dense(self.out,2,use_bias=False,
                                      kernel_constraint=tf.keras.constraints.unit_norm())
        
        self.gamma = 32
        self.margin = 0.1
        
        num_valid_proxies = tf.reduce_sum(tf.cast(tf.reduce_sum(
        self.Y, 0, keepdims=True) != 0, tf.float32))
        y_pred = ((Y * (self.logits - self.margin) / num_valid_proxies) +
              ((1 - Y) * (self.logits - self.margin) / tf.cast(tf.shape(Y)[-1], tf.float32))) * self.gamma
        self.cost = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=y_pred)
        self.cost = tf.reduce_mean(self.cost)
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(
            tf.argmax(self.logits, 1, output_type = tf.int32), tf.cast(self.Y, tf.int32)
        )
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [5]:
size_layer = 256
num_layers = 2
embedded_size = 256
learning_rate = 1e-3
batch_size = 128
dropout = 1.0
vocab_size = 30000

In [6]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(size_layer,num_layers,embedded_size,vocab_size,learning_rate,dropout)
sess.run(tf.global_variables_initializer())

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use

In [7]:
data.keys()

dict_keys(['left_train', 'label_train', 'left_test', 'label_test'])

In [8]:
train_X_left = data['left_train']
train_Y = data['label_train']
test_X_left = data['left_test']
test_Y = data['label_test']

In [9]:
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

In [10]:
import time
from tqdm import tqdm
import numpy as np

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 1, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = [], [], [], []
    pbar = tqdm(range(0, len(train_X_left), batch_size), desc='train minibatch loop')
    for i in pbar:
        index = min(i+batch_size,len(train_X_left))
        batch_x_left = train_X_left[i:index]
        batch_y = train_Y[i:index]
        # batch_y = np.expand_dims(batch_y,1)
        batch_x_left = pad_sequences(batch_x_left, padding='post')
        acc, loss, _ = sess.run([model.accuracy, model.cost, model.optimizer], 
                           feed_dict = {model.X : batch_x_left, 
                                        model.Y : batch_y})
        assert not np.isnan(loss)
        train_loss.append(loss)
        train_acc.append(acc)
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    pbar = tqdm(range(0, len(test_X_left), batch_size), desc='test minibatch loop')
    for i in pbar:
        index = min(i+batch_size,len(test_X_left))
        batch_x_left = test_X_left[i:index]
        batch_y = test_Y[i:index]
        # batch_y = np.expand_dims(batch_y,1)
        batch_x_left = pad_sequences(batch_x_left, padding='post')
        acc, loss = sess.run([model.accuracy, model.cost], 
                           feed_dict = {model.X : batch_x_left,
                                        model.Y : batch_y})
        
        test_loss.append(loss)
        test_acc.append(acc)
        pbar.set_postfix(cost=loss, accuracy = acc)
    
    train_loss = np.mean(train_loss)
    train_acc = np.mean(train_acc)
    test_loss = np.mean(test_loss)
    test_acc = np.mean(test_acc)
    
    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
    
    print('time taken:', time.time()-lasttime)
    print('epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'%(EPOCH,train_loss,
                                                                                          train_acc,test_loss,
                                                                                          test_acc))

train minibatch loop: 100%|██████████| 2046/2046 [10:55<00:00,  3.12it/s, accuracy=0.5, cost=5.26e-5]   
test minibatch loop: 100%|██████████| 105/105 [00:12<00:00,  8.25it/s, accuracy=0.542, cost=4.93e-5]
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.000000, current acc: 0.482693
time taken: 668.4077956676483
epoch: 0, training loss: 0.005521, training acc: 0.505365, valid loss: 0.000050, valid acc: 0.482693



train minibatch loop:  41%|████      | 838/2046 [04:30<07:20,  2.74it/s, accuracy=0.445, cost=3.31e-5]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

train minibatch loop: 100%|██████████| 2046/2046 [10:59<00:00,  3.10it/s, accuracy=0.5, cost=2.11e-5]  
test minibatch loop: 100%|██████████| 105/105 [00:13<00:00,  7.98it/s, accuracy=0.53, cost=1.99e-5] 
train minibatch loop:   0%|          | 0/2046 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.482693, current acc: 0.483695
time taken: 672.4185461997986
epoch: 0, training loss: 0.000031, training acc: 0.508458, valid loss: 0.000020, valid acc: 0.483695



train minibatch loop: 100%|██████████| 2046/2046 [11:00<00:00,  3.10it/s, accuracy=0.5, cost=9.57e-6]  
test minibatch loop: 100%|██████████| 105/105 [00:12<00:00,  8.21it/s, accuracy=0.53, cost=9.01e-6] 

time taken: 673.7720565795898
epoch: 0, training loss: 0.000014, training acc: 0.508569, valid loss: 0.000009, valid acc: 0.483323

break epoch:0




