In [1]:
import os
# Expose only GPU 0 to this process. This is set here, before the cell that
# imports TensorFlow, so the restriction is in place when TF is loaded.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [2]:
import numpy as np
import tensorflow as tf
import json

In [3]:
# Load the pre-tokenised parallel corpus (BPE ids, judging by the file name).
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default; JSON files are expected to be UTF-8.
with open('dataset-bpe.json', encoding='utf-8') as fopen:
    data = json.load(fopen)

In [4]:
# Pull the four splits out of the loaded dict: source (X) and target (Y)
# token-id sequences for the training and test sets.
train_X, train_Y = data['train_X'], data['train_Y']
test_X, test_Y = data['test_X'], data['test_Y']

In [5]:
# Reserved token ids: GO is fed to the decoder as its first input, EOS marks
# the end of a target sequence.
GO, EOS = 1, 2

# Number of rows in the embedding table and width of the output projection.
vocab_size = 32000

In [6]:
# Terminate every target sequence with the EOS id (the named constant, not a
# bare literal). The guard keeps this cell idempotent: re-running it does not
# append a second EOS to sequences that already end with one.
train_Y = [seq if seq and seq[-1] == EOS else seq + [EOS] for seq in train_Y]
test_Y = [seq if seq and seq[-1] == EOS else seq + [EOS] for seq in test_Y]

In [7]:
from tensor2tensor.utils import beam_search

def pad_second_dim(x, desired_size):
    """Zero-pad a rank-3 tensor along axis 1 up to ``desired_size``.

    Args:
        x: tensor (or tensor-convertible value) indexed as
            ``[dim0, dim1, dim2]``.
        desired_size: target size of axis 1; must be >= the current size,
            otherwise the padding shape is negative and the op fails at run
            time.

    Returns:
        A tensor with the same dtype as ``x`` whose axis 1 has length
        ``desired_size``; the added positions are filled with zeros.
    """
    x = tf.convert_to_tensor(x)
    shape = tf.shape(x)
    # Build the padding in x's own dtype so the concat also works for
    # non-float32 inputs (the previous float literal forced float32).
    padding = tf.zeros(
        tf.stack([shape[0], desired_size - shape[1], shape[2]], 0),
        dtype = x.dtype)
    return tf.concat([x, padding], 1)

class Translator:
    """GRU encoder-decoder translation model built in the default TF1 graph.

    The encoder is a stack of bidirectional GRU layers; the decoder is a
    ``MultiRNNCell`` of GRU cells initialised from the encoder's final state
    (no attention). Source and target share one embedding table.

    The constructor reads the module-level names ``vocab_size``, ``GO`` and
    ``EOS``. Token id 0 is treated as padding: sequence lengths are the
    per-row counts of non-zero ids.

    Attributes:
        X, Y: int32 placeholders, batch-major ``[batch, time]``.
        X_seq_len, Y_seq_len: per-row non-zero counts of ``X`` / ``Y``.
        training_logits: teacher-forced decoder logits.
        fast_result: greedy-decoded token ids (needs only ``X``).
        cost: masked sequence cross-entropy of ``training_logits`` vs ``Y``.
        optimizer: Adam update op minimising ``cost``.
        prediction: argmax ids at the non-padded target positions (flattened).
        accuracy: fraction of non-padded target positions predicted correctly.
    """
    def __init__(self, size_layer, num_layers, embedded_size, learning_rate):
        """Build the full training and inference graph.

        Args:
            size_layer: decoder GRU width. Each encoder direction uses
                ``size_layer // 2`` units, so this should be even for the
                concatenated encoder state to match the decoder width.
            num_layers: number of encoder bi-GRU layers and decoder GRU layers.
            embedded_size: width of the shared embedding table.
            learning_rate: Adam learning rate.
        """

        def cells(size_layer, reuse=False):
            return tf.nn.rnn_cell.GRUCell(size_layer,reuse=reuse)

        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])

        # Lengths are derived from the data itself: every non-zero id counts.
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype = tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype = tf.int32)
        batch_size = tf.shape(self.X)[0]
        max_target_len = tf.reduce_max(self.Y_seq_len)

        embeddings = tf.Variable(tf.random_uniform([vocab_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(embeddings, self.X)

        # Stacked bidirectional encoder: each layer consumes the concatenated
        # forward/backward outputs of the previous one.
        for n in range(num_layers):
            (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(size_layer // 2),
                cell_bw = cells(size_layer // 2),
                inputs = encoder_embedded,
                sequence_length = self.X_seq_len,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_%d'%(n))
            encoder_embedded = tf.concat((out_fw, out_bw), 2)

        # Only the last encoder layer's final state is kept; it is replicated
        # so that every decoder layer starts from the same state.
        bi_state = tf.concat((state_fw, state_bw), -1)
        encoder_state = tuple([bi_state] * num_layers)

        # Teacher forcing input: drop the last target column and prepend GO.
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        dense = tf.layers.Dense(vocab_size)
        # The same cell and projection objects are passed to both decoders
        # below.
        decoder_cells = tf.nn.rnn_cell.MultiRNNCell([cells(size_layer) for _ in range(num_layers)])

        training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs = tf.nn.embedding_lookup(embeddings, decoder_input),
                sequence_length = self.Y_seq_len,
                time_major = False)
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cells,
                helper = training_helper,
                initial_state = encoder_state,
                output_layer = dense)
        training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = training_decoder,
                impute_finished = True,
                maximum_iterations = max_target_len)
        self.training_logits = training_decoder_output.rnn_output

        # Greedy inference: start from GO, stop at EOS or after twice the
        # longest source length in the batch.
        predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding = embeddings,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                end_token = EOS)
        predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cells,
                helper = predicting_helper,
                initial_state = encoder_state,
                output_layer = dense)
        predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = predicting_decoder,
                impute_finished = True,
                maximum_iterations = 2 * tf.reduce_max(self.X_seq_len))
        self.fast_result = predicting_decoder_output.sample_id

        # The logits span max_target_len steps. Trim the targets to the same
        # width so a batch padded wider than its longest sequence still lines
        # up with the logits and the mask (a no-op when Y is padded exactly to
        # its longest row).
        targets = self.Y[:, :max_target_len]
        masks = tf.sequence_mask(self.Y_seq_len, max_target_len, dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = targets,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)

        # Token-level accuracy over the non-padded target positions only.
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(targets, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [8]:
# Model capacity (passed to Translator).
num_layers = 2
size_layer = 512
embedded_size = 256

# Optimisation schedule (used by the training and evaluation loops).
epoch = 20
batch_size = 128
learning_rate = 1e-3

In [9]:
# Start from an empty default graph so that re-running this cell does not add
# a second copy of the model on top of the first one.
tf.reset_default_graph()
# The session is kept in `sess` and reused by every later cell.
sess = tf.InteractiveSession()
# Builds the whole graph (placeholders, encoder, decoders, loss, optimizer).
model = Translator(size_layer, num_layers, embedded_size, learning_rate)
# Must run after the model is built so that all of its variables exist.
# NOTE(review): no random seed is set anywhere in this notebook, so the
# initial weights (and therefore the numbers below) differ between runs.
sess.run(tf.global_variables_initializer())

Instructions for updating:
reduction_indices is deprecated, use axis instead
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
The TensorFlow contrib module will n

In [10]:
# Short alias for the Keras padding helper; the cells below call it with
# padding='post' to pad each mini-batch on the right to a common length.
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

In [11]:
# Smoke test on the first 10 training pairs with the freshly initialised
# model: checks that greedy decoding, the loss and the accuracy all evaluate.
batch_x, batch_y = (
    pad_sequences(seqs[:10], padding='post') for seqs in (train_X, train_Y)
)

sess.run(
    [model.fast_result, model.cost, model.accuracy],
    feed_dict = {model.X: batch_x, model.Y: batch_y},
)

[array([[ 8180,  8180, 13143, 13143, 12937,  6992,  4556,  4556,  9512,
         24078, 24078, 20187, 20187, 20187, 20187, 20187,  5649,  5649,
          5649, 17526, 17526, 17526, 17526, 10614, 10614, 10614, 17526,
          4803,  4803, 18919,  3277, 14446, 14446,   283,   283,   283,
           283, 29788, 29788,  1788,  9858,  9858, 28365, 28365, 14894,
         14894,  8917,  8917,  8917,  8917,  8917,  8917,  8917,  8917,
         24207,  3448,  3448,  2827,  2827, 16308, 16308, 22395, 22395,
         22395, 22395, 13721, 13721, 13721, 13721, 13721, 13721, 20560],
        [14031, 14031, 30325, 30325, 28000, 28000, 18092, 18092, 18092,
         11295, 11295,  1162,  1162,  1162, 10536, 10536, 10536, 25376,
         25376, 27602, 27602, 27602,  9451,  9451,  9451, 25158, 25158,
         25158, 29397, 29397,  2948, 13433, 13433, 13433, 26233, 21482,
         15779, 15779, 15779,     6,  5723,  5723,  5723,  5723,  4474,
          4474, 10896, 10896, 25678, 25678, 25678, 25678, 24216

In [12]:
import tqdm

def _run_epoch(inputs, targets, train):
    """Run one pass over ``(inputs, targets)`` in mini-batches of ``batch_size``.

    Args:
        inputs: list of source token-id sequences.
        targets: list of target token-id sequences, aligned with ``inputs``.
        train: when True the optimizer op is run as well, so the weights are
            updated; when False only accuracy and loss are evaluated.

    Returns:
        ``(mean_loss, mean_accuracy)``: plain means of the per-batch values,
        so a shorter final batch weighs the same as a full one.
    """
    fetches = [model.accuracy, model.cost]
    if train:
        fetches.append(model.optimizer)

    losses, accuracies = [], []
    pbar = tqdm.tqdm(
        range(0, len(inputs), batch_size), desc = 'minibatch loop')
    for start in pbar:
        # List slicing already clamps at the end, so no explicit min() needed.
        stop = start + batch_size
        feed = {model.X: pad_sequences(inputs[start : stop], padding='post'),
                model.Y: pad_sequences(targets[start : stop], padding='post')}
        accuracy, loss = sess.run(fetches, feed_dict = feed)[:2]
        losses.append(loss)
        accuracies.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    return np.mean(losses), np.mean(accuracies)


# Each epoch: one training pass, then one evaluation pass on the test split
# (teacher-forced loss/accuracy, no weight updates).
for e in range(epoch):
    train_loss, train_acc = _run_epoch(train_X, train_Y, train = True)
    test_loss, test_acc = _run_epoch(test_X, test_Y, train = False)

    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 train_loss,train_acc))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              test_loss,test_acc))

minibatch loop: 100%|██████████| 1563/1563 [11:24<00:00,  2.28it/s, accuracy=0.257, cost=4.7] 
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.86it/s, accuracy=0.296, cost=4.07]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 1, training avg loss 5.912079, training avg acc 0.166160
epoch 1, testing avg loss 4.506110, testing avg acc 0.262045


minibatch loop: 100%|██████████| 1563/1563 [11:27<00:00,  2.27it/s, accuracy=0.332, cost=3.72]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.91it/s, accuracy=0.339, cost=3.52]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 2, training avg loss 4.049781, training avg acc 0.306016
epoch 2, testing avg loss 3.832899, testing avg acc 0.329587


minibatch loop: 100%|██████████| 1563/1563 [11:38<00:00,  2.24it/s, accuracy=0.411, cost=3]   
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.85it/s, accuracy=0.371, cost=3.26]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 3, training avg loss 3.400880, training avg acc 0.371896
epoch 3, testing avg loss 3.594582, testing avg acc 0.356842


minibatch loop: 100%|██████████| 1563/1563 [11:44<00:00,  2.22it/s, accuracy=0.488, cost=2.45]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.86it/s, accuracy=0.376, cost=3.14]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 4, training avg loss 2.997944, training avg acc 0.418057
epoch 4, testing avg loss 3.527262, testing avg acc 0.366554


minibatch loop: 100%|██████████| 1563/1563 [11:45<00:00,  2.22it/s, accuracy=0.564, cost=2.04]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.84it/s, accuracy=0.409, cost=3.12]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 5, training avg loss 2.707116, training avg acc 0.455160
epoch 5, testing avg loss 3.531936, testing avg acc 0.369929


minibatch loop: 100%|██████████| 1563/1563 [11:47<00:00,  2.21it/s, accuracy=0.62, cost=1.74] 
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.95it/s, accuracy=0.409, cost=3.11]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 6, training avg loss 2.482041, training avg acc 0.486549
epoch 6, testing avg loss 3.580899, testing avg acc 0.367816


minibatch loop: 100%|██████████| 1563/1563 [11:36<00:00,  2.24it/s, accuracy=0.68, cost=1.52] 
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.88it/s, accuracy=0.409, cost=3.18]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 7, training avg loss 2.304343, training avg acc 0.512776
epoch 7, testing avg loss 3.655856, testing avg acc 0.362913


minibatch loop: 100%|██████████| 1563/1563 [11:31<00:00,  2.26it/s, accuracy=0.691, cost=1.36]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.93it/s, accuracy=0.403, cost=3.07]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 8, training avg loss 2.160954, training avg acc 0.534549
epoch 8, testing avg loss 3.737370, testing avg acc 0.358621


minibatch loop: 100%|██████████| 1563/1563 [11:34<00:00,  2.25it/s, accuracy=0.706, cost=1.3] 
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.83it/s, accuracy=0.392, cost=3.14]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 9, training avg loss 2.040770, training avg acc 0.552979
epoch 9, testing avg loss 3.815466, testing avg acc 0.353772


minibatch loop: 100%|██████████| 1563/1563 [11:42<00:00,  2.22it/s, accuracy=0.737, cost=1.16]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.89it/s, accuracy=0.376, cost=3.18]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 10, training avg loss 1.930049, training avg acc 0.570824
epoch 10, testing avg loss 3.895134, testing avg acc 0.353186


minibatch loop: 100%|██████████| 1563/1563 [11:37<00:00,  2.24it/s, accuracy=0.764, cost=1.06]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.92it/s, accuracy=0.392, cost=3.21]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 11, training avg loss 1.833433, training avg acc 0.586451
epoch 11, testing avg loss 3.991868, testing avg acc 0.349418


minibatch loop: 100%|██████████| 1563/1563 [11:39<00:00,  2.23it/s, accuracy=0.748, cost=1.06]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.90it/s, accuracy=0.355, cost=3.34]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 12, training avg loss 1.756140, training avg acc 0.598503
epoch 12, testing avg loss 4.059681, testing avg acc 0.346714


minibatch loop: 100%|██████████| 1563/1563 [11:40<00:00,  2.23it/s, accuracy=0.775, cost=0.968]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.91it/s, accuracy=0.387, cost=3.35]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 13, training avg loss 1.686734, training avg acc 0.609522
epoch 13, testing avg loss 4.143593, testing avg acc 0.345248


minibatch loop: 100%|██████████| 1563/1563 [11:36<00:00,  2.24it/s, accuracy=0.796, cost=0.889]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.89it/s, accuracy=0.376, cost=3.38]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 14, training avg loss 1.622047, training avg acc 0.620016
epoch 14, testing avg loss 4.241367, testing avg acc 0.344546


minibatch loop: 100%|██████████| 1563/1563 [11:34<00:00,  2.25it/s, accuracy=0.8, cost=0.882] 
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.97it/s, accuracy=0.403, cost=3.47]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 15, training avg loss 1.568954, training avg acc 0.628421
epoch 15, testing avg loss 4.302605, testing avg acc 0.342172


minibatch loop: 100%|██████████| 1563/1563 [11:27<00:00,  2.27it/s, accuracy=0.797, cost=0.837]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.97it/s, accuracy=0.376, cost=3.52]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 16, training avg loss 1.523960, training avg acc 0.635393
epoch 16, testing avg loss 4.380991, testing avg acc 0.338359


minibatch loop: 100%|██████████| 1563/1563 [11:25<00:00,  2.28it/s, accuracy=0.809, cost=0.801]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.92it/s, accuracy=0.382, cost=3.57]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 17, training avg loss 1.482213, training avg acc 0.641733
epoch 17, testing avg loss 4.450799, testing avg acc 0.337775


minibatch loop: 100%|██████████| 1563/1563 [11:24<00:00,  2.28it/s, accuracy=0.82, cost=0.748]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.99it/s, accuracy=0.382, cost=3.73]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 18, training avg loss 1.443569, training avg acc 0.647991
epoch 18, testing avg loss 4.534051, testing avg acc 0.334935


minibatch loop: 100%|██████████| 1563/1563 [11:26<00:00,  2.28it/s, accuracy=0.812, cost=0.739]
minibatch loop: 100%|██████████| 40/40 [00:08<00:00,  4.93it/s, accuracy=0.403, cost=3.67]
minibatch loop:   0%|          | 0/1563 [00:00<?, ?it/s]

epoch 19, training avg loss 1.407701, training avg acc 0.653678
epoch 19, testing avg loss 4.622604, testing avg acc 0.330094


minibatch loop: 100%|██████████| 1563/1563 [11:26<00:00,  2.28it/s, accuracy=0.834, cost=0.677]
minibatch loop: 100%|██████████| 40/40 [00:07<00:00,  5.04it/s, accuracy=0.387, cost=3.72]

epoch 20, training avg loss 1.372407, training avg acc 0.659821
epoch 20, testing avg loss 4.666390, testing avg acc 0.331290





In [13]:
from tensor2tensor.utils import bleu_hook

# Greedy-decode the test set batch by batch. Ids <= 3 are stripped from each
# hypothesis before scoring.
results = []
for start in tqdm.tqdm(range(0, len(test_X), batch_size)):
    stop = min(start + batch_size, len(test_X))
    batch_x = pad_sequences(test_X[start : stop], padding='post')
    decoded = sess.run(model.fast_result, feed_dict = {model.X: batch_x})
    results.extend([tok for tok in row if tok > 3] for row in decoded)

# References get the same id filtering as the hypotheses.
rights = [[tok for tok in ref if tok > 3] for ref in test_Y]

# Corpus-level BLEU over token ids; as the last expression its value is the
# cell's output.
bleu_hook.compute_bleu(reference_corpus = rights,
                       translation_corpus = results)

100%|██████████| 40/40 [00:19<00:00,  2.02it/s]


0.047413725