In [1]:
import os
# Restrict CUDA to the device with index 1. This is set here, before
# TensorFlow is imported in the next cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import numpy as np
import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
import json

# Load the parallel corpus and the vocabularies.
# The encoding is pinned to UTF-8 (the encoding JSON is specified in) so that
# reading the non-ASCII target-language text does not depend on the platform's
# locale default encoding.
with open('train-test.json', encoding='utf-8') as fopen:
    dataset = json.load(fopen)

with open('dictionary.json', encoding='utf-8') as fopen:
    dictionary = json.load(fopen)

In [4]:
# Pull the four corpus splits out of the loaded JSON payload in one pass.
train_X, train_Y, test_X, test_Y = (
    dataset[split] for split in ('train_X', 'train_Y', 'test_X', 'test_Y'))

In [5]:
# Inspect the top-level layout of the vocabulary file.
dictionary.keys()

dict_keys(['from', 'to'])

In [6]:
# Each side of the vocabulary file holds a forward mapping ('dictionary',
# looked up by token later in the notebook) and a reverse mapping
# ('rev_dictionary', looked up by id).
dictionary_from, rev_dictionary_from = (
    dictionary['from'][key] for key in ('dictionary', 'rev_dictionary'))

dictionary_to, rev_dictionary_to = (
    dictionary['to'][key] for key in ('dictionary', 'rev_dictionary'))

In [7]:
# Ids of the special tokens, read from the source-side vocabulary.
# NOTE(review): these ids are also used on the target side (decoder start/end
# tokens, padding of the target batches) -- this assumes dictionary_to assigns
# the same ids to these tokens; confirm.
GO, PAD, EOS, UNK = (
    dictionary_from[name] for name in ('GO', 'PAD', 'EOS', 'UNK'))

In [8]:
# Append the end-of-sentence marker to every training source sentence.
# The endswith guard keeps this cell idempotent: re-running it no longer stacks
# a second ' EOS' onto sentences that already carry one, and running it after
# the sentences have been converted to id lists now fails loudly instead of
# silently extending those lists with characters.
for i in range(len(train_X)):
    if not train_X[i].endswith(' EOS'):
        train_X[i] += ' EOS'

train_X[0]

'Rachel Pike : The science behind a climate headline EOS'

In [9]:
# Append the end-of-sentence marker to every test source sentence.
# The endswith guard keeps this cell idempotent: re-running it no longer stacks
# a second ' EOS' onto sentences that already carry one, and running it after
# the sentences have been converted to id lists now fails loudly instead of
# silently extending those lists with characters.
for i in range(len(test_X)):
    if not test_X[i].endswith(' EOS'):
        test_X[i] += ' EOS'

test_X[0]

'How can I speak in <NUM> minutes about the bonds of women over three generations , about how the astonishing strength of those bonds took hold in the life of a four - year - old girl huddled with her young sister , her mother and her grandmother for five days and nights in a small boat in the China Sea more than <NUM> years ago , bonds that took hold in the life of that small girl and never let go - - that small girl now living in San Francisco and speaking to you today ? EOS'

In [10]:
def pad_second_dim(x, desired_size):
    """Zero-pad a rank-3 float tensor along axis 1 up to ``desired_size``.

    A block of zeros with shape
    ``[shape(x)[0], desired_size - shape(x)[1], shape(x)[2]]`` is built by
    tiling a single 0.0 and is concatenated after ``x`` on axis 1.
    """
    dims = tf.shape(x)
    missing_steps = desired_size - dims[1]
    zero_block = tf.tile([[[0.0]]], tf.stack([dims[0], missing_steps, dims[2]], 0))
    return tf.concat([x, zero_block], 1)

class Translator:
    """Sequence-to-sequence translation graph (TensorFlow 1.x, graph mode).

    The constructor builds the whole model in the default graph: a stack of
    bidirectional GRU encoder passes, a multi-layer GRU decoder driven by
    ``tf.contrib.seq2seq`` helpers (teacher forcing for training, greedy
    decoding for prediction), the masked sequence loss, an Adam training op
    and a token-level accuracy metric.

    The module-level token ids ``GO`` and ``EOS`` are read while the graph is
    built, so they must be defined before instantiating this class.

    Args:
        from_dict_size: number of rows of the encoder embedding table.
        to_dict_size: number of rows of the decoder embedding table and width
            of the decoder's output projection.
        size_layer: embedding width and decoder GRU width; each encoder
            direction uses ``size_layer // 2`` units.
        num_layers: number of stacked bidirectional encoder passes, and also
            the number of GRU cells inside every ``MultiRNNCell``.
        learning_rate: learning rate handed to ``tf.train.AdamOptimizer``.

    Attributes:
        X, Y: int32 placeholders of shape ``[None, None]`` for source/target ids.
        X_seq_len, Y_seq_len: per-row count of non-zero ids in ``X`` / ``Y``.
        encoder_state: tuple of ``num_layers`` copies of the final encoder state.
        training_logits: decoder logits under teacher forcing.
        predicting_ids: greedily decoded token ids.
        cost: masked sequence loss between ``training_logits`` and ``Y``.
        optimizer: Adam op minimising ``cost``.
        prediction: arg-max training predictions at non-padding positions.
        accuracy: mean token accuracy over non-padding positions.
    """

    def __init__(self, from_dict_size, to_dict_size, size_layer, num_layers, 
                 learning_rate):
        
        def cell(size, residual, reuse=False):
            # One GRU cell, optionally wrapped so its input is added to its output.
            c = tf.nn.rnn_cell.GRUCell(size, reuse=reuse)
            if residual:
                c = tf.nn.rnn_cell.ResidualWrapper(c)
            return c
        
        def cells(size, residual = 2):
            # num_layers cells; only the last `residual` of them are residual.
            cell_list = []
            for i in range(num_layers):
                cell_list.append(cell(size, (i >= num_layers - residual)))
            return cell_list
        
        embedded_size = size_layer
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        # Lengths are the number of non-zero ids per row, so id 0 is treated as
        # padding here (assumes PAD == 0 -- TODO confirm against the dictionary).
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]
        # encoder
        encoder_embeddings = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        # Each pass runs a bidirectional multi-layer GRU; its concatenated
        # forward/backward outputs become the input of the next pass.
        for n in range(num_layers):
            (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = tf.nn.rnn_cell.MultiRNNCell(cells(size_layer // 2)),
                cell_bw = tf.nn.rnn_cell.MultiRNNCell(cells(size_layer // 2)),
                inputs = encoder_embedded,
                sequence_length = self.X_seq_len,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_%d'%(n))
            encoder_embedded = tf.concat((out_fw, out_bw), 2)
            
        # Final state of the top cell of the last pass, forward and backward
        # halves concatenated, replicated once per decoder layer.
        bi_state = tf.concat((state_fw[-1], state_bw[-1]), -1)
        self.encoder_state = tuple([bi_state] * num_layers)
        
        # Teacher-forcing input: drop the last column of Y and prepend GO.
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        # decoder
        decoder_embeddings = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))
        decoder_cells = tf.nn.rnn_cell.MultiRNNCell(cells(size_layer))
        dense_layer = tf.layers.Dense(to_dict_size)
        
        training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs = tf.nn.embedding_lookup(decoder_embeddings, decoder_input),
                sequence_length = self.Y_seq_len,
                time_major = False)
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cells,
                helper = training_helper,
                initial_state = self.encoder_state,
                output_layer = dense_layer)
        training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = training_decoder,
                impute_finished = True,
                maximum_iterations = tf.reduce_max(self.Y_seq_len))
        
        # The prediction decoder shares the cells, embeddings and projection
        # with the training decoder; only the helper differs.
        predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding = decoder_embeddings,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                end_token = EOS)
        predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cells,
                helper = predicting_helper,
                initial_state = self.encoder_state,
                output_layer = dense_layer)
        # Decoding length is capped at the longest source sentence in the batch.
        predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = predicting_decoder,
                impute_finished = True,
                maximum_iterations = tf.reduce_max(self.X_seq_len))
        self.training_logits = training_decoder_output.rnn_output
        self.predicting_ids = predicting_decoder_output.sample_id
        # The logits span max(Y_seq_len) steps while the targets are the full Y,
        # so Y is expected to be padded exactly to its longest row.
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
        # Token-level accuracy restricted to the positions selected by the mask.
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [11]:
# Hyperparameters for the Translator model and the training loop.
size_layer = 512      # passed to Translator as size_layer
num_layers = 3        # passed to Translator as num_layers
learning_rate = 1e-4  # passed to Translator as learning_rate
batch_size = 96       # step of the minibatch ranges in the train/test loops
epoch = 20            # number of iterations of the outer training loop

In [12]:
# Start from an empty default graph, open an interactive session, build the
# model in that graph and initialise all of its variables.
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Translator(
    from_dict_size = len(dictionary_from),
    to_dict_size = len(dictionary_to),
    size_layer = size_layer,
    num_layers = num_layers,
    learning_rate = learning_rate)
sess.run(tf.global_variables_initializer())

Instructions for updating:
reduction_indices is deprecated, use axis instead
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as 

In [13]:
def str_idx(corpus, dic):
    """Convert whitespace-tokenised sentences into lists of vocabulary ids.

    Every sentence in ``corpus`` is split on whitespace and each token is
    looked up in ``dic``; tokens missing from ``dic`` map to the module-level
    ``UNK`` id. Returns one id list per sentence, in corpus order.
    """
    return [
        [dic.get(token, UNK) for token in sentence.split()]
        for sentence in corpus
    ]

def pad_sentence_batch(sentence_batch, pad_int):
    """Right-pad every sequence in a batch to the length of the longest one.

    Args:
        sentence_batch: sequence of id sequences (e.g. a list of lists).
        pad_int: id appended to the shorter sequences.

    Returns:
        A ``(padded_seqs, seq_lens)`` tuple: ``padded_seqs`` holds new lists
        that all share the batch's maximum length, and ``seq_lens`` holds the
        original, unpadded length of each sequence. An empty batch yields
        ``([], [])`` instead of raising ``ValueError`` from ``max``.
    """
    seq_lens = [len(sentence) for sentence in sentence_batch]
    # default=0 makes the empty batch a valid (if trivial) input.
    max_sentence_len = max(seq_lens, default=0)
    padded_seqs = [
        list(sentence) + [pad_int] * (max_sentence_len - length)
        for sentence, length in zip(sentence_batch, seq_lens)
    ]
    return padded_seqs, seq_lens

In [14]:
# Replace every split by its id-encoded form: source sentences use the source
# vocabulary, target sentences use the target vocabulary.
train_X, test_X = (
    str_idx(sentences, dictionary_from) for sentences in (train_X, test_X))
train_Y, test_Y = (
    str_idx(sentences, dictionary_to) for sentences in (train_Y, test_Y))

In [15]:
# Smoke test: greedy-decode the first training sentence and look at the shape
# of the returned ids. Only model.X is fed; model.Y is not needed to evaluate
# predicting_ids.
sess.run(model.predicting_ids, feed_dict = {model.X: [train_X[0]]}).shape

(1, 10)

In [16]:
import tqdm

# Train for `epoch` passes over the training set; after each pass, evaluate on
# the test set (no optimizer step) and report the per-batch averages.
for e in range(epoch):
    train_loss, train_acc, test_loss, test_acc = [], [], [], []

    # --- training pass ---
    pbar = tqdm.tqdm(
        range(0, len(train_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        # Only the padded batches are fed; the returned lengths are not used
        # because the model derives sequence lengths from its inputs.
        batch_x, _ = pad_sentence_batch(train_X[i : index], PAD)
        batch_y, _ = pad_sentence_batch(train_Y[i : index], PAD)
        feed = {model.X: batch_x,
                model.Y: batch_y}
        accuracy, loss, _ = sess.run([model.accuracy,model.cost,model.optimizer],
                                    feed_dict = feed)
        train_loss.append(loss)
        train_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)

    # --- evaluation pass ---
    pbar = tqdm.tqdm(
        range(0, len(test_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x, _ = pad_sentence_batch(test_X[i : index], PAD)
        batch_y, _ = pad_sentence_batch(test_Y[i : index], PAD)
        feed = {model.X: batch_x,
                model.Y: batch_y,}
        accuracy, loss = sess.run([model.accuracy,model.cost],
                                    feed_dict = feed)

        test_loss.append(loss)
        test_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)

    # Averages are unweighted means over batches.
    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 np.mean(train_loss),np.mean(train_acc)))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              np.mean(test_loss),np.mean(test_acc)))

minibatch loop: 100%|██████████| 1389/1389 [29:25<00:00,  1.27s/it, accuracy=0.176, cost=5.36]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.39it/s, accuracy=0.177, cost=5.4] 
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 1, training avg loss 5.631867, training avg acc 0.143789
epoch 1, testing avg loss 4.847403, testing avg acc 0.219278


minibatch loop: 100%|██████████| 1389/1389 [29:42<00:00,  1.28s/it, accuracy=0.229, cost=4.71]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.37it/s, accuracy=0.221, cost=4.94]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 2, training avg loss 4.483749, training avg acc 0.260534
epoch 2, testing avg loss 4.345735, testing avg acc 0.273585


minibatch loop: 100%|██████████| 1389/1389 [29:14<00:00,  1.26s/it, accuracy=0.266, cost=4.33]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.52it/s, accuracy=0.242, cost=4.7] 
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 3, training avg loss 4.086748, training avg acc 0.301338
epoch 3, testing avg loss 4.105556, testing avg acc 0.298383


minibatch loop: 100%|██████████| 1389/1389 [28:51<00:00,  1.25s/it, accuracy=0.291, cost=4.02]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.50it/s, accuracy=0.259, cost=4.54]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 4, training avg loss 3.840827, training avg acc 0.326301
epoch 4, testing avg loss 3.955237, testing avg acc 0.315165


minibatch loop: 100%|██████████| 1389/1389 [29:21<00:00,  1.27s/it, accuracy=0.317, cost=3.76]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.44it/s, accuracy=0.278, cost=4.44]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 5, training avg loss 3.656775, training avg acc 0.345212
epoch 5, testing avg loss 3.850616, testing avg acc 0.328469


minibatch loop: 100%|██████████| 1389/1389 [29:35<00:00,  1.28s/it, accuracy=0.343, cost=3.52]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.42it/s, accuracy=0.279, cost=4.38]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 6, training avg loss 3.507161, training avg acc 0.360993
epoch 6, testing avg loss 3.774542, testing avg acc 0.337473


minibatch loop: 100%|██████████| 1389/1389 [30:03<00:00,  1.30s/it, accuracy=0.375, cost=3.29]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.41it/s, accuracy=0.285, cost=4.33]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 7, training avg loss 3.379327, training avg acc 0.374931
epoch 7, testing avg loss 3.719304, testing avg acc 0.344916


minibatch loop: 100%|██████████| 1389/1389 [29:35<00:00,  1.28s/it, accuracy=0.402, cost=3.08]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.42it/s, accuracy=0.291, cost=4.29]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 8, training avg loss 3.266296, training avg acc 0.387727
epoch 8, testing avg loss 3.679967, testing avg acc 0.349609


minibatch loop: 100%|██████████| 1389/1389 [30:08<00:00,  1.30s/it, accuracy=0.436, cost=2.88]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.43it/s, accuracy=0.291, cost=4.27]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 9, training avg loss 3.164171, training avg acc 0.400009
epoch 9, testing avg loss 3.654120, testing avg acc 0.352346


minibatch loop: 100%|██████████| 1389/1389 [29:34<00:00,  1.28s/it, accuracy=0.47, cost=2.69] 
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.43it/s, accuracy=0.296, cost=4.25]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 10, training avg loss 3.070552, training avg acc 0.411651
epoch 10, testing avg loss 3.639802, testing avg acc 0.353985


minibatch loop: 100%|██████████| 1389/1389 [29:58<00:00,  1.29s/it, accuracy=0.492, cost=2.52]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.45it/s, accuracy=0.296, cost=4.24]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 11, training avg loss 2.984185, training avg acc 0.422534
epoch 11, testing avg loss 3.638915, testing avg acc 0.354690


minibatch loop: 100%|██████████| 1389/1389 [29:32<00:00,  1.28s/it, accuracy=0.517, cost=2.36]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.45it/s, accuracy=0.289, cost=4.24]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 12, training avg loss 2.906425, training avg acc 0.432674
epoch 12, testing avg loss 3.661523, testing avg acc 0.350648


minibatch loop: 100%|██████████| 1389/1389 [29:32<00:00,  1.28s/it, accuracy=0.545, cost=2.23]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.41it/s, accuracy=0.299, cost=4.22]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 13, training avg loss 2.833118, training avg acc 0.442450
epoch 13, testing avg loss 3.646881, testing avg acc 0.353409


minibatch loop: 100%|██████████| 1389/1389 [29:33<00:00,  1.28s/it, accuracy=0.563, cost=2.08]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.40it/s, accuracy=0.301, cost=4.21]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 14, training avg loss 2.759087, training avg acc 0.452881
epoch 14, testing avg loss 3.631170, testing avg acc 0.356292


minibatch loop: 100%|██████████| 1389/1389 [29:58<00:00,  1.29s/it, accuracy=0.586, cost=1.94]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.46it/s, accuracy=0.294, cost=4.22]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 15, training avg loss 2.687887, training avg acc 0.463277
epoch 15, testing avg loss 3.640316, testing avg acc 0.356179


minibatch loop: 100%|██████████| 1389/1389 [29:30<00:00,  1.27s/it, accuracy=0.609, cost=1.82]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.45it/s, accuracy=0.288, cost=4.25]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 16, training avg loss 2.621819, training avg acc 0.473107
epoch 16, testing avg loss 3.672867, testing avg acc 0.352550


minibatch loop: 100%|██████████| 1389/1389 [29:32<00:00,  1.28s/it, accuracy=0.63, cost=1.71] 
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.47it/s, accuracy=0.274, cost=4.3] 
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 17, training avg loss 2.562509, training avg acc 0.481904
epoch 17, testing avg loss 3.732360, testing avg acc 0.345783


minibatch loop: 100%|██████████| 1389/1389 [29:23<00:00,  1.27s/it, accuracy=0.657, cost=1.62]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.44it/s, accuracy=0.272, cost=4.33]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 18, training avg loss 2.506106, training avg acc 0.490235
epoch 18, testing avg loss 3.779493, testing avg acc 0.341548


minibatch loop: 100%|██████████| 1389/1389 [29:22<00:00,  1.27s/it, accuracy=0.67, cost=1.54] 
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.45it/s, accuracy=0.271, cost=4.38]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 19, training avg loss 2.446556, training avg acc 0.499697
epoch 19, testing avg loss 3.808757, testing avg acc 0.341008


minibatch loop: 100%|██████████| 1389/1389 [29:23<00:00,  1.27s/it, accuracy=0.695, cost=1.46]
minibatch loop: 100%|██████████| 30/30 [00:12<00:00,  2.48it/s, accuracy=0.28, cost=4.43] 

epoch 20, training avg loss 2.387406, training avg acc 0.509469
epoch 20, testing avg loss 3.823472, testing avg acc 0.343508





In [17]:
# The reverse vocabulary was loaded with json.load; convert its keys to int so
# it can be indexed with the integer ids in the cells below.
rev_dictionary_to = {int(k): v for k, v in rev_dictionary_to.items()}

In [18]:
test_size = 20

# Pad the first `test_size` test pairs, then greedy-decode the source side.
# Note that `logits` holds the decoded token ids (model.predicting_ids).
(batch_x, seq_x), (batch_y, seq_y) = (
    pad_sentence_batch(split[: test_size], PAD) for split in (test_X, test_Y))
feed = {model.X: batch_x}
logits = sess.run(model.predicting_ids, feed_dict = feed)
logits.shape

(20, 99)

In [19]:
# Special tokens that are dropped before a sentence is printed.
rejected = ['PAD', 'EOS', 'UNK', 'GO']

# For each decoded test sentence, map ids back to words and print the model
# output next to the reference translation.
for row in range(test_size):
    predict = [
        word
        for word in (rev_dictionary_to[token_id] for token_id in logits[row])
        if word not in rejected
    ]
    actual = [
        word
        for word in (rev_dictionary_to[token_id] for token_id in batch_y[row])
        if word not in rejected
    ]
    print(row, 'predict:', ' '.join(predict))
    print(row, 'actual:', ' '.join(actual))
    print()

0 predict: Làm sao để tôi nhớ lại <NUM> năm trước đây , như cô ấy đã làm được điều gì đó đã xảy ra với tôi , một số năm về trước , cô ấy đã làm việc với những người khác và vì thế cô ấy đã dành cả ngày để sống với cô ấy , trong khi khoảng <NUM> triệu năm tuổi cô ấy đã trở nên tốt hơn và cô ấy có thể sống sót và cô ấy đã bị cô lập trong một gia đình không ? Trong khi cô ấy đi qua và nói chuyện
0 actual: Làm sao tôi có thể trình bày trong <NUM> phút về sợi dây liên kết những người phụ nữ qua ba thế hệ , về việc làm thế nào những sợi dây mạnh mẽ đáng kinh ngạc ấy đã níu chặt lấy cuộc sống của một cô bé bốn tuổi co quắp với đứa em gái nhỏ của cô bé , với mẹ và bà trong suốt năm ngày đêm trên con thuyền nhỏ lênh đênh trên Biển Đông hơn <NUM> năm trước , những sợi dây liên kết đã níu lấy cuộc đời cô bé ấy và không bao giờ rời đi - - cô bé ấy giờ sống ở San Francisco và đang nói chuyện với các bạn hôm nay ?

1 predict: Không phải câu chuyện đó . . . . . . . . . . . . . . . . . . . . . . . . .