In [1]:
import os
# Set CUDA_VISIBLE_DEVICES to the hard-coded value '3' for this process.
# NOTE(review): presumably this selects a single GPU on the original machine and
# has to run before TensorFlow is imported to take effect - confirm and adjust.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [2]:
import numpy as np
import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
import json

# Load the pre-split parallel corpus and the vocabulary tables from disk.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default (the corpus contains non-ASCII text).
with open('train-test.json', encoding='utf-8') as corpus_file:
    dataset = json.load(corpus_file)

with open('dictionary.json', encoding='utf-8') as vocab_file:
    dictionary = json.load(vocab_file)

In [4]:
# Pull the four splits out of the loaded JSON. The X lists are later encoded
# with the 'from' dictionary and the Y lists with the 'to' dictionary.
train_X, train_Y = dataset['train_X'], dataset['train_Y']
test_X, test_Y = dataset['test_X'], dataset['test_Y']

In [5]:
# Inspect the top-level layout of the vocabulary file (one entry per language side).
dictionary.keys()

dict_keys(['from', 'to'])

In [6]:
# Each side of the vocabulary file carries a 'dictionary' table (used below to
# map tokens to ids) and a 'rev_dictionary' table (used to map ids back to tokens).
dictionary_from, rev_dictionary_from = (
    dictionary['from'][table] for table in ('dictionary', 'rev_dictionary'))

dictionary_to, rev_dictionary_to = (
    dictionary['to'][table] for table in ('dictionary', 'rev_dictionary'))

In [7]:
# Ids of the special tokens, read from the source-side dictionary.
# NOTE(review): the same ids are used for the target side (decoder start/end
# tokens and target padding) - presumably both dictionaries share them; confirm.
# NOTE(review): Translator derives sequence lengths with count_nonzero, so it
# relies on PAD being id 0 - confirm against dictionary.json.
GO, PAD, EOS, UNK = (
    dictionary_from[token] for token in ('GO', 'PAD', 'EOS', 'UNK'))

In [8]:
# Append the end-of-sentence marker to every source training sentence.
# The endswith guard keeps the cell idempotent: running it a second time on the
# same kernel no longer produces '... EOS EOS'.
for i, sentence in enumerate(train_X):
    if not sentence.endswith(' EOS'):
        train_X[i] = sentence + ' EOS'

train_X[0]

'Rachel Pike : The science behind a climate headline EOS'

In [9]:
# Append the end-of-sentence marker to every source test sentence.
# The endswith guard keeps the cell idempotent: running it a second time on the
# same kernel no longer produces '... EOS EOS'.
for i, sentence in enumerate(test_X):
    if not sentence.endswith(' EOS'):
        test_X[i] = sentence + ' EOS'

test_X[0]

'How can I speak in <NUM> minutes about the bonds of women over three generations , about how the astonishing strength of those bonds took hold in the life of a four - year - old girl huddled with her young sister , her mother and her grandmother for five days and nights in a small boat in the China Sea more than <NUM> years ago , bonds that took hold in the life of that small girl and never let go - - that small girl now living in San Francisco and speaking to you today ? EOS'

In [10]:
def pad_second_dim(x, desired_size):
    """Zero-pad a rank-3 tensor along axis 1 up to ``desired_size``.

    Args:
        x: tensor indexed with three dimensions; the padding is built from
            float zeros, so ``x`` is presumably a float tensor.
        desired_size: target length of axis 1 (scalar or scalar tensor).

    Returns:
        ``x`` with ``desired_size - tf.shape(x)[1]`` zero steps concatenated
        at the end of axis 1.
    """
    dims = tf.shape(x)
    pad_shape = tf.stack([dims[0], desired_size - dims[1], dims[2]], 0)
    zeros = tf.tile([[[0.0]]], pad_shape)
    return tf.concat([x, zeros], 1)

def encoder_block(inp, n_hidden, filter_size):
    """Length-preserving 1-D convolution used by the encoder.

    The input is padded on both sides of axis 1 and then convolved with
    ``padding="VALID"``, so the output has as many time steps as the input.

    Args:
        inp: rank-3 input tensor (the pad spec covers three axes).
        n_hidden: number of output channels of the convolution.
        filter_size: 1-tuple ``(kernel_width,)``.

    Returns:
        The convolution output, with no activation applied.
    """
    # A VALID convolution of width k consumes k - 1 steps in total. Split that
    # amount between the two sides; for even widths the extra step goes on the
    # right, so the output length still matches the input (the previous
    # symmetric (k - 1) // 2 padding came out one step short).
    total_pad = filter_size[0] - 1
    left_pad = total_pad // 2
    right_pad = total_pad - left_pad
    inp = tf.pad(inp, [[0, 0], [left_pad, right_pad], [0, 0]])
    conv = tf.layers.conv1d(inp, n_hidden, filter_size, padding="VALID", activation=None)
    return conv

def decoder_block(inp, n_hidden, filter_size):
    """1-D convolution whose padding is applied only at the start of axis 1.

    ``filter_size[0] - 1`` zero steps are prepended before a ``VALID``
    convolution, so each output step only sees the current and earlier input
    steps and the output keeps the input length.

    Args:
        inp: rank-3 input tensor (the pad spec covers three axes).
        n_hidden: number of output channels of the convolution.
        filter_size: 1-tuple ``(kernel_width,)``.

    Returns:
        The convolution output, with no activation applied.
    """
    left_context = filter_size[0] - 1
    shifted = tf.pad(inp, [[0, 0], [left_context, 0], [0, 0]])
    return tf.layers.conv1d(shifted, n_hidden, filter_size, padding="VALID", activation=None)

def glu(x):
    """Gated linear unit over the last axis of a rank-3 tensor.

    The channel axis (axis 2) is cut at its midpoint: the first half is the
    value, the second half is passed through a sigmoid and used as the gate.

    Returns:
        ``value * sigmoid(gate)``, with half as many channels as ``x``.
    """
    half = tf.shape(x)[2] // 2
    value = x[:, :, :half]
    gate = x[:, :, half:]
    return tf.multiply(value, tf.sigmoid(gate))

def layer(inp, conv_block, kernel_width, n_hidden, residual=None):
    """Apply one convolution block followed by a GLU and an optional residual.

    Args:
        inp: input tensor handed to ``conv_block``.
        conv_block: callable ``(inp, n_hidden, filter_size)`` returning the
            convolution output, e.g. ``encoder_block`` or ``decoder_block``.
        kernel_width: convolution width, passed on as ``(kernel_width,)``.
        n_hidden: channels produced by the convolution; the GLU halves them.
        residual: tensor added to the gated output, or ``None`` to add 0.

    Returns:
        ``glu(conv_block(...)) + residual``.
    """
    gated = glu(conv_block(inp, n_hidden, (kernel_width,)))
    shortcut = 0 if residual is None else residual
    return gated + shortcut

def sinusoidal_position_encoding(inputs, mask, repr_dim):
    """Build masked sinusoidal position encodings for a batch of sequences.

    For position ``p`` and each even index ``i`` in ``[0, repr_dim)`` the
    encoding holds ``sin(p / 10000 ** (i / repr_dim))`` in the first half of
    the channel axis and the matching cosines in the second half.

    Args:
        inputs: tensor whose axis 0 is used as the batch size and axis 1 as
            the number of positions.
        mask: tensor broadcastable to ``inputs`` after a trailing axis is
            added; positions where it is 0 get an all-zero encoding.
        repr_dim: width of the encoding. It should be even: for an odd value
            the sin and cos halves together are ``repr_dim + 1`` wide.

    Returns:
        A float32 tensor with one encoding vector per position of ``inputs``.
    """
    seq_len = tf.shape(inputs)[1]
    # tf.cast replaces the deprecated tf.to_float; the values are unchanged.
    pos = tf.reshape(tf.cast(tf.range(seq_len), tf.float32), [-1, 1])
    i = np.arange(0, repr_dim, 2, np.float32)
    denom = np.reshape(np.power(10000.0, i / repr_dim), [1, -1])
    angles = pos / denom
    enc = tf.expand_dims(tf.concat([tf.sin(angles), tf.cos(angles)], 1), 0)
    batch_enc = tf.tile(enc, [tf.shape(inputs)[0], 1, 1])
    return batch_enc * tf.expand_dims(tf.cast(mask, tf.float32), -1)

class Translator:
    """Translation model with a gated-convolution encoder and an LSTM decoder.

    The source ids are embedded, combined with sinusoidal position encodings
    and passed through ``num_layers`` residual GLU convolution layers. The
    mean over axis 1 of the resulting memory initialises both the cell and the
    hidden state of a single LSTM decoder, which is run once with teacher
    forcing (training) and once with greedy decoding (inference).

    The constructor reads the module-level ids ``GO`` and ``EOS``.

    Attributes:
        X, Y: int32 placeholders of shape ``[None, None]`` for source and
            target token ids.
        X_seq_len, Y_seq_len: number of non-zero ids per row of ``X`` / ``Y``;
            id 0 is therefore treated as padding.
        training_logits: vocabulary scores of the teacher-forced decoder.
        predicting_ids: token ids from greedy decoding, limited to as many
            steps as the longest source sequence in the batch.
        logits: sampled ids of the teacher-forced decoder (ids, not scores;
            the attribute name is kept for compatibility).
        cost: sequence loss over the non-padding target positions.
        optimizer: Adam training op minimising ``cost``.
        prediction: argmax ids at the non-padding target positions.
        accuracy: fraction of those positions predicted correctly.
    """
    def __init__(self, from_dict_size, to_dict_size, size_layer, num_layers,
                 learning_rate, n_attn_heads = 16, beam_width = 5):
        """Build the whole graph in the current default graph.

        Args:
            from_dict_size: number of rows of the source embedding table.
            to_dict_size: number of rows of the target embedding table and
                width of the output projection.
            size_layer: embedding width and number of LSTM units.
            num_layers: number of encoder convolution layers (at least 1).
            learning_rate: learning rate passed to Adam.
            n_attn_heads: accepted but not used by this model.
            beam_width: accepted but not used by this model.

        Raises:
            ValueError: if ``num_layers`` is smaller than 1.
        """
        if num_layers < 1:
            raise ValueError('num_layers must be >= 1, got %r' % (num_layers,))

        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]

        encoder_embedding = tf.Variable(tf.random_uniform([from_dict_size, size_layer], -1, 1))
        decoder_embedding = tf.Variable(tf.random_uniform([to_dict_size, size_layer], -1, 1))

        # Teacher-forcing input: drop the last column of Y and prepend GO.
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)

        encoder_embedded = tf.nn.embedding_lookup(encoder_embedding, self.X)
        en_masks = tf.sign(self.X)
        encoder_embedded += sinusoidal_position_encoding(self.X, en_masks, size_layer)

        # Each layer convolves to 2 * size_layer channels; the GLU halves them
        # back to size_layer so the layer input can be added as a residual.
        embedded_input = tf.identity(encoder_embedded)
        hidden = encoder_embedded
        for _ in range(num_layers):
            hidden = layer(hidden, encoder_block, 3, size_layer * 2, hidden)
        output_memory = hidden + embedded_input

        # Size the projection from the constructor argument rather than from
        # the global target dictionary, so it always matches decoder_embedding.
        vocab_proj = tf.layers.Dense(to_dict_size)
        # Mean over every time step; padded positions are not masked out.
        init_state = tf.reduce_mean(output_memory, axis=1)
        cell = tf.nn.rnn_cell.LSTMCell(size_layer)
        encoder_state = tf.nn.rnn_cell.LSTMStateTuple(c=init_state, h=init_state)

        # Training decoder (teacher forcing). Y_seq_len is already int32.
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs = tf.nn.embedding_lookup(decoder_embedding, decoder_input),
            sequence_length = self.Y_seq_len)
        decoder = tf.contrib.seq2seq.BasicDecoder(cell = cell,
                                                  helper = helper,
                                                  initial_state = encoder_state,
                                                  output_layer = vocab_proj)
        decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder = decoder,
                                                                maximum_iterations = tf.reduce_max(self.Y_seq_len))

        # Inference decoder (greedy), sharing the cell, projection and embedding.
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding = decoder_embedding,
                                                          start_tokens = tf.tile(
                                                              tf.constant([GO],
                                                                          dtype=tf.int32),
                                                              [tf.shape(init_state)[0]]),
                                                          end_token = EOS)
        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell = cell,
            helper = helper,
            initial_state = encoder_state,
            output_layer = vocab_proj)
        predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder = decoder,
            maximum_iterations = tf.reduce_max(self.X_seq_len))

        self.training_logits = decoder_output.rnn_output
        self.predicting_ids = predicting_decoder_output.sample_id
        self.logits = decoder_output.sample_id

        # Loss and accuracy only count positions inside each target's length.
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [11]:
# Hyperparameters for the model and the training loop.
size_layer = 512      # embedding width and number of LSTM units in Translator
num_layers = 4        # number of gated convolution layers in the encoder
learning_rate = 1e-4  # learning rate handed to the Adam optimizer
batch_size = 96       # sentences per minibatch in the train/test loops
epoch = 20            # number of passes over the training set

In [12]:
# Clear the default graph so that re-running this cell builds a fresh model
# instead of adding a second copy of every op to the existing graph.
tf.reset_default_graph()
# NOTE(review): a session left over from an earlier run of this cell is not
# closed here - consider closing it before creating a new one.
sess = tf.InteractiveSession()
# Build the model graph, then initialise all of its variables.
model = Translator(len(dictionary_from), len(dictionary_to), size_layer, num_layers, learning_rate)
sess.run(tf.global_variables_initializer())

Instructions for updating:
reduction_indices is deprecated, use axis instead
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use `tf.keras.layers.Conv1D` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Us

In [13]:
def str_idx(corpus, dic):
    """Encode whitespace-tokenised sentences as lists of vocabulary ids.

    Args:
        corpus: iterable of sentence strings.
        dic: mapping from token to id.

    Returns:
        One list of ids per sentence, in order; tokens missing from ``dic``
        are replaced by the module-level ``UNK`` id.
    """
    return [
        [dic.get(token, UNK) for token in sentence.split()]
        for sentence in corpus
    ]

def pad_sentence_batch(sentence_batch, pad_int):
    """Right-pad every sentence in a batch to the length of the longest one.

    Args:
        sentence_batch: sequence of sentences, each a sequence of token ids.
        pad_int: id appended to the shorter sentences.

    Returns:
        A tuple ``(padded_seqs, seq_lens)``: ``padded_seqs`` holds new lists
        of equal length (the inputs are not modified) and ``seq_lens`` holds
        the original length of each sentence. An empty batch gives ``([], [])``.
    """
    seq_lens = [len(sentence) for sentence in sentence_batch]
    # default=0 lets an empty batch through instead of raising from max().
    max_sentence_len = max(seq_lens, default=0)
    padded_seqs = [
        # list(...) copies the sentence and also accepts tuples.
        list(sentence) + [pad_int] * (max_sentence_len - length)
        for sentence, length in zip(sentence_batch, seq_lens)
    ]
    return padded_seqs, seq_lens

In [14]:
# Replace each sentence string by its list of vocabulary ids: source sentences
# use the 'from' dictionary, target sentences the 'to' dictionary.
# The names are rebound from strings to id lists, so this cell must not be run
# twice on the same kernel: str_idx calls .split() on every item.
train_X = str_idx(train_X, dictionary_from)
test_X = str_idx(test_X, dictionary_from)
train_Y = str_idx(train_Y, dictionary_to)
test_Y = str_idx(test_Y, dictionary_to)

In [15]:
# Smoke test: run greedy decoding on the first training sentence (a batch of
# one, so no padding is needed) and show only the shape of the returned ids.
sess.run(model.predicting_ids, feed_dict = {model.X: [train_X[0]]}).shape

(1, 10)

In [16]:
import tqdm

# One iteration per epoch: a full training pass with parameter updates,
# followed by an evaluation pass over the test set without updates.
for e in range(epoch):
    train_loss, train_acc, test_loss, test_acc = [], [], [], []

    # Training pass. Each batch is padded to its own longest sentence; the
    # unused per-batch `maxlen` scan of the original loop has been dropped.
    pbar = tqdm.tqdm(
        range(0, len(train_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x, seq_x = pad_sentence_batch(train_X[i : index], PAD)
        batch_y, seq_y = pad_sentence_batch(train_Y[i : index], PAD)
        feed = {model.X: batch_x,
                model.Y: batch_y}
        # Running model.optimizer is what applies the gradient step.
        accuracy, loss, _ = sess.run([model.accuracy,model.cost,model.optimizer],
                                    feed_dict = feed)
        train_loss.append(loss)
        train_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)

    # Evaluation pass: same metrics, but the optimizer op is not run.
    pbar = tqdm.tqdm(
        range(0, len(test_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x, seq_x = pad_sentence_batch(test_X[i : index], PAD)
        batch_y, seq_y = pad_sentence_batch(test_Y[i : index], PAD)
        feed = {model.X: batch_x,
                model.Y: batch_y,}
        accuracy, loss = sess.run([model.accuracy,model.cost],
                                    feed_dict = feed)

        test_loss.append(loss)
        test_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)

    # The reported figures are unweighted means over the batches of the epoch.
    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 np.mean(train_loss),np.mean(train_acc)))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              np.mean(test_loss),np.mean(test_acc)))

minibatch loop: 100%|██████████| 1389/1389 [05:44<00:00,  4.03it/s, accuracy=0.097, cost=6.24] 
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  8.94it/s, accuracy=0.132, cost=6.15]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s, accuracy=0.116, cost=5.83]

epoch 1, training avg loss 6.314159, training avg acc 0.073371
epoch 1, testing avg loss 5.815233, testing avg acc 0.118221


minibatch loop: 100%|██████████| 1389/1389 [05:52<00:00,  3.94it/s, accuracy=0.142, cost=5.66]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.30it/s, accuracy=0.165, cost=5.71]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s, accuracy=0.169, cost=5.23]

epoch 2, training avg loss 5.473304, training avg acc 0.151520
epoch 2, testing avg loss 5.258025, testing avg acc 0.173030


minibatch loop: 100%|██████████| 1389/1389 [05:37<00:00,  4.11it/s, accuracy=0.175, cost=5.33]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.27it/s, accuracy=0.175, cost=5.46]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s, accuracy=0.2, cost=4.9]

epoch 3, training avg loss 5.030872, training avg acc 0.195301
epoch 3, testing avg loss 4.940977, testing avg acc 0.205318


minibatch loop: 100%|██████████| 1389/1389 [05:37<00:00,  4.11it/s, accuracy=0.197, cost=5.1] 
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.30it/s, accuracy=0.182, cost=5.3] 
minibatch loop:   0%|          | 1/1389 [00:00<04:35,  5.05it/s, accuracy=0.232, cost=4.67]

epoch 4, training avg loss 4.751232, training avg acc 0.225746
epoch 4, testing avg loss 4.733564, testing avg acc 0.227314


minibatch loop: 100%|██████████| 1389/1389 [05:52<00:00,  3.94it/s, accuracy=0.212, cost=4.9] 
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.11it/s, accuracy=0.194, cost=5.17]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 5, training avg loss 4.550449, training avg acc 0.247681
epoch 5, testing avg loss 4.582543, testing avg acc 0.245636


minibatch loop: 100%|██████████| 1389/1389 [05:41<00:00,  4.07it/s, accuracy=0.224, cost=4.74]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.02it/s, accuracy=0.21, cost=5.06] 
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 6, training avg loss 4.399148, training avg acc 0.263987
epoch 6, testing avg loss 4.464551, testing avg acc 0.259171


minibatch loop: 100%|██████████| 1389/1389 [05:42<00:00,  4.05it/s, accuracy=0.225, cost=4.61]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  8.92it/s, accuracy=0.222, cost=4.97]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 7, training avg loss 4.277634, training avg acc 0.276880
epoch 7, testing avg loss 4.373359, testing avg acc 0.269750


minibatch loop: 100%|██████████| 1389/1389 [05:43<00:00,  4.05it/s, accuracy=0.245, cost=4.49]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.04it/s, accuracy=0.228, cost=4.91]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s, accuracy=0.28, cost=4.16]

epoch 8, training avg loss 4.174403, training avg acc 0.287856
epoch 8, testing avg loss 4.295752, testing avg acc 0.278947


minibatch loop: 100%|██████████| 1389/1389 [05:56<00:00,  3.90it/s, accuracy=0.257, cost=4.39]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.03it/s, accuracy=0.239, cost=4.85]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 9, training avg loss 4.084603, training avg acc 0.297128
epoch 9, testing avg loss 4.227735, testing avg acc 0.286975


minibatch loop: 100%|██████████| 1389/1389 [06:10<00:00,  3.75it/s, accuracy=0.269, cost=4.29]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.01it/s, accuracy=0.247, cost=4.79]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s, accuracy=0.299, cost=4]

epoch 10, training avg loss 4.005041, training avg acc 0.305592
epoch 10, testing avg loss 4.172348, testing avg acc 0.292998


minibatch loop: 100%|██████████| 1389/1389 [05:43<00:00,  4.05it/s, accuracy=0.273, cost=4.2] 
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  8.97it/s, accuracy=0.243, cost=4.74]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 11, training avg loss 3.933362, training avg acc 0.313303
epoch 11, testing avg loss 4.122051, testing avg acc 0.299210


minibatch loop: 100%|██████████| 1389/1389 [05:43<00:00,  4.05it/s, accuracy=0.278, cost=4.11]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.12it/s, accuracy=0.247, cost=4.7] 
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 12, training avg loss 3.867816, training avg acc 0.320256
epoch 12, testing avg loss 4.077651, testing avg acc 0.304496


minibatch loop: 100%|██████████| 1389/1389 [05:42<00:00,  4.06it/s, accuracy=0.29, cost=4.02] 
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.12it/s, accuracy=0.249, cost=4.67]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s, accuracy=0.317, cost=3.82]

epoch 13, training avg loss 3.807164, training avg acc 0.326946
epoch 13, testing avg loss 4.037922, testing avg acc 0.309011


minibatch loop: 100%|██████████| 1389/1389 [05:41<00:00,  4.06it/s, accuracy=0.29, cost=3.93] 
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.13it/s, accuracy=0.251, cost=4.64]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 14, training avg loss 3.750871, training avg acc 0.333028
epoch 14, testing avg loss 4.002204, testing avg acc 0.313677


minibatch loop: 100%|██████████| 1389/1389 [05:42<00:00,  4.05it/s, accuracy=0.305, cost=3.84]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.08it/s, accuracy=0.25, cost=4.61] 
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 15, training avg loss 3.698038, training avg acc 0.338569
epoch 15, testing avg loss 3.971979, testing avg acc 0.317617


minibatch loop: 100%|██████████| 1389/1389 [05:43<00:00,  4.05it/s, accuracy=0.305, cost=3.76]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.08it/s, accuracy=0.251, cost=4.59]
minibatch loop:   0%|          | 1/1389 [00:00<04:35,  5.04it/s, accuracy=0.342, cost=3.66]

epoch 16, training avg loss 3.648497, training avg acc 0.343987
epoch 16, testing avg loss 3.942400, testing avg acc 0.321354


minibatch loop: 100%|██████████| 1389/1389 [05:43<00:00,  4.04it/s, accuracy=0.312, cost=3.67]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  8.96it/s, accuracy=0.258, cost=4.56]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 17, training avg loss 3.601849, training avg acc 0.349063
epoch 17, testing avg loss 3.915208, testing avg acc 0.324719


minibatch loop: 100%|██████████| 1389/1389 [05:44<00:00,  4.03it/s, accuracy=0.323, cost=3.59]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.11it/s, accuracy=0.257, cost=4.54]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s, accuracy=0.352, cost=3.57]

epoch 18, training avg loss 3.557607, training avg acc 0.353985
epoch 18, testing avg loss 3.894269, testing avg acc 0.327735


minibatch loop: 100%|██████████| 1389/1389 [05:43<00:00,  4.04it/s, accuracy=0.336, cost=3.52]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  9.02it/s, accuracy=0.258, cost=4.53]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 19, training avg loss 3.515697, training avg acc 0.358559
epoch 19, testing avg loss 3.876340, testing avg acc 0.329096


minibatch loop: 100%|██████████| 1389/1389 [05:44<00:00,  4.03it/s, accuracy=0.336, cost=3.45]
minibatch loop: 100%|██████████| 30/30 [00:03<00:00,  8.93it/s, accuracy=0.258, cost=4.52]

epoch 20, training avg loss 3.476228, training avg acc 0.363063
epoch 20, testing avg loss 3.863040, testing avg acc 0.329069





In [17]:
# The table was loaded from JSON, where object keys are always strings; cast
# them to int so the table can be indexed with the predicted token ids.
rev_dictionary_to = {int(k): v for k, v in rev_dictionary_to.items()}

In [18]:
# Greedy-decode the first `test_size` test sentences as one padded batch.
test_size = 20

batch_x, seq_x = pad_sentence_batch(test_X[: test_size], PAD)
batch_y, seq_y = pad_sentence_batch(test_Y[: test_size], PAD)
# Only the source batch is fed: predicting_ids does not depend on model.Y.
feed = {model.X: batch_x}
# Despite its name, `logits` holds the predicted token ids, not scores.
logits = sess.run(model.predicting_ids, feed_dict = feed)
logits.shape

(20, 99)

In [19]:
# Special tokens that are left out of the printed sentences.
rejected = ['PAD', 'EOS', 'UNK', 'GO']

# Print the predicted and the reference translation for each decoded sentence.
# The comprehension variables no longer reuse the loop index name `i`, and
# each id is looked up in the reverse dictionary only once.
for i in range(test_size):
    predicted_words = (rev_dictionary_to[token_id] for token_id in logits[i])
    predict = [word for word in predicted_words if word not in rejected]
    actual_words = (rev_dictionary_to[token_id] for token_id in batch_y[i])
    actual = [word for word in actual_words if word not in rejected]
    print(i, 'predict:', ' '.join(predict))
    print(i, 'actual:', ' '.join(actual))
    print()

0 predict: Tôi có thể nói về việc làm việc trong một năm <NUM> - - một trong những người đàn ông đã làm việc trong suốt cuộc đời mình , và sau đó , ông ấy đã viết về một bài nói chuyện về nhà , và một trong những người đàn ông đã nói rằng , " Anh đã được dạy cho tôi , và một trong những người phụ nữ đã từng nói với họ , " Tại sao ? " , và họ đã làm việc với <NUM> người khác , và khi nào đó có thể
0 actual: Làm sao tôi có thể trình bày trong <NUM> phút về sợi dây liên kết những người phụ nữ qua ba thế hệ , về việc làm thế nào những sợi dây mạnh mẽ đáng kinh ngạc ấy đã níu chặt lấy cuộc sống của một cô bé bốn tuổi co quắp với đứa em gái nhỏ của cô bé , với mẹ và bà trong suốt năm ngày đêm trên con thuyền nhỏ lênh đênh trên Biển Đông hơn <NUM> năm trước , những sợi dây liên kết đã níu lấy cuộc đời cô bé ấy và không bao giờ rời đi - - cô bé ấy giờ sống ở San Francisco và đang nói chuyện với các bạn hôm nay ?

1 predict: Đây không phải là một câu chuyện . . . . . . . . . . . . . . . . . . .