In [1]:
import os
# Sets CUDA_VISIBLE_DEVICES to '3' in the first cell, i.e. before TensorFlow is
# imported by the following cell. Adjust the index for the machine in use.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [2]:
import numpy as np
import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
import json

# The corpus contains non-ASCII (Vietnamese) text, so the encoding is pinned to
# UTF-8 instead of relying on the platform/locale default used by open().
with open('train-test.json', encoding = 'utf-8') as fopen:
    dataset = json.load(fopen)

# Vocabulary file; the notebook reads its 'from' and 'to' entries further down.
with open('dictionary.json', encoding = 'utf-8') as fopen:
    dictionary = json.load(fopen)

In [4]:
# Pull the four parallel-corpus splits out of the loaded JSON in one step.
train_X, train_Y, test_X, test_Y = (
    dataset[split] for split in ('train_X', 'train_Y', 'test_X', 'test_Y')
)

In [5]:
# Inspect the top-level keys of the vocabulary file loaded above.
dictionary.keys()

dict_keys(['from', 'to'])

In [6]:
# Each side of the vocabulary file carries a forward mapping ('dictionary')
# and a reverse mapping ('rev_dictionary').
source_vocab, target_vocab = dictionary['from'], dictionary['to']

dictionary_from, rev_dictionary_from = source_vocab['dictionary'], source_vocab['rev_dictionary']
dictionary_to, rev_dictionary_to = target_vocab['dictionary'], target_vocab['rev_dictionary']

In [7]:
# Ids of the special tokens, looked up in the source-side vocabulary.
# NOTE(review): Translator measures sequence lengths with count_nonzero, which
# presumes PAD == 0, and uses GO/EOS on the target side too - confirm both
# vocabularies agree on these ids.
GO, PAD, EOS, UNK = (dictionary_from[token] for token in ('GO', 'PAD', 'EOS', 'UNK'))

In [8]:
# Terminate every training source sentence with the EOS marker.
# The endswith guard keeps the cell idempotent: re-running it no longer
# appends a second ' EOS' to sentences that were already processed.
# NOTE(review): only the source side (train_X) is terminated here; verify that
# train_Y already ends with EOS, since the beam-search decoder stops on EOS.
for i, sentence in enumerate(train_X):
    if not sentence.endswith(' EOS'):
        train_X[i] = sentence + ' EOS'
    
train_X[0]

'Rachel Pike : The science behind a climate headline EOS'

In [9]:
# Terminate every test source sentence with the EOS marker.
# The endswith guard keeps the cell idempotent: re-running it no longer
# appends a second ' EOS' to sentences that were already processed.
for i, sentence in enumerate(test_X):
    if not sentence.endswith(' EOS'):
        test_X[i] = sentence + ' EOS'
    
test_X[0]

'How can I speak in <NUM> minutes about the bonds of women over three generations , about how the astonishing strength of those bonds took hold in the life of a four - year - old girl huddled with her young sister , her mother and her grandmother for five days and nights in a small boat in the China Sea more than <NUM> years ago , bonds that took hold in the life of that small girl and never let go - - that small girl now living in San Francisco and speaking to you today ? EOS'

In [10]:
def pad_second_dim(x, desired_size):
    """Append float zeros along axis 1 of a 3-D tensor until it has ``desired_size`` steps.

    Args:
        x: tensor indexed here as (dim0, dim1, dim2); it is concatenated with a
            float zero tensor, so it is expected to be floating point.
        desired_size: target length of axis 1 (scalar int or int tensor).

    Returns:
        ``x`` followed along axis 1 by ``desired_size - shape(x)[1]`` zero slices.
    """
    dims = tf.shape(x)
    missing_steps = desired_size - dims[1]
    # Tile a single 0.0 up to the shape of the missing region.
    zeros_shape = tf.stack([dims[0], missing_steps, dims[2]], 0)
    zeros = tf.tile([[[0.0]]], zeros_shape)
    return tf.concat([x, zeros], 1)

class Translator:
    """GRU encoder-decoder with Bahdanau attention, built as a TF1 static graph.

    The constructor builds three things that share one set of weights:

    * an encoder: stacked GRU cells run with ``tf.nn.dynamic_rnn``;
    * a training decoder: attention-wrapped stacked GRU driven by
      ``ScheduledEmbeddingTrainingHelper``;
    * an inference decoder: the same cell weights driven by
      ``BeamSearchDecoder``.

    Attributes:
        X, Y: int32 placeholders of shape [batch, time] holding token ids.
        X_seq_len, Y_seq_len: per-row count of non-zero ids, so id 0 is
            treated as padding.
        training_logits: decoder logits from the training decoder.
        predicting_ids: ids of the beam at index 0 from beam search.
        cost: sequence loss of ``training_logits`` against ``Y``, masked by
            ``Y_seq_len``.
        optimizer: Adam minimisation op for ``cost``.
        prediction: argmax ids at the unmasked target positions.
        accuracy: mean token-level match at the unmasked target positions.

    Relies on the module-level ids ``GO`` and ``EOS``.
    """

    def __init__(self, size_layer, num_layers, embedded_size,
                 from_dict_size, to_dict_size, learning_rate, batch_size,
                 force_teaching_ratio = 0.5, beam_width = 10):
        """Build the graph.

        Args:
            size_layer: units of each GRU cell and of the attention layers.
            num_layers: number of stacked GRU cells in encoder and decoder.
            embedded_size: width of the source and target embeddings.
            from_dict_size: number of rows of the source embedding table.
            to_dict_size: number of rows of the target embedding table and
                width of the output projection.
            learning_rate: learning rate handed to ``AdamOptimizer``.
            batch_size: kept for interface compatibility only; the graph uses
                the dynamic batch dimension of ``X`` instead.
            force_teaching_ratio: the training helper samples from its own
                predictions with probability ``1 - force_teaching_ratio``.
            beam_width: beam width used by the inference decoder.
        """

        def cells(reuse=False):
            return tf.nn.rnn_cell.GRUCell(size_layer, reuse=reuse)

        def attention(encoder_out, seq_len, reuse=False):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units = size_layer,
                memory = encoder_out,
                memory_sequence_length = seq_len)
            return tf.contrib.seq2seq.AttentionWrapper(
                cell = tf.nn.rnn_cell.MultiRNNCell([cells(reuse) for _ in range(num_layers)]),
                attention_mechanism = attention_mechanism,
                attention_layer_size = size_layer)

        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        # Named distinctly so it no longer shadows the `batch_size` argument.
        dynamic_batch_size = tf.shape(self.X)[0]

        encoder_embedding = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))
        decoder_embedding = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))

        encoder_out, encoder_state = tf.nn.dynamic_rnn(
            cell = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)]),
            inputs = tf.nn.embedding_lookup(encoder_embedding, self.X),
            sequence_length = self.X_seq_len,
            dtype = tf.float32)

        # One projection instance shared by the training and the beam-search
        # decoder. It is built on its first call (inside the training decoder)
        # and the inference decoder then reuses the very same variables,
        # without depending on the private `_reuse` kwarg or on auto-generated
        # layer names lining up.
        output_layer = tf.layers.Dense(to_dict_size)

        with tf.variable_scope('decode'):
            decoder_cell = attention(encoder_out, self.X_seq_len, reuse=False)
            # Decoder input is the target shifted right: GO + Y[:, :-1].
            main = tf.strided_slice(self.Y, [0, 0], [dynamic_batch_size, -1], [1, 1])
            decoder_input = tf.concat([tf.fill([dynamic_batch_size, 1], GO), main], 1)
            training_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                inputs = tf.nn.embedding_lookup(decoder_embedding, decoder_input),
                sequence_length = self.Y_seq_len,
                embedding = decoder_embedding,
                sampling_probability = 1 - force_teaching_ratio,
                time_major = False)
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cell,
                helper = training_helper,
                initial_state = decoder_cell.zero_state(
                    dynamic_batch_size, tf.float32).clone(cell_state=encoder_state),
                output_layer = output_layer)
            training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = training_decoder,
                impute_finished = True,
                maximum_iterations = tf.reduce_max(self.Y_seq_len))
            self.training_logits = training_decoder_output.rnn_output

        with tf.variable_scope('decode', reuse=True):
            # Beam search works on a batch of size batch * beam_width, so every
            # encoder-side tensor is tiled accordingly.
            encoder_out_tiled = tf.contrib.seq2seq.tile_batch(encoder_out, beam_width)
            encoder_state_tiled = tf.contrib.seq2seq.tile_batch(encoder_state, beam_width)
            X_seq_len_tiled = tf.contrib.seq2seq.tile_batch(self.X_seq_len, beam_width)
            decoder_cell = attention(encoder_out_tiled, X_seq_len_tiled, reuse=True)
            predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell = decoder_cell,
                embedding = decoder_embedding,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [dynamic_batch_size]),
                end_token = EOS,
                initial_state = decoder_cell.zero_state(
                    dynamic_batch_size * beam_width, tf.float32).clone(
                        cell_state = encoder_state_tiled),
                beam_width = beam_width,
                output_layer = output_layer,
                length_penalty_weight = 0.0)
            # Decoding length is capped at the longest source sentence in the batch.
            predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = predicting_decoder,
                impute_finished = False,
                maximum_iterations = tf.reduce_max(self.X_seq_len))
            self.predicting_ids = predicting_decoder_output.predicted_ids[:, :, 0]

        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        # Accuracy is computed only over positions kept by the length mask.
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [11]:
# Model capacity: GRU/attention units, stacked GRU layers, embedding width.
size_layer, num_layers, embedded_size = 512, 2, 256
# Optimisation: Adam learning rate, minibatch size, number of passes over train_X.
learning_rate, batch_size, epoch = 1e-3, 96, 20

In [12]:
# Clear the default graph, open an interactive session, build the model in it
# and initialise every variable.
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Translator(
    size_layer = size_layer,
    num_layers = num_layers,
    embedded_size = embedded_size,
    from_dict_size = len(dictionary_from),
    to_dict_size = len(dictionary_to),
    learning_rate = learning_rate,
    batch_size = batch_size,
)
sess.run(tf.global_variables_initializer())

W0904 01:46:51.543623 140669811308352 deprecation.py:506] From /home/husein/.local/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:507: calling count_nonzero (from tensorflow.python.ops.math_ops) with axis is deprecated and will be removed in a future version.
Instructions for updating:
reduction_indices is deprecated, use axis instead
W0904 01:46:51.579996 140669811308352 deprecation.py:323] From <ipython-input-10-2d75e45f9bf6>:11: GRUCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
W0904 01:46:51.582330 140669811308352 deprecation.py:323] From <ipython-input-10-2d75e45f9bf6>:32: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCe

In [13]:
def str_idx(corpus, dic, unk=None):
    """Convert whitespace-tokenised sentences into lists of vocabulary ids.

    Args:
        corpus: iterable of strings; each one is split on whitespace.
        dic: mapping from token to integer id.
        unk: id used for tokens missing from ``dic``. When omitted, the
            module-level ``UNK`` id is used, which matches the previous
            behaviour of this function.

    Returns:
        A list with one list of ids per sentence, in corpus order.
    """
    # Resolve the fallback once; only touch the global when no id was supplied.
    fallback = UNK if unk is None else unk
    return [
        [dic.get(token, fallback) for token in sentence.split()]
        for sentence in corpus
    ]

def pad_sentence_batch(sentence_batch, pad_int):
    """Right-pad every sequence in a batch to the length of the longest one.

    Args:
        sentence_batch: sequence of id sequences (lists or tuples).
        pad_int: id appended to the shorter sequences.

    Returns:
        ``(padded_seqs, seq_lens)``: the padded sequences as new lists (inputs
        are not modified) and the original length of each sequence. An empty
        batch yields ``([], [])`` instead of raising from ``max()``.
    """
    seq_lens = [len(sentence) for sentence in sentence_batch]
    # default=0 keeps an empty batch from raising ValueError.
    max_sentence_len = max(seq_lens, default=0)
    padded_seqs = [
        list(sentence) + [pad_int] * (max_sentence_len - length)
        for sentence, length in zip(sentence_batch, seq_lens)
    ]
    return padded_seqs, seq_lens

In [14]:
# Replace the raw sentences with id sequences: source splits use the source
# vocabulary, target splits use the target vocabulary.
train_X, test_X = (str_idx(split, dictionary_from) for split in (train_X, test_X))
train_Y, test_Y = (str_idx(split, dictionary_to) for split in (train_Y, test_Y))

In [15]:
import tqdm


def _run_epoch(inputs, targets, train):
    """Run one pass over (inputs, targets) in minibatches of ``batch_size``.

    Args:
        inputs: list of source id sequences.
        targets: list of target id sequences, parallel to ``inputs``.
        train: when True the optimizer op is run as well, so weights are
            updated; when False only accuracy and cost are evaluated.

    Returns:
        ``(losses, accuracies)``: one entry per minibatch.
    """
    fetches = [model.accuracy, model.cost]
    if train:
        fetches.append(model.optimizer)
    losses, accuracies = [], []
    pbar = tqdm.tqdm(
        range(0, len(inputs), batch_size), desc = 'minibatch loop')
    for start in pbar:
        stop = min(start + batch_size, len(inputs))
        # The model derives sequence lengths itself, so the lengths returned
        # by pad_sentence_batch are not needed here.
        batch_x, _ = pad_sentence_batch(inputs[start : stop], PAD)
        batch_y, _ = pad_sentence_batch(targets[start : stop], PAD)
        results = sess.run(fetches,
                           feed_dict = {model.X: batch_x,
                                        model.Y: batch_y})
        accuracy, loss = results[0], results[1]
        losses.append(loss)
        accuracies.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    return losses, accuracies


for e in range(epoch):
    train_loss, train_acc = _run_epoch(train_X, train_Y, train = True)
    test_loss, test_acc = _run_epoch(test_X, test_Y, train = False)

    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 np.mean(train_loss),np.mean(train_acc)))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              np.mean(test_loss),np.mean(test_acc)))

minibatch loop: 100%|██████████| 1389/1389 [17:56<00:00,  1.29it/s, accuracy=0.144, cost=5.62]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.72it/s, accuracy=0.178, cost=5.41]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 1, training avg loss 5.722446, training avg acc 0.126634
epoch 1, testing avg loss 5.049315, testing avg acc 0.178725


minibatch loop: 100%|██████████| 1389/1389 [18:13<00:00,  1.27it/s, accuracy=0.207, cost=4.68]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.201, cost=5]   
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 2, training avg loss 4.635658, training avg acc 0.213174
epoch 2, testing avg loss 4.573058, testing avg acc 0.219527


minibatch loop: 100%|██████████| 1389/1389 [18:00<00:00,  1.29it/s, accuracy=0.248, cost=4.07]
minibatch loop: 100%|██████████| 30/30 [00:10<00:00,  2.78it/s, accuracy=0.203, cost=4.96]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 3, training avg loss 4.140654, training avg acc 0.250490
epoch 3, testing avg loss 4.613475, testing avg acc 0.211343


minibatch loop: 100%|██████████| 1389/1389 [18:17<00:00,  1.27it/s, accuracy=0.235, cost=3.94]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.69it/s, accuracy=0.21, cost=4.74] 
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 4, training avg loss 3.856998, training avg acc 0.273884
epoch 4, testing avg loss 4.282487, testing avg acc 0.250329


minibatch loop: 100%|██████████| 1389/1389 [18:40<00:00,  1.24it/s, accuracy=0.29, cost=3.46] 
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.232, cost=4.66]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 5, training avg loss 3.668442, training avg acc 0.291074
epoch 5, testing avg loss 4.284626, testing avg acc 0.248959


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.309, cost=3.19]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.68it/s, accuracy=0.228, cost=4.72]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 6, training avg loss 3.544738, training avg acc 0.301867
epoch 6, testing avg loss 4.312648, testing avg acc 0.243911


minibatch loop: 100%|██████████| 1389/1389 [18:36<00:00,  1.24it/s, accuracy=0.322, cost=3.13]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.221, cost=4.72]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 7, training avg loss 3.417497, training avg acc 0.314344
epoch 7, testing avg loss 4.300966, testing avg acc 0.249767


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.313, cost=3.08]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.66it/s, accuracy=0.225, cost=4.78]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 8, training avg loss 3.327761, training avg acc 0.322477
epoch 8, testing avg loss 4.370446, testing avg acc 0.243623


minibatch loop: 100%|██████████| 1389/1389 [18:39<00:00,  1.24it/s, accuracy=0.348, cost=3.07]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.205, cost=4.92]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 9, training avg loss 3.236349, training avg acc 0.332293
epoch 9, testing avg loss 4.380473, testing avg acc 0.244758


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.354, cost=3.13]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.208, cost=4.91]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 10, training avg loss 3.151763, training avg acc 0.341397
epoch 10, testing avg loss 4.473010, testing avg acc 0.233849


minibatch loop: 100%|██████████| 1389/1389 [18:37<00:00,  1.24it/s, accuracy=0.418, cost=2.72]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.196, cost=4.98]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 11, training avg loss 3.086361, training avg acc 0.348576
epoch 11, testing avg loss 4.613791, testing avg acc 0.224273


minibatch loop: 100%|██████████| 1389/1389 [18:37<00:00,  1.24it/s, accuracy=0.4, cost=2.62]  
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.217, cost=4.91]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 12, training avg loss 3.030483, training avg acc 0.354695
epoch 12, testing avg loss 4.503867, testing avg acc 0.238146


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.375, cost=2.75]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.204, cost=4.98]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 13, training avg loss 2.993183, training avg acc 0.358729
epoch 13, testing avg loss 4.438147, testing avg acc 0.245386


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.415, cost=2.61]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.186, cost=5.08]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 14, training avg loss 2.945512, training avg acc 0.364219
epoch 14, testing avg loss 4.549821, testing avg acc 0.236241


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.433, cost=2.45]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.65it/s, accuracy=0.216, cost=5.11]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 15, training avg loss 2.915582, training avg acc 0.367316
epoch 15, testing avg loss 4.646375, testing avg acc 0.228479


minibatch loop: 100%|██████████| 1389/1389 [18:37<00:00,  1.24it/s, accuracy=0.382, cost=2.58]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.68it/s, accuracy=0.226, cost=5.05]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 16, training avg loss 2.885841, training avg acc 0.370711
epoch 16, testing avg loss 4.552987, testing avg acc 0.237716


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.422, cost=2.56]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.68it/s, accuracy=0.207, cost=5.13]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 17, training avg loss 2.847207, training avg acc 0.375959
epoch 17, testing avg loss 4.551970, testing avg acc 0.240700


minibatch loop: 100%|██████████| 1389/1389 [18:38<00:00,  1.24it/s, accuracy=0.462, cost=2.39]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.67it/s, accuracy=0.211, cost=5.03]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 18, training avg loss 2.816338, training avg acc 0.379417
epoch 18, testing avg loss 4.553676, testing avg acc 0.239334


minibatch loop: 100%|██████████| 1389/1389 [18:39<00:00,  1.24it/s, accuracy=0.418, cost=2.4] 
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.68it/s, accuracy=0.198, cost=5.19]
minibatch loop:   0%|          | 0/1389 [00:00<?, ?it/s]

epoch 19, training avg loss 2.803320, training avg acc 0.380768
epoch 19, testing avg loss 4.635271, testing avg acc 0.234281


minibatch loop: 100%|██████████| 1389/1389 [18:05<00:00,  1.28it/s, accuracy=0.422, cost=2.43]
minibatch loop: 100%|██████████| 30/30 [00:11<00:00,  2.73it/s, accuracy=0.189, cost=5.24]

epoch 20, training avg loss 2.797876, training avg acc 0.381341
epoch 20, testing avg loss 4.757613, testing avg acc 0.222577





In [16]:
# Turn the keys of the reverse vocabulary into ints so that predicted ids can
# index it directly.
rev_dictionary_to = dict(
    (int(token_id), token) for token_id, token in rev_dictionary_to.items()
)

In [17]:
test_size = 20
sample = slice(None, test_size)

# Pad the first `test_size` test pairs; only the source batch is fed because
# beam-search decoding does not read model.Y.
batch_x, seq_x = pad_sentence_batch(test_X[sample], PAD)
batch_y, seq_y = pad_sentence_batch(test_Y[sample], PAD)
feed = {model.X: batch_x}
# Despite the name, this holds the token ids produced by model.predicting_ids.
logits = sess.run(model.predicting_ids, feed_dict = feed)
logits.shape

(20, 99)

In [18]:
rejected = ['PAD', 'EOS', 'UNK', 'GO']

# Walk predictions and references in lockstep. zip stops at the shorter of the
# two, so a test set with fewer than `test_size` rows no longer raises
# IndexError, and the comprehension variable no longer shadows the row index.
for row, (predicted_ids, actual_ids) in enumerate(zip(logits[: test_size], batch_y[: test_size])):
    predict = [rev_dictionary_to[token] for token in predicted_ids
               if rev_dictionary_to[token] not in rejected]
    actual = [rev_dictionary_to[token] for token in actual_ids
              if rev_dictionary_to[token] not in rejected]
    print(row, 'predict:', ' '.join(predict))
    print(row, 'actual:', ' '.join(actual))
    print()

0 predict: Làm thế nào để nói trong <NUM> phút về những người phụ nữ , làm thế nào về sức mạnh mạnh mẽ của những người nắm giữ trong cuộc sống của một bé gái bé gái , mẹ và và và mẹ và làm thế ở Trung ở Trung Quốc hơn <NUM> năm <NUM> năm , để giữ giữ cuộc sống của cô bé và và nói chuyện với đứa bé gái cô bé và nói chuyện với đứa bé gái và và nói chuyện với các bé gái và và nói chuyện với con gái và nói chuyện
0 actual: Làm sao tôi có thể trình bày trong <NUM> phút về sợi dây liên kết những người phụ nữ qua ba thế hệ , về việc làm thế nào những sợi dây mạnh mẽ đáng kinh ngạc ấy đã níu chặt lấy cuộc sống của một cô bé bốn tuổi co quắp với đứa em gái nhỏ của cô bé , với mẹ và bà trong suốt năm ngày đêm trên con thuyền nhỏ lênh đênh trên Biển Đông hơn <NUM> năm trước , những sợi dây liên kết đã níu lấy cuộc đời cô bé ấy và không bao giờ rời đi - - cô bé ấy giờ sống ở San Francisco và đang nói chuyện với các bạn hôm nay ?

1 predict: Câu chuyện không phải hoàn thành chuyện hoàn . . . . . . 