In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
from os.path import exists
import tensorflow as tf
from tensorflow.python.layers import core as layers_core

# Start from an empty default graph and a fresh interactive session.
# tf.reset_default_graph() is documented as undefined behaviour while a session
# is still active, so when this cell is re-run the session created by the
# previous run is closed first (this also releases the resources it holds).
try:
    sess.close()
except NameError:
    pass  # first execution on a fresh kernel: there is no earlier session

tf.reset_default_graph()
sess = tf.InteractiveSession()

  return f(*args, **kwds)


In [2]:
# Show the installed TensorFlow version. The cells below rely on the TF 1.x
# graph/session API (tf.placeholder, tf.contrib.seq2seq).
tf.__version__

'1.4.0'

In [3]:
# Encoder embedding: turn source token ids into dense vectors.

# Size of the source-side embedding matrix.
src_vocab_size = 1000  # hparam
src_embedding_size = 300  # hparam

# Trainable embedding matrix with one row per source vocabulary entry.
# TODO: check tutorial for more
embedding_encoder = tf.get_variable(
    name="embedding_encoder",
    shape=[src_vocab_size, src_embedding_size],
)

# Source token ids, time-major: [max_encoder_time, batch_size]
encoder_inputs = tf.placeholder(dtype=tf.int32, shape=(None, None), name='encoder_inputs')
print(encoder_inputs)

# The batch size is not known statically, so it is read from axis 1 of the
# fed input at run time.
batch_size = tf.shape(encoder_inputs)[1]
print(batch_size)

# Length of each source sequence: [batch_size]
source_sequence_length = tf.placeholder(dtype=tf.int32, shape=(None,), name='source_sequence_length')
print(source_sequence_length)

# Embedding lookup result: [max_encoder_time, batch_size, src_embedding_size]
encoder_emb_inp = tf.nn.embedding_lookup(params=embedding_encoder, ids=encoder_inputs)
encoder_emb_inp

Tensor("encoder_inputs:0", shape=(?, ?), dtype=int32)
Tensor("strided_slice:0", shape=(), dtype=int32)
Tensor("source_sequence_length:0", shape=(?,), dtype=int32)


<tf.Tensor 'embedding_lookup:0' shape=(?, ?, 300) dtype=float32>

In [4]:
# Encoder: a single LSTM layer run over the embedded source sequence.

encoder_hidden_units = 32  # hparam
encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=encoder_hidden_units)  # hparam

# time_major=True matches the [max_encoder_time, batch_size, ...] layout of
# encoder_emb_inp; sequence_length passes the per-example source lengths.
encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
    cell=encoder_cell,
    inputs=encoder_emb_inp,
    sequence_length=source_sequence_length,
    dtype=tf.float32,
    time_major=True,
)

# Displayed as the cell result; later cells use it to initialise the decoder.
encoder_final_state

LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(?, 32) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 32) dtype=float32>)

In [5]:
# Decoder embedding: turn target token ids into dense vectors.

# Size of the target-side embedding matrix.
tgt_vocab_size = 1000  # hparam
tgt_embedding_size = 300  # hparam

# Trainable embedding matrix with one row per target vocabulary entry.
# TODO: check tutorial for more
embedding_decoder = tf.get_variable(
    name="embedding_decoder",
    shape=[tgt_vocab_size, tgt_embedding_size],
)

# Decoder input token ids, time-major: [max_decoder_time, batch_size]
decoder_inputs = tf.placeholder(dtype=tf.int32, shape=(None, None), name='decoder_inputs')
print(decoder_inputs)

# Length of each target sequence: [batch_size]
target_sequence_length = tf.placeholder(dtype=tf.int32, shape=(None,), name='target_sequence_length')
print(target_sequence_length)

# Embedding lookup result: [max_decoder_time, batch_size, tgt_embedding_size]
decoder_emb_inp = tf.nn.embedding_lookup(params=embedding_decoder, ids=decoder_inputs)
decoder_emb_inp

Tensor("decoder_inputs:0", shape=(?, ?), dtype=int32)
Tensor("target_sequence_length:0", shape=(?,), dtype=int32)


<tf.Tensor 'embedding_lookup_1:0' shape=(?, ?, 300) dtype=float32>

In [6]:
# Decoder (training graph): LSTM decoder driven by the embedded decoder inputs
# and initialised with the encoder's final state.

# Kept equal to the encoder size because encoder_final_state is passed
# directly as the decoder's initial state below.
decoder_hidden_units = encoder_hidden_units  # hparam
decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=decoder_hidden_units)  # hparam

# Helper that reads the decoder's inputs from decoder_emb_inp (time-major).
# TODO: check tutorial for more
training_helper = tf.contrib.seq2seq.TrainingHelper(
    inputs=decoder_emb_inp,
    sequence_length=target_sequence_length,
    time_major=True,
)

# Bias-free dense layer projecting cell outputs to tgt_vocab_size logits.
projection_layer = layers_core.Dense(units=tgt_vocab_size, use_bias=False)

decoder = tf.contrib.seq2seq.BasicDecoder(
    cell=decoder_cell,
    helper=training_helper,
    initial_state=encoder_final_state,
    output_layer=projection_layer,
)

# Dynamic decoding, keeping the time-major output layout.
# TODO: check tutorial for more
final_outputs, final_decoder_state, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
    decoder=decoder,
    output_time_major=True,
    impute_finished=True,
)

print(final_outputs)
print(final_decoder_state)
print(final_sequence_lengths)

# Projected outputs: [max_decoder_time, batch_size, tgt_vocab_size]
decoder_logits_train = final_outputs.rnn_output

decoder_logits_train

BasicDecoderOutput(rnn_output=<tf.Tensor 'decoder/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, ?, 1000) dtype=float32>, sample_id=<tf.Tensor 'decoder/TensorArrayStack_1/TensorArrayGatherV3:0' shape=(?, ?) dtype=int32>)
LSTMStateTuple(c=<tf.Tensor 'decoder/while/Exit_3:0' shape=(?, 32) dtype=float32>, h=<tf.Tensor 'decoder/while/Exit_4:0' shape=(?, 32) dtype=float32>)
Tensor("decoder/while/Exit_7:0", shape=(?,), dtype=int32)


<tf.Tensor 'decoder/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, ?, 1000) dtype=float32>

In [7]:
# Loss: weighted cross-entropy summed over every position, divided by batch size.

# Target token ids: [max_decoder_time, batch_size]
decoder_targets = tf.placeholder(dtype=tf.int32, shape=(None, None), name='decoder_targets')

# Per-position loss weights: [max_decoder_time, batch_size]
# NOTE(review): presumably 1.0 on real tokens and 0.0 on padding -- confirm against the feed code.
target_weights = tf.placeholder(dtype=tf.float32, shape=(None, None), name='target_weights')

crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=decoder_targets,
    logits=decoder_logits_train,
)

# batch_size is an int32 tensor, so it is cast to float32 before the division.
train_loss = tf.reduce_sum(crossent * target_weights) / tf.cast(batch_size, dtype=tf.float32)

In [8]:
# Gradients of the training loss, clipped by their global norm.

# Every trainable variable created so far (embeddings, LSTM cells, projection).
params = tf.trainable_variables()
print(params)

gradients = tf.gradients(ys=train_loss, xs=params)

# max_gradient_norm is often set to a value like 5 or 1.
max_gradient_norm = 5  # hparam

# The second return value (the pre-clipping global norm) is discarded.
clipped_gradients, _ = tf.clip_by_global_norm(t_list=gradients, clip_norm=max_gradient_norm)

[<tf.Variable 'embedding_encoder:0' shape=(1000, 300) dtype=float32_ref>, <tf.Variable 'rnn/basic_lstm_cell/kernel:0' shape=(332, 128) dtype=float32_ref>, <tf.Variable 'rnn/basic_lstm_cell/bias:0' shape=(128,) dtype=float32_ref>, <tf.Variable 'embedding_decoder:0' shape=(1000, 300) dtype=float32_ref>, <tf.Variable 'decoder/basic_lstm_cell/kernel:0' shape=(332, 128) dtype=float32_ref>, <tf.Variable 'decoder/basic_lstm_cell/bias:0' shape=(128,) dtype=float32_ref>, <tf.Variable 'decoder/dense/kernel:0' shape=(32, 1000) dtype=float32_ref>]


In [9]:
# Optimization: Adam applied to the clipped gradients.

# learning_rate is usually in the range 0.0001 to 0.001.
learning_rate = 0.001  # hparam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

# Pair each clipped gradient with its variable; running update_step performs
# one parameter update.
update_step = optimizer.apply_gradients(grads_and_vars=zip(clipped_gradients, params))

In [10]:
# Inference: greedy decoding that reuses the decoder cell and projection layer
# built for training, so both graphs share the same weights.

# Start-/end-of-sentence token ids. The start id is looked up in
# embedding_decoder and the end id is compared with the greedily sampled
# (non-negative) ids, so both must be valid rows in [0, tgt_vocab_size).
# The previous values (-1 / -2) were out of range for the lookup and could
# never match a sampled id, so decoding always ran to maximum_iterations.
# NOTE(review): 1 and 2 assume the target vocabulary reserves those ids for
# <s> and </s> (the NMT tutorial layout) -- confirm against the real vocabulary.
# TODO: check tutorial for more
tgt_sos_id = 1
tgt_eos_id = 2
assert 0 <= tgt_sos_id < tgt_vocab_size, "tgt_sos_id must index a row of embedding_decoder"
assert 0 <= tgt_eos_id < tgt_vocab_size, "tgt_eos_id must index a row of embedding_decoder"

# Helper that starts every sequence in the batch with tgt_sos_id and feeds the
# embedding of each step's argmax token back in as the next input.
inf_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    embedding_decoder,
    tf.fill([batch_size], tgt_sos_id), tgt_eos_id)

inference_decoder = tf.contrib.seq2seq.BasicDecoder(
    decoder_cell, inf_helper, encoder_final_state,
    output_layer=projection_layer)

# One heuristic is to decode up to two times the longest source sentence.
# source_sequence_length is int32, so the product is already an integer
# tensor and needs no rounding.
maximum_iterations = tf.reduce_max(source_sequence_length) * 2

# Dynamic decoding, bounded by maximum_iterations.
# TODO: check tutorial for more
final_outputs_test, final_decoder_state_test, final_sequence_lengths_test = tf.contrib.seq2seq.dynamic_decode(
    inference_decoder, output_time_major=True, impute_finished=True,
    maximum_iterations=maximum_iterations)

# Predicted token ids, time-major: [max_decoded_time, batch_size]
translations_test = final_outputs_test.sample_id
translations_test

<tf.Tensor 'decoder_1/TensorArrayStack_1/TensorArrayGatherV3:0' shape=(?, ?) dtype=int32>