## Seq2Seq : Predicting the same sequence for a given sequence

In [1]:
#dependencies
import numpy as np #matrix math
import tensorflow as tf #ML
import helpers #formatting data, and generating random sequence data

In [2]:
# Drop whatever graph is left over from an earlier run of this notebook so the
# cells below do not define their ops and variables a second time, then open
# the session the rest of the notebook uses.
tf.reset_default_graph()
sess = tf.InteractiveSession()

In [3]:
tf.__version__

'1.2.1'

In [4]:
# Reserved token ids: PAD fills unused positions, EOS marks end-of-sequence.
PAD, EOS = 0, 1

# Number of rows in the embedding table (one per token id).
vocab_size = 10
# Width of each token's embedding vector.
input_embedding_size = 20

# The encoder runs in both directions and its forward/backward final states
# are concatenated, so the decoder state has to be twice as wide.
encoder_hidden_units = 20
decoder_hidden_units = 2 * encoder_hidden_units

In [5]:
# Graph inputs, fed at run time.

# Token ids, laid out time-major: [max_time, batch_size].
encoder_inputs = tf.placeholder(tf.int32, shape=(None, None), name="encoder_inputs")
# Length of every sequence in the batch, one entry per batch element.
encoder_inputs_length = tf.placeholder(tf.int32, shape=(None,), name="encode_inputs_length")
# Token ids the decoder is expected to produce.
# NOTE(review): presumably laid out like encoder_inputs; not consumed by the
# cells shown here -- confirm against the loss definition.
decoder_targets = tf.placeholder(tf.int32, shape=(None, None), name="decoder_targets")

In [6]:
# Lookup table with one row of input_embedding_size floats per token id,
# initialised from a uniform distribution between -1 and 1.
embeddings = tf.Variable(
    tf.random_uniform(shape=[vocab_size, input_embedding_size], minval=-1.0, maxval=1.0),
    dtype=tf.float32,
)

# Replace every token id in encoder_inputs with its embedding row.
encoder_inputs_embedded = tf.nn.embedding_lookup(params=embeddings, ids=encoder_inputs)

In [7]:
#define the encoder
from tensorflow.python.ops.rnn_cell import LSTMCell, LSTMStateTuple

In [8]:
# LSTM cell with encoder_hidden_units units, used by the bidirectional encoder below.
encoder_cell = LSTMCell(encoder_hidden_units)

In [9]:
# Run the encoder over the embedded inputs in both directions. Inputs are
# time-major, and sequence_length tells the RNN where each sequence really ends.
#
# The backward direction gets its own LSTMCell. An RNNCell caches the variable
# scope of its first use (TensorFlow >= 1.2), so passing the same instance as
# both cell_fw and cell_bw makes the two directions share a single set of
# weights; TensorFlow 1.1 rejects that reuse with an error.
((encoder_fw_outputs,
  encoder_bw_outputs),
 (encoder_fw_final_state,
  encoder_bw_final_state)) = (
        tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                        cell_bw=LSTMCell(encoder_hidden_units),
                                        inputs=encoder_inputs_embedded,
                                        sequence_length=encoder_inputs_length,
                                        dtype=tf.float32, time_major=True)
                                )

In [10]:
# Join the forward and backward outputs along the last (feature) axis.
encoder_outputs = tf.concat([encoder_fw_outputs, encoder_bw_outputs], axis=2)

# Do the same for the final LSTM states: the cell state (c) and the hidden
# state (h) of both directions are joined along axis 1.
encoder_final_state_c = tf.concat(
    [encoder_fw_final_state.c, encoder_bw_final_state.c], axis=1)
encoder_final_state_h = tf.concat(
    [encoder_fw_final_state.h, encoder_bw_final_state.h], axis=1)

# Repackage as the (c, h) tuple LSTM cells use for their state, so it can be
# handed to the decoder as its initial state.
encoder_final_state = LSTMStateTuple(c=encoder_final_state_c, h=encoder_final_state_h)


In [11]:
# Decoder cell: decoder_hidden_units wide, i.e. twice the encoder width, so it
# can start from the concatenated forward/backward encoder state.
decoder_cell = LSTMCell(decoder_hidden_units)
# encoder_inputs is time-major, so its dynamic shape unpacks as (max_time, batch_size).
encoder_max_time, batch_size = tf.unstack(tf.shape(encoder_inputs))
# Let the decoder run 3 steps longer than each input sequence.
# NOTE(review): presumably room for the EOS token plus some slack -- confirm the intended margin.
decoder_lengths = encoder_inputs_length + 3



In [12]:
# Output projection: maps a decoder output of width decoder_hidden_units to one
# logit per vocabulary entry (logits = output @ W + b).
#
# Both dimensions belong inside the shape list. With only
# [decoder_hidden_units] in the list, vocab_size is taken as minval and the
# tensor is 1-D, which cannot be used in tf.matmul(previous_output, W).
W = tf.Variable(tf.random_uniform([decoder_hidden_units, vocab_size], -1, 1), dtype=tf.float32)
b = tf.Variable(tf.zeros([vocab_size]), dtype=tf.float32)

In [13]:
# Embedded EOS and PAD inputs for the decoder, one row per batch element.

# The slices below hard-code the token ids through tf.ones / tf.zeros, so fail
# right away if the constants are ever changed.
assert PAD == 0 and EOS == 1

eos_time_slice = tf.ones(shape=[batch_size], dtype=tf.int32, name="EOS")
pad_time_slice = tf.zeros(shape=[batch_size], dtype=tf.int32, name="PAD")

eos_step_embedded = tf.nn.embedding_lookup(params=embeddings, ids=eos_time_slice)
pad_step_embedded = tf.nn.embedding_lookup(params=embeddings, ids=pad_time_slice)

In [14]:
# The decoder loop is spelled out by hand for raw_rnn instead of relying on
# dynamic_rnn, so that each step's input can be chosen explicitly. This is the
# callback for the very first step.
def loop_fn_initial():
    """Build the loop values for the first decoder step.

    Returns:
        A 5-tuple ``(elements_finished, input, cell_state, cell_output,
        loop_state)``:

        * elements_finished -- boolean tensor ``0 >= decoder_lengths``; all
          False as long as every decoder length is positive.
        * input -- the embedded EOS token for every batch element.
        * cell_state -- the encoder's final state, used to start the decoder.
        * cell_output -- None, since no step has run yet.
        * loop_state -- None, no extra information is carried through the loop.
    """
    finished_at_start = 0 >= decoder_lengths
    no_cell_output = None
    no_loop_state = None
    return (
        finished_at_start,
        eos_step_embedded,
        encoder_final_state,
        no_cell_output,
        no_loop_state,
    )

In [None]:
# Decoder feedback step: the token predicted at one step is embedded and fed
# back in as the input of the next step. No attention over the encoder outputs
# is computed here.
def loop_fn_transition(time, previous_output, previous_state, previous_loop_state):
    """Build the loop values for every decoder step after the first.

    Args:
        time: current step index, compared against decoder_lengths.
        previous_output: cell output of the preceding step; projected through
            W and b to obtain logits.
        previous_state: cell state of the preceding step, passed on unchanged.
        previous_loop_state: not used.

    Returns:
        A 5-tuple ``(elements_finished, input, cell_state, cell_output,
        loop_state)`` where ``input`` is the embedded PAD token once every
        sequence in the batch has finished, and otherwise the embedding of the
        most likely token according to the previous output.
    """
    def get_next_input():
        # Unnormalised scores (logits) over the vocabulary for the previous output.
        logits = tf.matmul(previous_output, W) + b
        # Greedy choice: id of the highest-scoring token for each batch element.
        predicted_ids = tf.argmax(logits, axis=1)
        # The embedded prediction becomes the next decoder input.
        return tf.nn.embedding_lookup(embeddings, predicted_ids)

    # Boolean tensor of [batch_size]: which sequences have reached their length.
    elements_finished = time >= decoder_lengths

    # Scalar boolean: True only when every sequence in the batch is done.
    all_finished = tf.reduce_all(elements_finished)
    # Feed PAD once the whole batch is finished, otherwise the last prediction.
    next_input = tf.cond(all_finished, lambda: pad_step_embedded, get_next_input)

    # State and output are passed through unchanged; no loop state is kept.
    return (
        elements_finished,
        next_input,
        previous_state,
        previous_output,
        None,
    )