Basic seq2seq model with Tensorflow
======================

In [2]:
__author__ = 'Nicholas Tomlin'
__version__ = "CSLI Summer 2018 internship"

### Imports 
Tested with Tensorflow 1.8.0. Using the Dense layer for seq2seq inference decoder, which will be described below. We'll need to add the `src/models/` file to our Python path to import the base RNN model.

In [4]:
import numpy as np
import tensorflow as tf
import warnings
import random
from tensorflow.python.layers.core import Dense

In [7]:
import sys
sys.path.append('../src/models/')
from tf_rnn_classifier import TfRNNClassifier

### Basic seq2seq class definition
We build a single graph which includes embeddings, encoding, and two separate decoding functions. One decoding function is used during training, and the other is used for inference (prediction). 

In [9]:
class TfEncoderDecoder(TfRNNClassifier):
    def __init__(self, max_input_length=5, max_output_length=5, num_layers=2, **kwargs):
        self.max_input_length = max_input_length
        self.max_output_length = max_output_length
        self.num_layers = num_layers

        super(TfEncoderDecoder, self).__init__(**kwargs)

    def build_graph(self):
        self._define_embedding()
        self._init_placeholders()
        self._init_embedding()
        self.encoding_layer()
        self.decoding_layer()


    def _init_placeholders(self):
        self.encoder_inputs = tf.placeholder(
            shape=[None, None],
            dtype=tf.int32,
            name="encoder_inputs")

        self.encoder_lengths = tf.placeholder(
            shape=[None],
            dtype=tf.int32,
            name="encoder_lengths")

        self.decoder_inputs= tf.placeholder(
            shape=[None, None],
            dtype=tf.int32,
            name="decoder_inputs")

        self.decoder_targets = tf.placeholder(
            shape=[None, None],
            dtype=tf.int32,
            name="decoder_targets")

        self.decoder_lengths = tf.placeholder(
            shape=[None],
            dtype=tf.int32,
            name="decoder_lengths")

    def _init_embedding(self):
        self.embedded_encoder_inputs = tf.nn.embedding_lookup(self.embedding, self.encoder_inputs)
        self.embedded_decoder_inputs = tf.nn.embedding_lookup(self.embedding, self.decoder_inputs)

    def encoding_layer(self):
        encoder_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_dim, activation=self.hidden_activation)
        encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
            cell=encoder_cell,
            inputs=self.embedded_encoder_inputs,
            time_major=True,
            dtype=tf.float32,
            scope="encoding_layer")
        self.encoder_final_state = encoder_final_state

    def decoding_layer(self):
        self.decoding_training()
        self.decoding_inference()

    def decoding_training(self):
        self.decoder_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_dim, activation=self.hidden_activation)

        decoder_outputs, decoder_final_state = tf.nn.dynamic_rnn(
        self.decoder_cell,
            self.embedded_decoder_inputs,
            initial_state=self.encoder_final_state,
            time_major=True,
            dtype=tf.float32,
            scope="decoding_layer")
        decoder_logits = tf.contrib.layers.linear(decoder_outputs, self.vocab_size)
        
        self.training_outputs = decoder_outputs
        self.training_logits = decoder_logits


    def decoding_inference(self):
        output_layer = Dense(
            self.vocab_size,
            kernel_initializer = tf.truncated_normal_initializer(mean = 0.0, stddev=0.1))

        start_tokens = tf.tile(
            input=tf.constant([2], dtype=tf.int32), # TODO: don't hardcode start token like this (2)
            multiples=[self.batch_size],
            name='start_tokens')

        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=self.embedding,
            start_tokens=start_tokens,
            end_token=3) # TODO: don't hardcode end token like this (3)

        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
            self.decoder_cell,
            helper,
            self.encoder_final_state,
            output_layer)

        inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(
            inference_decoder,
            impute_finished=True,
            maximum_iterations=self.max_output_length)[0]

        self.inference_decoder_output = inference_decoder_output 
        self.inference_logits = inference_decoder_output.sample_id


    def prepare_output_data(self, y):
        return y


	def get_cost_function(self, **kwargs):
		"""Uses `softmax_cross_entropy_with_logits` so the
		input should *not* have a softmax activation
		applied to it.
		"""
		return tf.reduce_mean(
			tf.nn.softmax_cross_entropy_with_logits_v2(
				logits=self.training_logits,
				labels=tf.one_hot(self.decoder_targets, depth=self.vocab_size, dtype=tf.float32)))


	def predict(self, X):
		X, x_lengths = self._convert_X(X)
		sliced_logits = tf.slice(X)

		answer_logits = self.sess.run(self.inference_logits, {self.encoder_inputs: [X]*self.batch_size, 
                                      self.decoder_lengths: [len(X)]*self.batch_size, 
                                      self.encoder_lengths: [len(X)]*self.batch_size})[0] 

		return predictions


	def train_dict(self, X, y):
		decoder_inputs = [["<GO>"] + list(seq) for seq in y]
		decoder_targets = [list(seq) + ["<EOS>"] for seq in y]

		encoder_inputs, encoder_lengths = self._convert_X(X)
		decoder_inputs, decoder_lengths = self._convert_X(decoder_inputs)
		decoder_targets, _ = self._convert_X(decoder_targets)
		return {self.encoder_inputs: encoder_inputs,
				self.decoder_inputs: decoder_inputs,
				self.decoder_targets: decoder_targets,
				self.encoder_lengths: encoder_lengths,
				self.decoder_lengths: decoder_lengths}

TabError: inconsistent use of tabs and spaces in indentation (<ipython-input-9-d295f8ca58d7>, line 59)