%TODO
* add vocab_size feature to data_reader & vocab
* properly handle first and last messages in dialogs (they have only one target)
* next_seq decoder
* prev_seq decoder

In [1]:
import data_reader
import utils
import tensorflow as tf
import math
import numpy as np

In [2]:
# Load the chat corpus through the project's sequence reader.
dr = data_reader.SequenceDataReader('data/chats.txt')

# Build three index-aligned lists: for position i, prev_X[i] is the entry that
# precedes origin_X[i] and next_X[i] is the entry that follows it.
origin_X = dr.get_data()
# Shifted right by one; the first entry has no predecessor, so an empty list is used.
prev_X = [[]]+dr.get_data()[:-1]
# Shifted left by one; the last entry has no successor, so an empty list is used.
next_X = dr.get_data()[1:]+[[]]

# NOTE(review): the same data is passed for both arguments and prev_X / next_X are
# not used in this cell -- presumably a stand-in until the decoders are wired up.
# NOTE(review): get_data() is called five times here; if it is deterministic and
# side-effect free it could be called once and reused -- confirm before changing.
it = utils.seq2seq_data_iterator(dr.get_data(), dr.get_data())

In [3]:
# Model hyper-parameters, registered as integer flags and read back through FLAGS.
FLAGS = tf.app.flags.FLAGS
for flag_name, flag_default, flag_help in (
        ('num_hidden', 256, 'Hidden size of GRU cell.'),
        ('embedding_size', 300, 'The size of word embeddings.'),
        ('vocab_size', 20000, 'Size of vocabulary. Most frequent words are used.'),
):
    tf.app.flags.DEFINE_integer(flag_name, flag_default, flag_help)

## Define placeholders and function for filling feed dictionary.

Because all `*_decoder` functions in `tf.nn.seq2seq` take Python lists of per-step tensors as inputs, we can't support variable-length sequences in the decoders the way we do for the encoder (a single `input_placeholder` with dynamic dimensions).

To work around this, we could either use bucketing (`tf.nn.seq2seq.model_with_buckets`) or simply compute the maximum line length in the dataset and create that many placeholders for each decoder.

`SequenceDataReader` exposes this maximum length as its `max_len` property.

In [4]:
def _step_placeholders(name_prefix, num_steps):
    """Create one int32 placeholder of shape [None] per decoder time step.

    The placeholders are named '<name_prefix>0', '<name_prefix>1', ... and are
    returned as a list in step order.
    """
    steps = []
    for step in range(num_steps):
        steps.append(tf.placeholder(tf.int32, shape=[None],
                                    name='{0}{1}'.format(name_prefix, step)))
    return steps


# Encoder side: a 2-D batch of token ids plus the length of each sequence
# (presumably [batch, time] and [batch] -- TODO confirm against the data iterator).
input_placeholder = tf.placeholder(tf.int32, shape=[None, None], name='enc_inputs')
sequence_length_placeholder = tf.placeholder(tf.int32, shape=[None], name='seq_lengths')


# next_seq decoder: target tokens, one placeholder per time step.
next_seq_output_placeholder = _step_placeholders('dec_next_seq_output', dr.max_len)
# Decoder inputs are the targets with the first step dropped, so there is one
# input step fewer than there are target steps.
# NOTE(review): seq2seq setups usually shift the other way (targets = inputs[1:]);
# confirm this direction is intended before a loss is built on top of it.
next_seq_input_placeholder = next_seq_output_placeholder[1:]


# prev_seq decoder: same layout as the next_seq decoder above.
prev_seq_output_placeholder = _step_placeholders('dec_prev_seq_output', dr.max_len)
prev_seq_input_placeholder = prev_seq_output_placeholder[1:]



def fill_feed_dict(input):
    """Build the feed dictionary for the encoder placeholders.

    Args:
        input: batch of token-id sequences to feed into `input_placeholder`
            (assumed to be padded with the vocabulary's PAD token -- TODO confirm).

    Returns:
        dict mapping `input_placeholder` to `input` and
        `sequence_length_placeholder` to the value computed by
        `utils.padded_sequence_lengths` for that batch.
    """
    pad_id = dr.vocab.encode_word(dr.vocab.PAD_TOKEN)
    lengths = utils.padded_sequence_lengths(input, pad_value=pad_id)
    # Only the encoder side is fed here; the decoder placeholders are not populated.
    return {input_placeholder: input, sequence_length_placeholder: lengths}

## Define matrix for word embeddings.
* Tensor `embedded` is used as `inputs` in encoder network.

In [5]:
with tf.variable_scope('embeddings'):
    # Uniform(-a, a) has variance a^2 / 3, so a = sqrt(3) gives unit variance,
    # which is the variance wanted for freshly initialised embeddings.
    sqrt3 = math.sqrt(3)
    initializer = tf.random_uniform_initializer(minval=-sqrt3, maxval=sqrt3)
    embedding_matrix = tf.get_variable(
        "embedding_matrix",
        shape=[FLAGS.vocab_size, FLAGS.embedding_size],
        initializer=initializer,
    )

# Replace every token id in the encoder input with its embedding row.
embedded = tf.nn.embedding_lookup(embedding_matrix, input_placeholder)

## Define encoder rnn.
* Tensor `encoder_state` will be used as initial state in decoder network.

In [6]:
with tf.variable_scope('encoder'):
    cell = tf.nn.rnn_cell.GRUCell(FLAGS.num_hidden)
    # Bind the per-step outputs to a name instead of discarding them: the
    # session cell further down fetches `encoder_outputs`, which would
    # otherwise raise NameError on a fresh kernel.
    # `encoder_state` is the final GRU state; it seeds both decoders.
    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        cell,
        dtype=tf.float32,
        inputs=embedded,
        sequence_length=sequence_length_placeholder)

## Define two decoders: one for decoding the previous sequence and one for the next sequence.

* Instead of using `OutputProjectionWrapper` we could initialize output projection explicitly and pass it to `tf.nn.seq2seq._extract_argmax_and_embed`.


* Also, we can't use `EmbeddingWrapper` because we want to share embeddings between encoder and decoders.


* **NOTE** that we actually create two output variables for each decoder: one for training (without feeding the decoder's previous output back in as its next input) and one for predicting. So `next_seq_outputs_predict` and `prev_seq_outputs_predict` are not used while training the model; they are only needed for predicting sequences once the model is trained.

### Define next sequence decoder.

In [7]:
def decoder_loop_fn(prev, _):
    """Turn one decoder step's output into the input for the next step.

    Args:
        prev: 2D Tensor of shape [batch_size x output_size], the decoder
            output of the previous step.
        _: step index; passed through unchanged to the underlying loop function.

    Returns:
        2D Tensor of shape [batch_size x input_size]: the embedding (looked up
        in `embedding_matrix`) of the argmax of `prev`.
    """
    # _extract_argmax_and_embed is a factory that returns a loop function, so
    # the function it builds has to be applied to `prev`; returning the factory
    # result itself would hand back a callable instead of a tensor.
    loop_function = tf.nn.seq2seq._extract_argmax_and_embed(embedding_matrix)
    return loop_function(prev, _)

# Loop functions that feed the embedded argmax of the previous decoder output
# back in as the next input. `loop_function_train` is kept defined for any cell
# that still refers to it, but the training decoder below no longer uses it.
loop_function_train = tf.nn.seq2seq._extract_argmax_and_embed(embedding_matrix, update_embedding=True)
loop_function_predict = tf.nn.seq2seq._extract_argmax_and_embed(embedding_matrix, update_embedding=False)

with tf.variable_scope("next_seq_decoder"):
    # One embedded input tensor per decoder time step (embeddings shared with the encoder).
    embedded_next_seq_inputs = [tf.nn.embedding_lookup(embedding_matrix, inp) for inp in next_seq_input_placeholder]

    cell = tf.nn.rnn_cell.GRUCell(FLAGS.num_hidden)
    # Project the GRU output to vocabulary-sized logits.
    cell = tf.nn.rnn_cell.OutputProjectionWrapper(cell, FLAGS.vocab_size)

    # Training decoder: loop_function=None so every step consumes the provided
    # input rather than the decoder's own previous prediction. Passing a loop
    # function here would make the decoder ignore all inputs after the first,
    # contradicting the stated design (training without feeding previous output).
    next_seq_outputs_train, _ = tf.nn.seq2seq.rnn_decoder(embedded_next_seq_inputs, initial_state=encoder_state,
                                                          cell=cell, loop_function=None)
with tf.variable_scope("next_seq_decoder", reuse=True):
    # Prediction decoder: shares the variables created above (reuse=True) and
    # feeds its own previous output back in through loop_function_predict.
    next_seq_outputs_predict, _ = tf.nn.seq2seq.rnn_decoder(embedded_next_seq_inputs, initial_state=encoder_state,
                                                            cell=cell, loop_function=loop_function_predict)

### The same thing with prev sequence decoder.

In [9]:
with tf.variable_scope("prev_seq_decoder"):
    # One embedded input tensor per decoder time step (embeddings shared with the encoder).
    embedded_prev_seq_inputs = [tf.nn.embedding_lookup(embedding_matrix, inp) for inp in prev_seq_input_placeholder]

    cell = tf.nn.rnn_cell.GRUCell(FLAGS.num_hidden)
    # Project the GRU output to vocabulary-sized logits.
    cell = tf.nn.rnn_cell.OutputProjectionWrapper(cell, FLAGS.vocab_size)

    # Training decoder: loop_function=None so every step consumes the provided
    # input rather than the decoder's own previous prediction. Passing a loop
    # function here would make the decoder ignore all inputs after the first,
    # contradicting the stated design (training without feeding previous output).
    prev_seq_outputs_train, _ = tf.nn.seq2seq.rnn_decoder(embedded_prev_seq_inputs, initial_state=encoder_state,
                                                          cell=cell, loop_function=None)
with tf.variable_scope("prev_seq_decoder", reuse=True):
    # Prediction decoder: shares the variables created above (reuse=True) and
    # feeds its own previous output back in through loop_function_predict.
    prev_seq_outputs_predict, _ = tf.nn.seq2seq.rnn_decoder(embedded_prev_seq_inputs, initial_state=encoder_state,
                                                            cell=cell, loop_function=loop_function_predict)

In [17]:
# Smoke test: push a single hand-written sequence through the encoder.
sample_batch = np.array([[1, 2, 3, 0, 0, 0]])
with tf.Session() as sess:
    init = tf.initialize_all_variables()
    sess.run(init)
    # NOTE(review): `encoder_outputs` must be bound by the encoder cell for this
    # fetch to work -- confirm it is defined before running this cell.
    outputs, state = sess.run(
        [encoder_outputs, encoder_state],
        feed_dict=fill_feed_dict(sample_batch))

In [22]:
import tensorflow as tf

# Three toy sequences of different lengths, used to experiment with batching.
data = [[1], [1, 2, 3], [5, 6, 7, 8, 9, 10]]
# One 1-D int32 placeholder per toy sequence, named 'batch0', 'batch1', ...
data_pl = [
    tf.placeholder(tf.int32, shape=[None], name='batch{0}'.format(i))
    for i in range(len(data))
]

# Batch the placeholders one example at a time with dynamic padding enabled.
# NOTE(review): the output recorded for this cell is an AttributeError, so this
# experiment does not currently run cleanly.
batched = tf.train.batch(data_pl, batch_size=1, dynamic_pad=True)

AttributeError: 'Tensor' object has no attribute 'dequeue'

In [11]:
init = tf.initialize_all_variables()
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        sess.run(init)
        # RES = sess.run(batched, feed_dict={data_pl[i]: data[i] for i in range(len(data))})
        # NOTE(review): `produced` is not defined in any visible cell -- confirm
        # where it comes from before relying on this cell after a kernel restart.
        PRODUCED = sess.run(produced, feed_dict=None)
        # RES = sess.run(batched, feed_dict=None)
    finally:
        # Always stop and join the queue-runner threads, even when a run call
        # above raises; otherwise they are left running against a closing session.
        coord.request_stop()
        coord.join(threads)

In [12]:
# Display the values fetched for `produced` by the queue-runner session cell.
PRODUCED

[array([1], dtype=int32),
 array([1, 2, 3], dtype=int32),
 array([ 5,  6,  7,  8,  9, 10], dtype=int32)]

In [7]:
# NOTE(review): the only assignments to RES in the visible cells are commented
# out, so this cell depends on leftover kernel state and fails on a fresh run.
RES

[array([1, 1, 1], dtype=int32),
 array([1, 1, 1], dtype=int32),
 array([5, 5, 5], dtype=int32)]

In [25]:
# NOTE(review): `res` is not defined in any visible cell; this display relies on
# state from a cell that is no longer in the notebook -- confirm or remove.
res[0]['y']

[array([[1],
        [1],
        [1]], dtype=int32), array([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]], dtype=int32), array([[ 5,  6,  7,  8,  9, 10],
        [ 5,  6,  7,  8,  9, 10],
        [ 5,  6,  7,  8,  9, 10]], dtype=int32)]

In [None]:
# NOTE(review): scratch cell that appears never to have been executed. The call
# passes no arguments, unlike the earlier tf.train.batch call in this notebook,
# which supplies a tensor list and batch_size -- fill in or remove.
tf.train.batch()