In [1]:
import tensorflow as tf
from tensorflow.python.ops import lookup_ops
from tensorflow.python.layers import core as layers_core

In [2]:
tf.reset_default_graph()

with open('/tmp/toy_data.txt', 'w') as data_file:
    for _ in range(100):
        data_file.write("abc\tdef\n")
        data_file.write("aabc\tdef\n")
        data_file.write("def\taf\n")
        data_file.write("def\taf\n")
    
table = lookup_ops.index_table_from_tensor(
    tf.constant(['PAD'] + list("abcdef"))
)

dataset = tf.contrib.data.TextLineDataset('/tmp/toy_data.txt')
dataset = dataset.map(lambda string: tf.string_split([string], delimiter='\t').values)
source = dataset.map(lambda string: string[0])
target = dataset.map(lambda string: string[1])

source = source.map(lambda string: tf.string_split([string], delimiter='').values)
source = source.map(lambda words: table.lookup(words))
target = target.map(lambda string: tf.string_split([string], delimiter='').values)
target = target.map(lambda words: table.lookup(words))

src_tgt_dataset = tf.contrib.data.Dataset.zip((source, target))
src_tgt_dataset = src_tgt_dataset.map(
    lambda src, tgt: (src,
                      tf.concat(([0], tgt), 0),
                      tf.concat((tgt, [0]), 0),)
)
src_tgt_dataset = src_tgt_dataset.map(
    lambda src, tgt_in, tgt_out: (src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in))
)

# if I set the third padding shape to tf.TensorShape([5]),
# it fails if there is no 4 character long sample in the batch
# WHY???
batched = src_tgt_dataset.padded_batch(1, padded_shapes=(
    tf.TensorShape([6]), tf.TensorShape([5]), tf.TensorShape([None]),
         tf.TensorShape([]), tf.TensorShape([])))
batched_iter = batched.make_initializable_iterator()
src_ids, tgt_in_ids, tgt_out_ids, src_size, tgt_size = batched_iter.get_next()

table_initializer = tf.tables_initializer()

embedding = tf.get_variable("embedding", [7, 3], dtype=tf.float32)

encoder_emb_inp = tf.nn.embedding_lookup(embedding, src_ids)
    
encoder_cell = tf.contrib.rnn.BasicLSTMCell(16)

encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell, encoder_emb_inp, dtype=tf.float32,
                                                   sequence_length=src_size)

decoder_initial_state = encoder_state
decoder_cell = tf.contrib.rnn.BasicLSTMCell(16)
decoder_emb_inp = tf.nn.embedding_lookup(embedding, tgt_in_ids)
helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inp, tgt_size)
decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, decoder_initial_state)
outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(decoder)
sample_id = outputs.sample_id
output_proj = layers_core.Dense(7, name="output_projection")
logits = output_proj(outputs.rnn_output)

crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tgt_out_ids, logits=logits)
target_weights = tf.sequence_mask(tgt_size, tf.shape(tgt_out_ids)[1], tf.float32)
loss = tf.reduce_sum(crossent * target_weights) / tf.to_float(5)

optimizer =tf.train.AdamOptimizer(0.1)
params = tf.trainable_variables()
gradients = tf.gradients(loss, params)
update = optimizer.apply_gradients(zip(gradients, params))

sess = tf.InteractiveSession()
sess.run(table_initializer)
sess.run(batched_iter.initializer)
sess.run(tf.global_variables_initializer())

for i in range(10):
    sess.run(update)
    l = sess.run(loss)
    print(i, l)

0 1.1095
1 0.881862
2 0.826861
3 0.717166
4 0.629941
5 0.405157
6 0.468002
7 0.158661
8 0.265651
9 0.082523


In [3]:
print(sess.run([src_ids, tgt_in_ids]))

[array([[1, 2, 3, 0, 0, 0]]), array([[0, 4, 5, 6, 0]])]


In [4]:
tgt_out_ids.shape

TensorShape([Dimension(None), Dimension(None)])

In [5]:
o = sess.run(outputs)

In [6]:
probs = o.rnn_output

import numpy as np

np.argmax(probs, axis=-1)

array([[5, 1, 1, 0]])

In [7]:
sess.run(table.size())

7

# Questions

* Should the decoder cell be different from the encoder cell?