In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Hyper-parameters shared by the cells below.
max_length = 100   # not referenced by the cells visible here
vocab_size = 2     # Input vocabulary
hidden_size = 128  # number of units passed to the GRU cell
seq_length = 20    # number of time steps (one input placeholder per step)
batch_size = 32    # rows per training batch; also sizes the initial RNN state

In [3]:
# One placeholder per time step; each receives a (batch, vocab_size) slice of
# the one-hot encoded input. The leading None leaves the batch dimension open.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because later cells refer to it.
input = [tf.placeholder(tf.float32, shape=(None, vocab_size), name="inp%i" % t) for t in range(seq_length)]
# One-hot target. The training cell encodes the number of ones in each
# sequence, which takes seq_length + 1 distinct values, so the width is derived
# from seq_length instead of being hard-coded.
label = tf.placeholder(tf.float32, shape=(None, seq_length + 1), name="label")

In [4]:
# Collects the cell output of every time step while the RNN is unrolled.
outputs = []

# A single GRU cell applied at every time step.
cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_size)

# All-zero initial recurrent state.
# NOTE(review): the state is built for exactly batch_size rows, so feeds
# presumably have to use that batch size even though the input placeholders
# leave the batch dimension open - confirm before feeding other batch sizes.
state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)

In [5]:
# Unroll the RNN over time: every placeholder in `input` is pushed through the
# same cell, and the recurrent state is threaded from one step to the next.
# `outputs` and the initial `state` come from the previous cell; re-running
# only this cell appends further entries to `outputs` and continues from the
# last `state`.
with tf.variable_scope('rnn_unfolding') as varscope:
    for input_ in input:
        output, state = cell(input_, state)
        outputs.append(output)
        # Switch the scope to reuse mode once the first step has been built,
        # so later steps share the cell's variables instead of creating new ones.
        varscope.reuse_variables()

In [6]:
# Limit this process to a 0.2 fraction of GPU memory, then open the session
# with that configuration.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
session_config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=session_config)

In [7]:
# Attach MLP to last output
# A single dense layer maps the final GRU output (hidden_size units) to one
# unit per target class. The sizes are derived from the hyper-parameters so
# that changing hidden_size or seq_length keeps the layer consistent.
n_classes = seq_length + 1   # possible counts of ones: 0 .. seq_length
W = tf.Variable(tf.random_normal((hidden_size, n_classes), stddev=0.01))
b = tf.Variable(np.zeros((n_classes,)), dtype=tf.float32)

# Element-wise sigmoid over the class scores. This rebinds `output`, which the
# unrolling loop previously used for the per-step cell output.
output = tf.sigmoid(tf.matmul(outputs[-1], W) + b)

In [8]:
# Loss
# Clip both probabilities away from zero so the logarithms below stay finite.
clipped_output         = tf.clip_by_value(output, 1e-10, 1.0)
clipped_1_minus_output = tf.clip_by_value(1 - output, 1e-10, 1.0)

# Per-unit binary cross-entropy, summed over every unit and every batch row.
positive_term = label * tf.log(clipped_output)
negative_term = (1 - label) * tf.log(clipped_1_minus_output)
loss = -tf.reduce_sum(positive_term + negative_term)

# Fraction of rows whose highest-scoring unit matches the one-hot target.
predicted_class = tf.argmax(output, 1)
target_class = tf.argmax(label, 1)
is_correct = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [9]:
# Optimizer
# Adam with its default settings; running train_op performs one update step
# that minimises `loss`.
optimizer = tf.train.AdamOptimizer()
train_op = optimizer.minimize(loss)

In [10]:
# Initialize
# Build the initialisation op for every variable created so far and run it in
# the session.
# NOTE(review): newer TensorFlow 1.x releases deprecate this call in favour of
# tf.global_variables_initializer - check the installed version before switching.
init_op = tf.initialize_all_variables()
sess.run(init_op)

In [None]:
def one_hotify(vector, output_vocabulary_size):
    """One-hot encode an array of integer class indices along a new last axis.

    Args:
        vector: integer array (or nested sequence) of class indices, any shape.
        output_vocabulary_size: length of the one-hot axis.

    Returns:
        Float array of shape ``vector.shape + (output_vocabulary_size,)`` that
        is 1 at ``[..., vector[...]]`` and 0 everywhere else.

    Raises:
        IndexError: if an index does not fit the one-hot axis, or if `vector`
            does not hold integers.
    """
    vector = np.asarray(vector)

    # Create the result vector
    vector_one_hot = np.zeros(vector.shape + (output_vocabulary_size,))

    # Use fancy indexing to activate positions. The index has to be a tuple:
    # a list of index arrays is rejected (or read as one stacked index array)
    # by current NumPy releases.
    positions = tuple(np.indices(vector.shape)) + (vector,)
    vector_one_hot[positions] = 1

    return vector_one_hot

In [None]:
# Train
# Every iteration draws a fresh random batch of binary sequences; the target
# of a sequence is the number of ones it contains.
losses     = []
accuracies = []

n_iterations = 2 * 10 ** 4   # total number of training steps
report_every = 5 * 10 ** 2   # print progress every this many steps

for iter_ in range(n_iterations):
    # Generate samples randomly
    x_   = np.random.randint(0, 2, (batch_size, seq_length))
    x_1h = one_hotify(x_, vocab_size)

    y_   = np.sum(x_, 1)
    y_1h = one_hotify(y_, seq_length + 1)   # because #_output_symbols = seq_length + 1

    # Calculate loss and backprop
    # Time step t of the one-hot batch goes to the t-th input placeholder.
    feed_dict = {input[t]: x_1h[:,t,:] for t in range(seq_length)}
    feed_dict.update({label: y_1h})
    loss_t, acc_t, _ = sess.run((loss, accuracy, train_op), feed_dict)

    losses.append(loss_t)
    accuracies.append(acc_t * 100)   # stored as a percentage

    if iter_ % report_every == 0 and iter_ > 0:
        # The reported figures are means over ALL iterations so far, not just
        # the most recent window. A single parenthesised argument keeps this
        # print valid on both Python 2 and Python 3.
        print('Iteration: %5d Loss: %3.4f Accuracy: %3.2f%%' % (iter_, np.mean(losses), np.mean(accuracies)))

Iteration:   500 Loss: 111.0955 Accuracy: 0.17%
Iteration:  1000 Loss: 106.4354 Accuracy: 0.17%
Iteration:  1500 Loss: 103.1345 Accuracy: 0.18%
Iteration:  2000 Loss: 93.3388 Accuracy: 0.29%
Iteration:  2500 Loss: 81.8088 Accuracy: 0.40%
Iteration:  3000 Loss: 71.2362 Accuracy: 0.50%
Iteration:  3500 Loss: 62.8803 Accuracy: 0.56%
Iteration:  4000 Loss: 56.0681 Accuracy: 0.61%
Iteration:  4500 Loss: 50.4782 Accuracy: 0.65%
Iteration:  5000 Loss: 46.0655 Accuracy: 0.68%
Iteration:  5500 Loss: 42.3031 Accuracy: 0.71%
Iteration:  6000 Loss: 39.0224 Accuracy: 0.73%
Iteration:  6500 Loss: 36.1969 Accuracy: 0.75%
Iteration:  7000 Loss: 33.7473 Accuracy: 0.77%
Iteration:  7500 Loss: 31.8561 Accuracy: 0.78%
Iteration:  8000 Loss: 29.9516 Accuracy: 0.80%
Iteration:  8500 Loss: 28.2615 Accuracy: 0.81%
Iteration:  9000 Loss: 26.7822 Accuracy: 0.82%
Iteration:  9500 Loss: 25.5526 Accuracy: 0.83%
Iteration: 10000 Loss: 24.3823 Accuracy: 0.84%
