In [1]:
# import pip
# sorted(["%s==%s" % (i.key, i.version) for i in pip.get_installed_distributions()])

In [2]:
# import os
# os.getcwd()

In [3]:
import tensorflow as tf
import numpy as np

# Import MINST data
from tensorflow.examples.tutorials.mnist import input_data

''' To classify images using a bidirectional reccurent neural network, we consider every image row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then handle 28 sequences of 28 steps for every sample. '''

In [4]:
tf.reset_default_graph()

MNIST_dataset_path = 'MNIST_data/'
log_path = 'log'

# if folder exists, remove all content
if tf.gfile.Exists(log_path):
    tf.gfile.DeleteRecursively(log_path)
tf.gfile.MakeDirs(log_path)

mnist = input_data.read_data_sets(MNIST_dataset_path, one_hot = True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [5]:
# Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
dropout = 0.9  # keep prob

# Network Parameters
n_input = 28 # MNIST data input (img shape: 28*28)
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
# with tf.name_scope('input'):
x = tf.placeholder("float", [None, n_steps, n_input], name = 'InputData')
y = tf.placeholder("float", [None, n_classes], name = 'LabelData')
# keep_prob = tf.placeholder(tf.float32)
# tf.summary.scalar('dropout_keep_probability', keep_prob)
    
# # Define weights
# weights = {
#     # Hidden layer weights => 2*n_hidden because of foward + backward cells
#     'out': tf.Variable(tf.truncated_normal([2*n_hidden, n_classes]))
# }
# biases = {
#     'out': tf.Variable(tf.truncated_normal([n_classes]))
# }

In [6]:
def weight_variable(shape):
    """Create a weight variable with appropriate initialization."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """Create a bias variable with appropriate initialization."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

In [7]:
def variable_summaries(var, name):
    """Attach a lot of summaries to a Tensor."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean/' + name, mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        tf.summary.scalar('sttdev/' + name, stddev)
        tf.summary.scalar('max/' + name, tf.reduce_max(var))
        tf.summary.scalar('min/' + name, tf.reduce_min(var))
        tf.summary.histogram(name, var)

In [8]:
def BiRNN(x, input_dim, output_dim, layer_name, activate = tf.nn.relu):

    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    with tf.name_scope(layer_name):
        
#         with tf.name_scope('weights'):
        weights = weight_variable([2 * input_dim, output_dim])
        variable_summaries(weights, layer_name + '/weights')
#         with tf.name_scope('biases'):
        biases = bias_variable([output_dim])
        variable_summaries(biases, layer_name + '/biases')
        
        # Permuting batch_size and n_steps
        x = tf.transpose(x, [1, 0, 2])
        # Reshape to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, n_input])
        # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        x = tf.split(0, n_steps, x)

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

        # Get lstm cell output
#         try:
        outputs, _, _ = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                              dtype=tf.float32)
#         except Exception: # Old TensorFlow version only returns outputs not states
#             outputs = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
#                                             dtype=tf.float32)

        # Linear activation, using rnn inner loop last output
        lin_activations = tf.matmul(outputs[-1], weights) + biases
        activations = activate(lin_activations, 'activation')
        tf.summary.histogram(layer_name + '/activations', activations)
        return activations

In [9]:
with tf.name_scope('Model'):
    pred = BiRNN(x, n_hidden, n_classes, 'layer_1')
#     variable_summaries()

with tf.name_scope('Loss'):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

with tf.name_scope('Optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# # Evaluate model
with tf.name_scope('Accuracy'):
    correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# # Initializing the variables
init = tf.global_variables_initializer()
# init = tf.initialize_all_variables().run()

In [10]:
# Create a summary to monitor cost tensor
tf.summary.scalar("loss", cost)
# Create a summary to monitor accuracy tensor
tf.summary.scalar("accuracy", accuracy)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()

In [11]:
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    
    # op to write logs to Tensorboard
    summary_writer = tf.summary.FileWriter(log_path, graph=tf.get_default_graph())
    
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 seq of 28 elements
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            
            # Run optimization op (backprop), cost op (to get loss value)
            # and summary nodes
            foo, bar, summary = sess.run([optimizer, cost, merged_summary_op],
                                     feed_dict={x: batch_x, y: batch_y})
            # Write logs at every iteration
            summary_writer.add_summary(summary, step)
            
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print ("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))
        step += 1

    print ("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    print ("Testing Accuracy:", \
        "{:.6f}".format(sess.run(accuracy, feed_dict={x: test_data, y: test_label})))
    
    print("Run the command line:\n" +
          "--> tensorboard --logdir=" + str(log_path) + "/" +
          "\nThen open http://0.0.0.0:6006/ into your web browser")

Iter 1280, Minibatch Loss= 2.179289, Training Accuracy= 0.25000
Iter 2560, Minibatch Loss= 1.945461, Training Accuracy= 0.33594
Iter 3840, Minibatch Loss= 1.580908, Training Accuracy= 0.49219
Iter 5120, Minibatch Loss= 1.241914, Training Accuracy= 0.67188
Iter 6400, Minibatch Loss= 0.908585, Training Accuracy= 0.78125
Iter 7680, Minibatch Loss= 0.958676, Training Accuracy= 0.71875
Iter 8960, Minibatch Loss= 0.747770, Training Accuracy= 0.78125
Iter 10240, Minibatch Loss= 0.616611, Training Accuracy= 0.87500
Iter 11520, Minibatch Loss= 0.441207, Training Accuracy= 0.87500
Iter 12800, Minibatch Loss= 0.564387, Training Accuracy= 0.81250
Iter 14080, Minibatch Loss= 0.399285, Training Accuracy= 0.90625
Iter 15360, Minibatch Loss= 0.349946, Training Accuracy= 0.91406
Iter 16640, Minibatch Loss= 0.391109, Training Accuracy= 0.90625
Iter 17920, Minibatch Loss= 0.280629, Training Accuracy= 0.90625
Iter 19200, Minibatch Loss= 0.348255, Training Accuracy= 0.90625
Iter 20480, Minibatch Loss= 0.17

https://www.tensorflow.org/versions/r0.10/api_docs/python/nn/#rnn

https://github.com/dennybritz/tf-rnn/blob/master/rnn_cell_wrappers.py.ipynb

https://www.tensorflow.org/how_tos/summaries_and_tensorboard/

https://www.tensorflow.org/how_tos/graph_viz/