In [47]:
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp/data/", one_hot = True)

Extracting tmp/data/train-images-idx3-ubyte.gz
Extracting tmp/data/train-labels-idx1-ubyte.gz
Extracting tmp/data/t10k-images-idx3-ubyte.gz
Extracting tmp/data/t10k-labels-idx1-ubyte.gz


In [4]:
element_size= 28
time_steps = 28
num_classes = 10
batch_size = 128
hidden_layer_size = 128

In [5]:
LOG_DIR = "logs/RNN_with_summaries"

In [6]:
_inputs = tf.placeholder(tf.float32, shape=[None, time_steps, element_size], name="inputs") # Shape of data is
# shape=[None, time_steps, element_size] = (None, 28, 28) 28 time steps of 28 row/column vectors
y = tf.placeholder(tf.float32, shape=[None, num_classes], name="labels")

In [12]:
batch_x, batch_y = mnist.train.next_batch(batch_size)
#print(batch_x.shape) --128 images of 784 pixels
#print(batch_y.shape) -- 128 labels of size 10*1
batch_x = batch_x.reshape((batch_size, time_steps, element_size)) # reshape above to 128 images of shape 28 * 28

In [40]:
def variable_summaries(var):
    with tf.name_scope("summaries"):
        mean = tf.reduce_mean(var)
        tf.summary.scalar("mean", mean)
        with tf.name_scope("stddev"):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar("stddev", stddev)
        tf.summary.scalar("max", tf.reduce_max(var))
        tf.summary.scalar("min", tf.reduce_min(var))
        tf.summary.histogram("histogram", var)
        
with tf.name_scope('rnn_weights'):
    with tf.name_scope("W_x"):
        Wx = tf.Variable(tf.zeros([element_size, hidden_layer_size]))
        variable_summaries(Wx)
    with tf.name_scope("W_h"):
        Wh = tf.Variable(tf.zeros([hidden_layer_size, hidden_layer_size]))
        variable_summaries(Wh)
    with tf.name_scope("bias"):
        b_rnn = tf.Variable(tf.zeros([hidden_layer_size]))
        variable_summaries(b_rnn)

# One step of internal calculation for RNN
def rnn_step(previous_hidden_state, x):
    # print(x.shape) -- (?,28)
    # print(Wx.shape) -- (28, 128)
    current_hidden_state = tf.tanh(tf.matmul(previous_hidden_state, Wh) + tf.matmul(x, Wx) + b_rnn) 
    # print(current_hidden_state.shape) -- (128, 128)
    return current_hidden_state

processed_input = tf.transpose(_inputs, perm=[1,0,2]) # we transpose here so as to make the input compatible for matmul
initial_hidden = tf.zeros([batch_size, hidden_layer_size]) # initial hidden state
all_hidden_states = tf.scan(rnn_step, processed_input, initializer = initial_hidden, name = 'states') # This calculates
# all the hidden states of the rnn
#print(all_hidden_states) -- all_hidden_states contains 28 hidden state each of size (128,128)

(?, 28)
(28, 128)
(128, 128)
Tensor("states_4/TensorArrayStack/TensorArrayGatherV3:0", shape=(28, 128, 128), dtype=float32)


In [42]:
with tf.name_scope("linear_layer_weights") as scope:
    with tf.name_scope("W_linear"):
        Wl = tf.Variable(tf.truncated_normal([hidden_layer_size,num_classes], mean =0, stddev=0.01))
        variable_summaries(Wl)
    with tf.name_scope("Bias_linear"):
        bl = tf.Variable(tf.truncated_normal([num_classes], mean =0, stddev=0.01))
        variable_summaries(bl)
    
def get_linear_layer(hidden_state):
    return tf.matmul(hidden_state, Wl) + bl

with tf.name_scope('linear_layer_weights') as scope:
    all_outputs = tf.map_fn(get_linear_layer, all_hidden_states)
    print(all_outputs) # this returns 28 outputs of shape (128,10) as a result of the get_linear_layer application
    output = all_outputs[-1] # we select the last output from the above 28 results
    print(output)
    tf.summary.histogram('outputs', output)

Tensor("linear_layer_weights_7/map/TensorArrayStack/TensorArrayGatherV3:0", shape=(28, 128, 10), dtype=float32)
Tensor("linear_layer_weights_7/strided_slice:0", shape=(128, 10), dtype=float32)


In [43]:
# This step performs softmax on the output value 
with tf.name_scope("cross_entropy"):
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y))
    tf.summary.scalar('cross_entropy', cross_entropy)

# We use this step for training 
with tf.name_scope("train"):
    train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy) # performs gradient descent algorithm

# find the accuracy
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(output,1))
    accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32)))*100
    tf.summary.scalar('accuracy', accuracy)
    
merged = tf.summary.merge_all()
print(merged)

Tensor("Merge_2/MergeSummary:0", shape=(), dtype=string)


In [32]:
test_data = mnist.test.images[:batch_size].reshape((-1,time_steps, element_size))
test_label = mnist.test.labels[:batch_size]

In [36]:
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(LOG_DIR+"/train", graph = tf.get_default_graph())
    test_writer = tf.summary.FileWriter(LOG_DIR+"/test", graph = tf.get_default_graph())
    sess.run(tf.global_variables_initializer())
    
    for i in range(10000):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = batch_x.reshape((batch_size, time_steps, element_size))
        summary, _ = sess.run([merged, train_step], feed_dict = {_inputs:batch_x, y:batch_y})
        train_writer.add_summary(summary, i)
        
        if i%1000 == 0:
            acc, loss =  sess.run([accuracy, cross_entropy], feed_dict = {_inputs:batch_x, y:batch_y})
            print ("Iter " + str(i) + ", Minibatch Loss= " + "{:.6f}".format(loss) + ", Training Accuracy= " + "{:.5f}".format(acc))
    
        if i % 10:
            summary, acc = sess.run([merged, accuracy], feed_dict = {_inputs:batch_x, y:batch_y})
            test_writer.add_summary(summary, i)
    test_acc = sess.run(accuracy, feed_dict={_inputs: test_data,y: test_label})
    print ("Test Accuracy:", test_acc)   

Iter 0, Minibatch Loss= 2.302419, Training Accuracy= 10.93750
Iter 1000, Minibatch Loss= 1.245614, Training Accuracy= 50.00000
Iter 2000, Minibatch Loss= 0.566871, Training Accuracy= 87.50000
Iter 3000, Minibatch Loss= 0.326850, Training Accuracy= 89.06250
Iter 4000, Minibatch Loss= 0.230052, Training Accuracy= 91.40625
Iter 5000, Minibatch Loss= 0.179282, Training Accuracy= 95.31250
Iter 6000, Minibatch Loss= 0.018218, Training Accuracy= 100.00000
Iter 7000, Minibatch Loss= 0.047549, Training Accuracy= 98.43750
Iter 8000, Minibatch Loss= 0.024658, Training Accuracy= 100.00000
Iter 9000, Minibatch Loss= 0.018582, Training Accuracy= 100.00000
Test Accuracy: 98.4375
