# Classifying MNIST with RNN

In [5]:
# Import MNIST data
# `input_data` is a project-local helper module (presumably the TensorFlow
# MNIST tutorial loader - TODO confirm it is on the notebook's import path).
import input_data
# Reads the dataset archives from MNIST_data/; one_hot=True requests labels as
# one-hot vectors, matching the [None, num_classes] label placeholder used later.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

import tensorflow as tf
import numpy as np

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


> To classify images using a recurrent neural network, we treat each image as a sequence of pixel rows. Because an MNIST image is 28×28 px, every sample is one sequence of 28 timesteps, where each timestep is a row of 28 pixels.

### params

In [35]:
lr = 0.001  # learning rate handed to the Adam optimizer
# Budget of training *samples*, not iterations (100 thousand, not 1 million):
# the training loop runs while batch_size*step < training_steps,
# i.e. about training_steps / batch_size optimizer updates.
training_steps = 100000
batch_size = 128  # number of samples per training / evaluation batch
display_step = 100 # print the model's loss and accuracy every "display_step" steps

### network parameters

In [7]:
input_dim = 28  # features per timestep: one image row of 28 pixels
timesteps = 28  # sequence length: the 28 rows of a 28x28 image
num_hidden = 128 # number of hidden units
num_classes = 10  # width of the one-hot label vector / number of output logits

### Graph inputs

In [9]:
# Signature reminder: tf.placeholder(dtype, shape=None, name=None)
# Leading dimension is None so any batch size can be fed.

# Input images as sequences: [batch, timesteps, input_dim]
x = tf.placeholder(dtype=tf.float32, shape=[None, timesteps, input_dim], name='X')
# One-hot target labels: [batch, num_classes]
y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='Y')
# Initial state fed to the LSTM cell: [batch, 2*num_hidden]
# (presumably cell state and hidden state side by side - TODO confirm)
istate = tf.placeholder(dtype=tf.float32, shape=[None, 2*num_hidden])

### Weights and Biases

In [11]:
# Trainable projection matrices, randomly initialised from a normal distribution.
#   'hidden': input_dim  -> num_hidden   (applied to every timestep's input)
#   'out'   : num_hidden -> num_classes  (applied to the last LSTM output)
weights = dict(
    hidden=tf.Variable(tf.random_normal([input_dim, num_hidden])),
    out=tf.Variable(tf.random_normal([num_hidden, num_classes])),
)
# Matching bias vectors for the two projections above.
biases = dict(
    hidden=tf.Variable(tf.random_normal([num_hidden])),
    out=tf.Variable(tf.random_normal([num_classes])),
)

## RNN

In [19]:
def RNN(_X, istate, weights, biases):
    """Build the LSTM classifier graph and return its output logits.

    Args:
        _X: input tensor of shape (batch_size, timesteps, input_dim).
        istate: initial state tensor handed to the LSTM cell.
        weights: dict with 'hidden' and 'out' projection matrices.
        biases: dict with 'hidden' and 'out' bias vectors.

    Returns:
        Tensor of logits computed from the LSTM output at the last timestep.

    Relies on the notebook-level constants input_dim, num_hidden and timesteps.
    """
    # (batch, time, dim) -> (time, batch, dim) so that rows are grouped by timestep
    time_major = tf.transpose(_X, perm=[1, 0, 2])
    # Collapse time and batch into one axis: (time*batch, dim)
    flat_steps = tf.reshape(time_major, [-1, input_dim])

    # Linear projection of every timestep's input into the hidden size
    projected = tf.matmul(flat_steps, weights['hidden']) + biases['hidden']

    cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_hidden)

    # tf.nn.rnn wants a Python list with one tensor per timestep, so cut the
    # flattened activations along axis 0 into "timesteps" equal pieces.
    per_step_inputs = tf.split(0, timesteps, projected)

    # Unroll the cell over the sequence; only the per-step outputs are used
    step_outputs, _final_state = tf.nn.rnn(
        cell=cell,
        inputs=per_step_inputs,
        initial_state=istate,
    )

    # Classify from the output of the final timestep
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']

## Define Model, Training ops

In [21]:
# Output prediction: logits produced by the recurrent network
pred = RNN(x, istate, weights, biases)

# Loss: softmax cross-entropy per sample, then averaged over the batch.
# Arguments are passed by keyword so logits and labels cannot be swapped.
per_sample_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
loss = tf.reduce_mean(per_sample_loss)

# Training op: a single Adam update that minimizes the mean loss
adam = tf.train.AdamOptimizer(learning_rate=lr)
optimizer = adam.minimize(loss)



## Evaluation

In [22]:
# Accuracy: share of samples whose highest-scoring class matches the label
true_class = tf.argmax(y, 1)          # index of the 1 in each one-hot label
predicted_class = tf.argmax(pred, 1)  # index of the largest logit
correct_pred = tf.equal(true_class, predicted_class)
# Booleans -> 0.0 / 1.0 so that their mean is the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Launch session

In [36]:
# Op that initializes every variable defined in the graph
init_op = tf.initialize_all_variables()


def zero_state(n_samples):
    """Return an all-zero initial LSTM state for a batch of n_samples sequences."""
    return np.zeros([n_samples, 2*num_hidden])


with tf.Session() as sess:
    sess.run(init_op)

    # Train until the number of samples seen (batch_size*step) reaches the
    # "training_steps" budget; each pass of the loop is one optimizer update.
    step = 1
    while batch_size*step < training_steps:
        # Next training batch, images reshaped from flat vectors to
        # [ batch_size x timesteps x input_dim ]
        batchX, batchY = mnist.train.next_batch(batch_size)
        batchX = batchX.reshape([batch_size, timesteps, input_dim])
        train_feed = {
            x: batchX,
            y: batchY,
            istate: zero_state(batch_size),
        }
        sess.run(optimizer, feed_dict=train_feed)

        # Every "display_step" updates, report loss and accuracy measured on
        # one batch drawn from the test set
        if step % display_step == 0:
            t_batchX, t_batchY = mnist.test.next_batch(batch_size)
            t_batchX = t_batchX.reshape([batch_size, timesteps, input_dim])
            eval_feed = {
                x: t_batchX,
                y: t_batchY,
                istate: zero_state(batch_size),
            }
            acc, loss_val = sess.run([accuracy, loss], feed_dict=eval_feed)
            print('Iteration : {0}\t Batch Loss : {1}\t Accuracy : {2}'.format(step,loss_val,acc))

        step += 1

    # Training finished
    print('>> Optimization complete')

    # Final accuracy, measured on a single test batch twice the training batch size
    final_size = batch_size*2
    t_batchX, t_batchY = mnist.test.next_batch(final_size)
    t_batchX = t_batchX.reshape([final_size, timesteps, input_dim])
    final_feed = {
        x: t_batchX,
        y: t_batchY,
        istate: zero_state(final_size),
    }
    acc = sess.run(accuracy, feed_dict=final_feed)
    print('>> Final Accuracy : {}'.format(acc))

Iteration : 100	 Batch Loss : 0.690336465836	 Accuracy : 0.8046875
Iteration : 200	 Batch Loss : 0.362896382809	 Accuracy : 0.8671875
Iteration : 300	 Batch Loss : 0.226416990161	 Accuracy : 0.90625
Iteration : 400	 Batch Loss : 0.119445301592	 Accuracy : 0.9765625
Iteration : 500	 Batch Loss : 0.171115219593	 Accuracy : 0.9296875
Iteration : 600	 Batch Loss : 0.192652314901	 Accuracy : 0.9296875
Iteration : 700	 Batch Loss : 0.168055459857	 Accuracy : 0.9296875
>> Optimization complete
>> Final Accuracy : 0.96875
