In [1]:
'''
Simple RNN using tensorflow core API.
After 130000 epochs, it gives test accuracy of 70.64%
'''
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
import tensorflow as tf

In [3]:
'''
Get 28*28 dimensional image. Consider 28 timesteps of 28 long vector as input to RNN.
'''
timesteps = 28
X = tf.placeholder(tf.float32, [None, 28, 28])   
x = tf.unstack(X, timesteps, 1) #List of 28 timesteps of tensors of size (batch_size,28)
y_ = tf.placeholder(tf.float32, [None, 10])

In [4]:
training_steps = 130000
n_hidden = 50 #Number of hidden units in RNN
learning_rate = 0.001 #Learning Rate of RMSprop
bs = 128

In [5]:
'''
The final length of hidden_states will be equal to timesteps+1 i.e. 29 because we have used all_zeros hidden state for initial
step.
'''
with tf.device('/gpu:0'):
    hidden_states = [tf.zeros([tf.shape(X)[0],n_hidden])]  #Initial hidden state of all zeros
    U_hidden = [tf.Variable(tf.random_normal([n_hidden,n_hidden])) for i in range(28)] #Weight vector of hidden state
    W_input = [tf.Variable(tf.random_normal([28,n_hidden])) for i in range(28)] #Weight vector of input 
    W_output = [tf.Variable(tf.random_normal([n_hidden,10])) for i in range(28)] #Weight vector for final output from hidden state
    Y_hidden=[] #Vector of output vector
    for i in range(timesteps):
        temp = tf.matmul(hidden_states[i], U_hidden[i]) #U_hidden*h(t-1)
        temp1 = tf.matmul(x[i],W_input[i])#W_input*x(t)
        hs = tf.tanh(temp+temp1) #Hidden state
        Y_hidden.append(tf.matmul(hs,W_output[i])) #append output to output vector
        hidden_states.append(hs) #append hidden state to hidden_states
    final_weight = tf.Variable(tf.random_normal([n_hidden,10])) #Final Weight vector to output layer
    final_bias = tf.Variable(tf.random_normal([10]))  #Final Bias vector
    # Linear activation, using rnn inner loop last output
    logits = tf.matmul(hidden_states[-1],final_weight)+final_bias #Logits
    y = tf.nn.softmax(logits) #Softmax of logits
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_)) #cross_entropy loss
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate) #RMSprop
    train_step = optimizer.minimize(cross_entropy)
    correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) #correct predictions
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) #accuracy

In [6]:
sess = tf.InteractiveSession()

In [7]:
tf.global_variables_initializer().run()

In [8]:
for step in range(1,training_steps+1):
    batch_xs, batch_ys = mnist.train.next_batch(bs)
    batch_xs = batch_xs.reshape((bs, 28, 28))
    sess.run(train_step, feed_dict={X: batch_xs, y_: batch_ys})
    if step%1000==0 or step == 1:
        loss, acc = sess.run([cross_entropy, accuracy], feed_dict={X: batch_xs,y_: batch_ys})
        print(str(step)+" - Loss = "+str(loss)+" ,Accuracy = "+str(acc))

1 - Loss = 9.81963 ,Accuracy = 0.078125
1000 - Loss = 4.60225 ,Accuracy = 0.101562
2000 - Loss = 2.7198 ,Accuracy = 0.125
3000 - Loss = 2.3697 ,Accuracy = 0.210938
4000 - Loss = 2.11399 ,Accuracy = 0.265625
5000 - Loss = 1.92657 ,Accuracy = 0.226562
6000 - Loss = 1.77153 ,Accuracy = 0.335938
7000 - Loss = 1.85083 ,Accuracy = 0.359375
8000 - Loss = 1.6776 ,Accuracy = 0.414062
9000 - Loss = 1.53131 ,Accuracy = 0.4375
10000 - Loss = 1.60078 ,Accuracy = 0.351562
11000 - Loss = 1.56431 ,Accuracy = 0.390625
12000 - Loss = 1.55229 ,Accuracy = 0.4375
13000 - Loss = 1.59606 ,Accuracy = 0.359375
14000 - Loss = 1.55991 ,Accuracy = 0.429688
15000 - Loss = 1.62629 ,Accuracy = 0.382812
16000 - Loss = 1.37265 ,Accuracy = 0.523438
17000 - Loss = 1.49696 ,Accuracy = 0.4375
18000 - Loss = 1.49628 ,Accuracy = 0.460938
19000 - Loss = 1.5675 ,Accuracy = 0.4375
20000 - Loss = 1.52443 ,Accuracy = 0.429688
21000 - Loss = 1.5831 ,Accuracy = 0.390625
22000 - Loss = 1.32543 ,Accuracy = 0.492188
23000 - Loss = 1.

In [13]:
print("Accuracy for test set: "+str(sess.run(accuracy, feed_dict={X: mnist.test.images.reshape([mnist.test.images.shape[0],28,28]), y_: mnist.test.labels})))

Accuracy for test set: 0.7064
