### Training a convolutional neural network on MNIST dataset 

In [3]:
import tensorflow as tf

In [6]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot = True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


The network I plan to build is as follows: 
<br>
*input images* --> 748 pixels or 28x28 pixel images
<br>
*filters* = 5x5
<br>
*stride* = 1
<br>
*padding* = same
<br>
since padding is same and stride 1, after every convolution the width and height of a layer remians same
<br>
the only way the dimensions get reduced is by max pooling
<br>
*pooling window size* = 2
<br>
for each example:
<br>
<br>
<font color=red>**28x28x1 --5x5x1x32--> 28x28x32 --max-pooling--> 14x14x32 --5x5x32x64--> 14x14x64 --max-pooling--> 7x7x64 ---> 1024(hidden_layer) ----> 10(out_layer)**</font>

In [17]:
# we design a CNN with 2 convolutional layers and a hidden layer
n_input = 784
input_h = 28
input_w = 28
channels = 1
n_conv1 = 32
n_conv2 = 64 
# these are the number of filters in the convolutional layers
conv1_k = 5
conv2_k = 5
n_hidden = 1024
n_out = 10
pooling_window = 2

In [18]:
# declaring global variables
weights = {
    'wc1': tf.Variable(tf.random_normal([conv1_k, conv1_k, channels, n_conv1])),
    'wc2': tf.Variable(tf.random_normal([conv2_k, conv2_k, n_conv1, n_conv2])),
    'wh1': tf.Variable(tf.random_normal([input_h//4*input_w//4*n_conv2,n_hidden])),
    'out': tf.Variable(tf.random_normal([n_hidden, n_out]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([n_conv1])),
    'bc2': tf.Variable(tf.random_normal([n_conv2])),
    'bh1': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_out]))
}

In [23]:
def conv(x, weights, bias, strides=1):
    out = tf.nn.conv2d(x, weights, padding = 'SAME', strides = [1,strides,strides,1])
    out = tf.nn.bias_add(out, bias)
    return tf.nn.relu(out)     
    
def max_pooling(x, k=2):
    return tf.nn.max_pool(x, padding='SAME', ksize=[1,k,k,1], strides=[1,k,k,1])   

In [24]:
def cnn(x, weights, biases):
    x = tf.reshape(x, shape=[-1, input_h, input_w, 1])
    # reshaped the 784 feature input to be 28x28
    # 1st convolutional layer
    conv1 = conv(x, weights['wc1'], biases['bc1'])
    conv1 = max_pooling(conv1, pooling_window)
    
    # 2nd convolutional layer
    conv2 = conv(conv1, weights['wc2'], biases['bc2'])
    conv2 = max_pooling(conv2, pooling_window)
    
    # we reshape 7x7x64 so that it can be multiplied by the weights joining it to the 
    # hidden layer(each pixel will have a separate weight connecting it to the hidden layer)
    hidden_input = tf.reshape(conv2, shape = [-1, input_h//4*input_w//4*n_conv2])
    hidden_out = tf.add(tf.matmul(hidden_input, weights['wh1']), biases['bh1'])
    hidden_out = tf.nn.relu(hidden_out)
    
    # outer layer 
    out = tf.add(tf.matmul(hidden_out, weights['out']), biases['out'])
    return out

In [25]:
x = tf.placeholder('float', [None, n_input])
y = tf.placeholder('float', [None, n_out])
pred = cnn(x, weights, biases)
pred

<tf.Tensor 'Add_1:0' shape=(?, 10) dtype=float32>

In [28]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



In [29]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [31]:
batch_size = 100
for i in range(10):
    num_batches = int(mnist.train.num_examples/batch_size)
    total_cost = 0
    for j in range(num_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        _ , c = sess.run([optimizer, cost], feed_dict={x:batch_x, y:batch_y})
        total_cost += c
    print(total_cost)

1377615.489714384
34864.78751116991
21688.614163067497
16270.202837093324
14160.842768996552
12438.837339997292
11719.354070261064
10253.592679539022
9501.334009983439
8141.805627436638


In [34]:
predictions = tf.argmax(pred,1)
correct_labels = tf.argmax(y,1)
correct_predictions = tf.equal(predictions, correct_labels)
predictions,correct_predictions  = sess.run([predictions, correct_predictions], feed_dict={x:mnist.test.images,
                                              y:mnist.test.labels})

In [37]:
p = (correct_predictions == False).sum()
print(p)
accuracy = 1 - p/len(correct_predictions)
print(accuracy)

192
0.9808
