We want to experiment with the MNIST dataset for recognizing handwritten digits using a feed-forward neural network classifier. The network has an input layer, some hidden layers and one final output layer.

We compare the output of the network with the intended output using a cost function (cross entropy) and try to minimize the cost with an optimizer (AdamOptimizer, SGD, AdaGrad...). This requires going backwards to manipulate the weights (backpropagation). 

Each full pass over the training set (feed-forward + backpropagation for every batch) is called an <i>epoch</i>. 

In [70]:
# Reference:
# https://www.youtube.com/watch?list=PLQVvvaa0QuDfKTOs3Keq_kaG2P55YRn5v&v=PwAGxqrXSCs

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data 

# Load the MNIST data sets from "tmp/data/"; one_hot=True asks for the labels
# to be one-hot encoded.
mnist = input_data.read_data_sets("tmp/data/", one_hot=True)

Extracting tmp/data/train-images-idx3-ubyte.gz
Extracting tmp/data/train-labels-idx1-ubyte.gz
Extracting tmp/data/t10k-images-idx3-ubyte.gz
Extracting tmp/data/t10k-labels-idx1-ubyte.gz


In [71]:
# defining the model
# specifying the number of nodes for hidden layers (these could change)

n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500

# 10 classes: 0 - 9
# each digit is represented by a 10-dimensional one-hot list whose single
# non-zero value sits at the index equal to the digit
# e.g. 4 = [0,0,0,0,1,0,0,0,0,0]

n_classes = 10

# repeatedly go through batches of 100 feature rows, feed them through our
# network one batch at a time and manipulate the weights

batch_size = 100

# placeholders for the input and output matrix:
# input matrix size: height x width = 28 * 28 = 784 (one flattened image per row)
# output matrix size: one one-hot row of n_classes values per image
# the leading None leaves the batch dimension free; declaring the remaining
# dimension lets TensorFlow reject a mis-shaped batch when it is fed

x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float', [None, n_classes])

def neural_network_model(data):
    """Build the feed-forward graph and return the output-layer tensor.

    The network has three ReLU hidden layers (n_nodes_hl1, n_nodes_hl2 and
    n_nodes_hl3 nodes) followed by an output layer with n_classes nodes.
    Every weight matrix and bias vector is a tf.Variable initialised from
    tf.random_normal.

    Args:
        data: input tensor; it is multiplied by a weight matrix with 784
            rows, so each row of ``data`` must hold 784 values.

    Returns:
        The output layer's ``matmul + biases`` result, with no activation
        applied to it.
    """
    # Widths of consecutive layers: input, the three hidden layers, output.
    widths = [784, n_nodes_hl1, n_nodes_hl2, n_nodes_hl3, n_classes]

    # Create every layer's parameters up front, in layer order.
    layers = [
        {'weights': tf.Variable(tf.random_normal([n_in, n_out])),
         'biases': tf.Variable(tf.random_normal([n_out]))}
        for n_in, n_out in zip(widths[:-1], widths[1:])
    ]

    # Hidden layers: relu((input * weights) + biases), each feeding the next.
    activation = data
    for layer in layers[:-1]:
        weighted_sum = tf.add(tf.matmul(activation, layer['weights']),
                              layer['biases'])
        activation = tf.nn.relu(weighted_sum)

    # Output layer: same affine transform, but without the ReLU.
    final_layer = layers[-1]
    return tf.matmul(activation, final_layer['weights']) + final_layer['biases']

In [73]:
# the softmax function is a generalization of the logistic function that "squashes" a K-dimensional 
# vector of arbitrary real values to a K-dimensional vector of real values in the range (0, 1) that add up to 1

# tensor: A typed multi-dimensional array

# tf.reduce_mean: computes the mean of elements across dimensions of a tensor

def train_neural_network(x, hm_epochs=10):
    """Train the classifier on the MNIST training set and print test accuracy.

    Builds the model on top of ``x``, minimizes the mean softmax
    cross-entropy against the module-level label placeholder ``y`` with
    AdamOptimizer, prints the summed batch loss after every epoch and finally
    prints the accuracy measured on ``mnist.test``.

    Args:
        x: input placeholder the model is built on; it is fed with the image
            batches during training and with the test images at the end.
        hm_epochs: how many epochs (full passes over the training batches) to
            run. Defaults to 10, the value that used to be hard-coded.

    Returns:
        None. Results are reported through ``print`` only; epochs are
        numbered from 0 in the printed output.
    """
    # run the model first
    prediction = neural_network_model(x)
    # calculate the cost
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    # we'd like to minimize this cost. In this case we use the AdamOptimizer,
    # which is a stochastic gradient-based method for optimization. Its default
    # learning rate is set to be 0.001 which is fine enough for our purposes here
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Ops used to gauge performance once training is done.
    # tf.argmax: returns the index with the largest value across axes of a tensor.
    # we are checking to see if that index is the same in the intended output vs prediction
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    # tf.cast: casts a tensor to a new type.
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    # The number of batches per epoch does not change between epochs, so it is
    # computed once; integer division drops a trailing partial batch.
    batches_per_epoch = mnist.train.num_examples // batch_size

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(batches_per_epoch):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                # c: cost of this batch; the optimizer op's own result is not needed
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('epoch', epoch, 'completed out of', hm_epochs,'. loss:', epoch_loss)

        # Now that we have trained the model we can gauge its performance
        # on the test images, inside the session that holds the trained weights.
        test_feed = {x: mnist.test.images, y: mnist.test.labels}
        print('Accuracy:', sess.run(accuracy, feed_dict=test_feed))
        
# Now we can actually run the network: build the model on the input
# placeholder x, train it and print the accuracy on the test set.
train_neural_network(x)

epoch 0 completed out of 10 . loss: 1878542.74203
epoch 1 completed out of 10 . loss: 409449.558212
epoch 2 completed out of 10 . loss: 220113.94924
epoch 3 completed out of 10 . loss: 131067.236988
epoch 4 completed out of 10 . loss: 80119.2542363
epoch 5 completed out of 10 . loss: 51459.1476424
epoch 6 completed out of 10 . loss: 34180.2244798
epoch 7 completed out of 10 . loss: 24406.4477171
epoch 8 completed out of 10 . loss: 20571.1951414
epoch 9 completed out of 10 . loss: 17014.7164322
Accuracy: 0.9484
