In [2]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.contrib.layers import flatten
from tensorflow.examples.tutorials.mnist import input_data

def display_mnist_images(gens, num_images):
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'
    fig, axs = plt.subplots(1, num_images, figsize=(25, 3))
    for i in range(num_images):
        reshaped_img = (gens[i].reshape(28, 28) * 255).astype(np.uint8)
        axs.flat[i].imshow(reshaped_img)
    plt.show()


def pad_images(data):
    # reformat the MNIST data into a shape that LeNet will accept #
    # pad the data with two rows of zeros on the top and bottom #
    # and add columns of zeros on the left and right #
    # new dimensions should be (28 + 2 + 2 = 32) #
    return np.pad(data, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

In [3]:
# download MNIST dataset #
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True, reshape=False)

# visualize random MNIST images #
batch_xs, batch_ys = mnist.train.next_batch(10)
list_of_images = np.split(batch_xs, 10)
#    display_mnist_images(list_of_images, 10)

train_examples, n_classes = mnist.train.num_examples, mnist.train.labels.shape[1]

# MNIST data that TensorFlow pre-loads comes as 28 x 28 x 1 (greyscale) images #
image_width, image_height, image_channels = mnist.train.images.shape[1:]
print("Image width: {}\nImage height: {}\nImage channels: {}".format(image_width, image_height, image_channels))

######################################
# define the model (build the graph) #
######################################

# TODO: (1) In the Convolutional network architecture the majority of neurons are in the fully connected layer.
#           These dense layers are more prone to over-fitting the data. Add dropout regularization to these layers. - DONE
#
#       (2) For complex tasks such as these, the network requires non-linearity. Add non-linear activations to the
#           appropriate layers.
#
#       (3) For each convolutional layer, calculate the output feature map/activation map dimensions.
#
#       (4) Change the gradient optimizer to the Adam optimzer. - DONE : 97.570

# LeNet architecture accepts a 32 x 32 x C image as input, where C is the number of color channels #
x = tf.placeholder(tf.float32, [None, 32, 32, 1])

# add an additional input placeholder for dropout regularization #
keep_prob = tf.placeholder(tf.float32, name="keep_prob")

# control the amount of regularization #
keep_nodes = 0.75 #just_added

mean = 0.0
sigma = 0.1

# Layer 1: Convolutional input = 32 x 32 x 1. Output feature map = 28 x 28 x 6 #
conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6), mean=mean, stddev=sigma))
conv1_b = tf.Variable(tf.zeros(6))
conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b

# Activation #
conv1 = tf.nn.relu(conv1)

# Pooling input = 28 x 28 x 6. Output = 14 x 14 x 6 #
conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

# Layer 2: Convolutional input = 14 x 14 x 6. Output = 10 x 10 x 16 #
conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean=mean, stddev=sigma))
conv2_b = tf.Variable(tf.zeros(16))
conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b

# Activation #
conv2 = tf.nn.relu6(conv2)

# Pooling input = 10 x 10 x 16. Output = 5 x 5 x 16 #
conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

# Flatten layer: Input = 5 x 5 x 16. Output = 400 #
flat_layer = flatten(conv2)

# Layer 3: Fully Connected input = 400. Output = 120 #
fully_connected_1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean=mean, stddev=sigma))
fully_connected_1_b = tf.Variable(tf.zeros(120))
fully_connected_1 = tf.add(tf.matmul(flat_layer, fully_connected_1_W), fully_connected_1_b)
fully_connected_1_drop = tf.nn.dropout(fully_connected_1,keep_prob) #just_added

# Activation #
fc1 = tf.nn.relu6(fully_connected_1_drop)

# Layer 4: Fully Connected input = 120. Output = 84 #
fully_connected_2_W = tf.Variable(tf.truncated_normal(shape=(120, 84), mean=mean, stddev=sigma))
fully_connected_2_b = tf.Variable(tf.zeros(84))
fully_connected_2 = tf.add(tf.matmul(fully_connected_1, fully_connected_2_W), fully_connected_2_b)
fully_connected_2_drop = tf.nn.dropout(fully_connected_2,keep_prob)

# Activation #
fc2 = tf.nn.relu6(fully_connected_2_drop)

# Layer 5: Fully Connected input = 84. Output = 10 #
fully_connected_3_W = tf.Variable(tf.truncated_normal(shape=(84, 10), mean=mean, stddev=sigma))
fully_connected_3_b = tf.Variable(tf.zeros(10))

# Final outputs #
y_ = tf.add(tf.matmul(fc2, fully_connected_3_W), fully_connected_3_b)

# Target classes/labels #
y = tf.placeholder(tf.float32, [None, n_classes])

# Softmax layer #
prob = tf.nn.softmax(y_)

########################
# define loss function #
########################

cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_, labels=y))

learning_rate = 0.01

train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy_loss)

###########################
# define model evaluation #
###########################

actual_class, predicted_class = tf.argmax(y, 1), tf.argmax(prob, 1)
correct_prediction = tf.cast(tf.equal(predicted_class, actual_class), tf.float32)
classification_accuracy = tf.reduce_mean(correct_prediction)

#########################
# define training cycle #
#########################

num_epochs = 10
batch_size = 128

# initializing the variables before starting the session #
init = tf.global_variables_initializer()

# launch the graph in a session (use the session as a context manager) #
with tf.Session() as sess:
    # run session #
    sess.run(init)
    # start main training cycle #
    for epoch in range(num_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # loop over all batches #
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # reshape image dimensions with zero padding #
            batch_x = pad_images(batch_x)
            # run optimization op (backprop), cost op and accuracy op (to get training losses) #
            _, c, a = sess.run([train_step, cross_entropy_loss, classification_accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: keep_nodes})
            # compute avg training loss #
            avg_cost += c / total_batch
        # display logs per epoch step #
        if epoch % 1 == 0:
            cost_eval = cross_entropy_loss.eval(feed_dict={x: pad_images(mnist.validation.images), y: mnist.validation.labels, keep_prob: 1.0})
            print("Epoch {}:\ntraining-cross-entropy-loss = {:.4f}\nvalidation-cross-entropy-loss = {:.4f}\n".format(epoch + 1, avg_cost, cost_eval))
    print("Optimization Finished!")
    # calculate test set accuracy #
    test_accuracy = classification_accuracy.eval({x: pad_images(mnist.test.images), y: mnist.test.labels, keep_prob: 1.0})
    print("Accuracy on test set = {:.3f}%".format(test_accuracy * 100))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Image width: 28
Image height: 28
Image channels: 1
Epoch 1:
training-cross-entropy-loss = 0.2158
validation-cross-entropy-loss = 0.1005

Epoch 2:
training-cross-entropy-loss = 0.1495
validation-cross-entropy-loss = 0.1587

Epoch 3:
training-cross-entropy-loss = 0.3212
validation-cross-entropy-loss = 0.3982

Epoch 4:
training-cross-entropy-loss = 0.3810
validation-cross-entropy-loss = 0.1938

Epoch 5:
training-cross-entropy-loss = 0.3946
validation-cross-entropy-loss = 0.1915

Epoch 6:
training-cross-entropy-loss = 0.3633
validation-cross-entropy-loss = 0.1910

Epoch 7:
training-cross-entropy-loss = 0.3231
validation-cross-entropy-loss = 0.2828

Epoch 8:
training-cross-entropy-loss = 0.2797
validation-cross-entropy-loss = 0.1646

Epoch 9:
training-cross-entropy-loss = 0.2555
validation-cross-entr