In [1]:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

# Data Loading
We first download (if needed) and load the MNIST dataset with one-hot encoded labels.

In [2]:
# Load the MNIST splits from ./MNIST_data with one-hot encoded labels.
mnist = input_data.read_data_sets("./MNIST_data", one_hot=True)

# LeNet Implementation
The network uses two convolutional layers, each followed by a ReLU activation and 2x2 max pooling, and then two fully connected layers.

In [1]:
def predict(self, x):
    """Build the network graph for ``x`` and return the class logits.

    ``x`` is reshaped to ``[-1, 28, 28, 1]`` and passed through two blocks of
    3x3 convolution (16 filters) + ReLU + ``max_pool_2x2``.  The result is
    flattened to ``7 * 7 * 16`` features and fed through a 128-unit ReLU
    layer followed by a 10-unit linear layer.  No softmax is applied to the
    returned tensor.  Fresh variables are created on every call.
    """
    images = tf.reshape(x, [-1, 28, 28, 1])

    # Block 1: 1 -> 16 feature maps.
    conv1_kernel = self.weight_variable([3, 3, 1, 16])
    conv1_bias = self.bias_variable([16])
    conv1_pre = self.conv2d(images, conv1_kernel) + conv1_bias
    conv1_out = tf.nn.relu(conv1_pre)
    pool1_out = self.max_pool_2x2(conv1_out)

    # Block 2: 16 -> 16 feature maps.
    conv2_kernel = self.weight_variable([3, 3, 16, 16])
    conv2_bias = self.bias_variable([16])
    conv2_pre = self.conv2d(pool1_out, conv2_kernel) + conv2_bias
    conv2_out = tf.nn.relu(conv2_pre)
    pool2_out = self.max_pool_2x2(conv2_out)

    # Fully connected hidden layer on the flattened feature maps.
    flat_dim = 7 * 7 * 16
    fc1_weights = self.weight_variable([flat_dim, 128])
    fc1_bias = self.bias_variable([128])
    flattened = tf.reshape(pool2_out, [-1, flat_dim])
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_weights) + fc1_bias)

    # Linear read-out: one logit per class.
    fc2_weights = self.weight_variable([128, 10])
    fc2_bias = self.bias_variable([10])
    logits = tf.matmul(hidden, fc2_weights) + fc2_bias
    return logits


def conv2d(self, x, W):
    """Convolve ``x`` with kernel ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return convolved


def max_pool_2x2(self, x):
    """Max-pool ``x`` with a 2x2 window and stride 2 ('SAME' padding)."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
    return pooled


def weight_variable(self, shape):
    """Return a ``tf.Variable`` of ``shape`` initialised from a truncated
    normal distribution with standard deviation 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(self, shape):
    """Return a ``tf.Variable`` of ``shape`` with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


def _make_network():
    """Return an object whose methods are this cell's layer helpers.

    ``predict`` and the layer helpers take a leading ``self`` parameter and
    call each other through ``self.<name>``, but they are defined as plain
    module-level functions.  Attaching them to a throwaway class gives
    ``train`` an instance on which those ``self.<name>`` lookups resolve.
    """
    helpers = (predict, conv2d, max_pool_2x2, weight_variable, bias_variable)
    return type('LeNet', (object,), {fn.__name__: fn for fn in helpers})()


def train(learning_rate=0.01, max_epochs=1000,
          batch_size=64):
    """Train the network on MNIST and record per-epoch accuracies.

    Reads the module-level ``mnist`` data sets.  After every epoch the
    accuracy is evaluated on the complete training, validation and test
    splits (each split is fed in a single batch).

    Args:
        learning_rate: Step size for plain gradient descent.
        max_epochs: Number of passes over the training split.
        batch_size: Number of examples per gradient step.

    Returns:
        A tuple ``(train_acc, val_acc, test_acc)`` of lists, each holding one
        accuracy value per epoch.
    """
    train_acc = list()
    val_acc = list()
    test_acc = list()

    # Build each run in its own graph so that repeated calls (for example a
    # learning-rate sweep) do not keep adding ops and variables to the global
    # default graph.
    with tf.Graph().as_default():
        # Flattened 28x28 input images and one-hot labels.
        x = tf.placeholder(tf.float32, [None, 784])
        y_ = tf.placeholder(tf.float32, [None, 10])

        # ``self`` does not exist in this function; go through an instance
        # that carries the layer helpers instead.
        y_conv = _make_network().predict(x)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

        train_step = tf.train.GradientDescentOptimizer(
            learning_rate).minimize(cross_entropy)

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # The validation split is exposed as ``mnist.validation``; there is
        # no ``mnist.eval`` attribute.
        splits = ((train_acc, mnist.train),
                  (val_acc, mnist.validation),
                  (test_acc, mnist.test))

        batches_per_epoch = mnist.train.num_examples // batch_size

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(max_epochs):
                for _ in range(batches_per_epoch):
                    batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                    sess.run(train_step,
                             feed_dict={x: batch_xs, y_: batch_ys})

                # Report 1-based progress so the last line reads N out of N.
                print('Epoch', epoch + 1, 'completed out of', max_epochs)

                for history, split in splits:
                    history.append(sess.run(
                        accuracy,
                        feed_dict={x: split.images, y_: split.labels}))

    return (train_acc, val_acc, test_acc)

# Training on MNIST
Finally, we can train our network on the MNIST dataset!

In [None]:
# Train with the default hyperparameters (learning_rate=0.01, max_epochs=1000,
# batch_size=64). The returned (train, validation, test) accuracy histories
# are the last expression of the cell and therefore become its output.
train()

# Task 2: Changing the Learning Rate

In [None]:
learning_rates = [0.1, 0.01, 0.001, 0.0001]

# train() returns (train, validation, test) accuracy histories; index 1 is the
# validation history.  Despite its name, this list therefore stores per-epoch
# validation *accuracies*; they are converted to error rates when plotted.
validation_errors = [train(learning_rate=lr)[1] for lr in learning_rates]

fig, ax = plt.subplots()
for lr, val_accuracy in zip(learning_rates, validation_errors):
    # One x value per recorded epoch (1-based) so x and y have equal length.
    epochs = np.arange(1, len(val_accuracy) + 1)
    ax.plot(epochs, 1.0 - np.asarray(val_accuracy),
            label='Learning rate:%s' % lr)
ax.set_xlabel('number of epochs')
ax.set_ylabel('validation error')
# Without a legend the per-curve labels are never displayed.
ax.legend()
plt.show()