In [7]:
import tensorflow as tf
import numpy as np
import timeit, time, math
from sklearn.utils import shuffle
# Remember the current TensorFlow log level so it can be restored later, then
# raise the threshold to ERROR so warnings are hidden
# (presumably the ones emitted while loading MNIST below -- confirm).
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

<h1>Extract MNIST data</h1>

In [3]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from MNIST_data/ with one-hot encoded labels; reshape=False keeps
# every image as a 2-D picture with a channel axis instead of flattening it.
mnist = input_data.read_data_sets("MNIST_data/",reshape=False,one_hot=True)
# Loading is done: restore the log verbosity that was saved in old_v before
# warnings were suppressed.
tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


<h1>Prepare training, validation and testing data</h1>

In [4]:
# Pull the three MNIST subsets apart into (images, one-hot labels) pairs.
x_train, y_train = mnist.train.images, mnist.train.labels
x_validation, y_validation = mnist.validation.images, mnist.validation.labels
x_test, y_test = mnist.test.images, mnist.test.labels

# Grow every image from 28x28 to 32x32 by adding a 2-pixel border of zeros on
# the height and width axes; the batch and channel axes are left untouched.
pad_width = ((0, 0), (2, 2), (2, 2), (0, 0))
x_train, x_validation, x_test = (
    np.pad(images, pad_width, 'constant')
    for images in (x_train, x_validation, x_test)
)

# Sanity check: a single padded image should now be 32x32 with one channel.
print(x_test[0].shape)

(32, 32, 1)


In [8]:
# Number of classes = width of the one-hot label matrix (one column per class).
num_classes = y_train.shape[1]

<h1>Define hyperparameters</h1>

In [9]:
# Training configuration.
EPOCHS = 30        # number of full passes over the training set
BATCH_SIZE = 128   # examples fed per optimization step
lr = 0.001         # learning rate handed to the optimizer

# Count of complete mini-batches per epoch. Integer division already rounds
# down, so a trailing partial batch is not counted.
batches = len(x_train) // BATCH_SIZE

In [11]:
# Start from an empty default graph so re-running the cells below does not
# pile new ops and variables on top of the ones from an earlier run.
tf.reset_default_graph()

<h1>Placeholder</h1>

In [12]:
# Graph inputs. The leading None leaves the batch dimension unspecified so the
# same placeholders serve training batches and the full validation/test sets.
X = tf.placeholder(tf.float32, [None, 32, 32, 1])  # zero-padded 32x32 single-channel images
# One-hot labels. The width follows num_classes (instead of a literal 10) so it
# always agrees with the output layer parameters W_out / b_out.
Y = tf.placeholder(tf.int32, [None, num_classes])


# Parameters of the truncated normal distribution used for every weight tensor.
mean = 0.0
stddev = 0.1

def initialize_weight(shape, mean, stddev, name=None):
    """Create one trainable weight variable drawn from a truncated normal.

    Args:
        shape: list of ints, shape of the weight tensor.
        mean: mean of the truncated normal distribution.
        stddev: standard deviation of the truncated normal distribution.
        name: optional graph name for the variable. Defaults to None, which
            keeps the previous behavior of letting TensorFlow pick a name.

    Returns:
        A tf.Variable initialized with the sampled values.
    """
    W = tf.truncated_normal(shape=shape, mean=mean, stddev=stddev)
    return tf.Variable(W, name=name)


# initialize_weight already returns a tf.Variable, so it is used directly.
# Wrapping its result in a second tf.Variable created every weight twice and
# left an unnamed, trainable duplicate in the graph that never got a gradient.
weights = {
    'W_conv1': initialize_weight([5,5,1,6], mean, stddev, name='W_conv1'),
    'W_conv2': initialize_weight([5,5,6,16], mean, stddev, name='W_conv2'),
    'W_fc1': initialize_weight([5*5*16,120], mean, stddev, name='W_fc1'),
    'W_fc2': initialize_weight([120,84], mean, stddev, name='W_fc2'),
    'W_out': initialize_weight([84,num_classes], mean, stddev, name='W_out')
}

# (dictionary key / variable name, number of units) for each layer's bias.
bias_sizes = [
    ('b_conv1', 6),
    ('b_conv2', 16),
    ('b_fc1', 120),
    ('b_fc2', 84),
    ('b_out', num_classes),
]

# Every bias starts at zero and is registered in the graph under its own key.
biases = {
    key: tf.Variable(tf.zeros(shape=[units]), name=key)
    for key, units in bias_sizes
}

<h1>Define LeNet-5</h1>

In [13]:
def LeNet(x):
    """Build the LeNet-5 forward pass and return the class logits.

    For a 32x32x1 input the feature maps are: 5x5 conv -> 28x28x6,
    2x2 pool -> 14x14x6, 5x5 conv -> 10x10x16, 2x2 pool -> 5x5x16,
    flatten -> 400, followed by dense layers of 120, 84 and num_classes units.

    Args:
        x: batch of images; the notebook passes the [None, 32, 32, 1] placeholder.

    Returns:
        Tensor of unnormalized class scores; no softmax is applied here.
    """
    unit_stride = [1, 1, 1, 1]
    pool_2x2 = [1, 2, 2, 1]

    # Stage 1: convolution + bias, ReLU, then 2x2 max-pooling.
    stage1 = tf.nn.conv2d(x, weights['W_conv1'], strides=unit_stride, padding='VALID')
    stage1 = tf.nn.relu(stage1 + biases['b_conv1'])
    stage1 = tf.nn.max_pool(stage1, ksize=pool_2x2, strides=pool_2x2, padding='VALID')

    # Stage 2: same pattern with the second set of filters.
    stage2 = tf.nn.conv2d(stage1, weights['W_conv2'], strides=unit_stride, padding='VALID')
    stage2 = tf.nn.relu(stage2 + biases['b_conv2'])
    stage2 = tf.nn.max_pool(stage2, ksize=pool_2x2, strides=pool_2x2, padding='VALID')

    # Collapse the feature maps into one vector per example for the dense layers.
    flat = tf.contrib.layers.flatten(stage2)

    hidden1 = tf.nn.relu(tf.matmul(flat, weights['W_fc1']) + biases['b_fc1'])
    hidden2 = tf.nn.relu(tf.matmul(hidden1, weights['W_fc2']) + biases['b_fc2'])

    # Output layer: plain affine map, left as logits for the loss to consume.
    return tf.matmul(hidden2, weights['W_out']) + biases['b_out']

<h1>Cost and optimization</h1>

In [14]:
# Forward pass: unnormalized class scores for every image fed through X.
logits = LeNet(X)

# Loss: softmax cross-entropy against the one-hot labels, averaged over the batch.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y)
loss = tf.reduce_mean(cross_entropy, name='loss')

# Optimizer: Adam with the configured learning rate; train_op applies one update.
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.minimize(loss)

# A prediction counts as correct when the highest-scoring class equals the
# position of the hot entry in the label.
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(Y, 1)
correct_pred = tf.equal(predicted_class, true_class)

# Accuracy: fraction of correct predictions (booleans cast to 0.0 / 1.0, then averaged).
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

# Op that initializes the variables; it is run once at the start of the session.
init = tf.global_variables_initializer()

<h1>Training, validating, testing</h1>
<h2>1. Print out validation accuracy after each training epoch</h2>
<h2>2. Print out training time on each epoch</h2>
<h2>3. Print out testing accuracy</h2>

In [112]:
# Per-epoch history: validation accuracy, test accuracy and training time (seconds).
val_accuracy = []
test_accuracy = []
time_taken = []

num_train = x_train.shape[0]

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(init)

    for ep in range(EPOCHS):

        # Draw a fresh random order every epoch so the mini-batches differ
        # from one epoch to the next; images and labels share the permutation.
        permutation = np.random.permutation(num_train)
        x_train_shuffled = x_train[permutation, :]
        y_train_shuffled = y_train[permutation, :]

        # Defined up front so the report below still works if no batch runs
        # (batches == 0 when the training set is smaller than BATCH_SIZE).
        train_loss = float('nan')

        # Only the optimization steps are timed, not the shuffle or evaluation.
        start_time = time.time()

        for j in range(batches):

            start = j * BATCH_SIZE
            # Slice ends are exclusive, so the upper bound is num_train itself.
            # Clamping to num_train - 1 silently dropped the last example
            # whenever the clamp took effect.
            end = min(start + BATCH_SIZE, num_train)
            X_batch = x_train_shuffled[start:end]
            Y_batch = y_train_shuffled[start:end]

            # One optimization step; train_loss/acc keep the values of the
            # most recent batch only.
            train_loss, acc, _ = sess.run(fetches=[loss, accuracy, train_op],
                                          feed_dict={X: X_batch,
                                                     Y: Y_batch})
        end_time = time.time()
        time_s = end_time - start_time

        # Evaluate on the complete validation and test sets in one pass each.
        val_acc = sess.run([accuracy], feed_dict={X: x_validation,
                                                  Y: y_validation})

        test_acc = sess.run([accuracy], feed_dict={X: x_test,
                                                   Y: y_test})

        # Note: "training loss" is the loss of the last mini-batch of the epoch,
        # not an average over the epoch.
        print("Epoch {}: training loss = {}, training_time = {}, val_accuracy = {}, test_accuracy = {}".format(
            ep, train_loss, time_s, val_acc[0], test_acc[0]))

        val_accuracy.append(val_acc[0])
        test_accuracy.append(test_acc[0])
        time_taken.append(time_s)
        

Epoch 0: training loss = 0.13400030136108398, training_time = 2.7117762565612793, val_accuracy = 0.9664000272750854, test_accuracy = 0.9624999761581421
Epoch 1: training loss = 0.0984925702214241, training_time = 2.513347864151001, val_accuracy = 0.9753999710083008, test_accuracy = 0.9779999852180481
Epoch 2: training loss = 0.07610903680324554, training_time = 2.5133113861083984, val_accuracy = 0.9837999939918518, test_accuracy = 0.9830999970436096
Epoch 3: training loss = 0.029794372618198395, training_time = 2.4452459812164307, val_accuracy = 0.9860000014305115, test_accuracy = 0.9876999855041504
Epoch 4: training loss = 0.04230296611785889, training_time = 2.4303364753723145, val_accuracy = 0.9829999804496765, test_accuracy = 0.9861000180244446
Epoch 5: training loss = 0.01900586672127247, training_time = 2.431164264678955, val_accuracy = 0.9879999756813049, test_accuracy = 0.989799976348877
Epoch 6: training loss = 0.035759564489126205, training_time = 2.5024423599243164, val_accu