In [10]:
import tensorflow as tf
import numpy as np
import time
from sklearn.utils import shuffle
# Remember the current TensorFlow logging verbosity so it can be restored later,
# then lower it to ERROR for the statements that follow.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

<h1>Extract the MNIST data</h1>

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST with one-hot encoded labels; reshape=False keeps the images
# un-flattened instead of turning each one into a flat vector.
mnist = input_data.read_data_sets("MNIST_data/",reshape=False,one_hot=True)
# Loading is done: restore the logging verbosity that was saved in old_v
# before it was lowered to ERROR.
tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


<h1>Prepare training, validation and testing data</h1>

In [3]:
# Pull the image and label arrays out of the three MNIST splits.
x_train, x_validation, x_test = (
    mnist.train.images,
    mnist.validation.images,
    mnist.test.images,
)
y_train, y_validation, y_test = (
    mnist.train.labels,
    mnist.validation.labels,
    mnist.test.labels,
)

# Zero-pad every image from 28x28 to 32x32: two pixels on each side of the
# two middle axes, nothing on the first (sample) and last (channel) axes.
x_train, x_validation, x_test = (
    np.pad(images, ((0,0),(2,2),(2,2),(0,0)), 'constant')
    for images in (x_train, x_validation, x_test)
)
print(x_train.shape)
def padImage(X, pad=2):
    """Zero-pad the two middle axes of a 4-D image batch.

    Args:
        X: array with four axes; axes 1 and 2 (the image rows and columns of
            the batches used in this notebook) are padded, axes 0 and 3 are
            left untouched.
        pad: number of zero entries added before and after each padded axis.
            The default of 2 turns 28x28 images into 32x32 ones.

    Returns:
        A new array whose axes 1 and 2 are each ``2 * pad`` longer than in X.
    """
    return np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')

(55000, 32, 32, 1)


<h1>Define hyperparameters</h1>

In [4]:
# Step size handed to the optimizer.
lr = 1e-2
# Number of training iterations. Despite the name, the training loop draws
# one mini-batch per iteration, so this counts steps rather than full epochs.
num_epochs = 1000
# Number of examples in each mini-batch.
batch_size = 128

# Network parameters.
# NOTE(review): n_hidden_1, n_hidden_2 and num_input are not referenced by the
# LeNet-5 cells visible in this notebook -- confirm before removing them.
n_hidden_1, n_hidden_2 = 300, 100
num_input = 28 * 28
num_classes = 10

In [5]:
# Reset TensorFlow's default graph before the placeholders and the model
# below are created.
tf.reset_default_graph()

<h1>Placeholder</h1>

In [6]:
# Graph inputs: a batch of 32x32 single-channel images and, for each image,
# a label vector with num_classes entries. The batch size is left open.
X = tf.placeholder(dtype=tf.float32, shape=(None, 32, 32, 1), name='X')
Y = tf.placeholder(dtype=tf.int32, shape=(None, num_classes), name='Y')


<h1>Define LeNet-5</h1>

In [7]:
def _lenet_conv_block(inputs, in_channels, out_channels, training):
    """5x5 VALID convolution + bias -> ReLU -> 2x2 max-pool -> batch normalization."""
    weights = tf.Variable(
        tf.truncated_normal(shape=[5, 5, in_channels, out_channels], mean=0, stddev=0.08))
    bias = tf.Variable(tf.zeros(shape=out_channels))

    conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='VALID') + bias
    conv = tf.nn.relu(conv)
    pooled = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    return tf.layers.batch_normalization(pooled, training=training)


def _lenet_dense(inputs, in_units, out_units, apply_relu=True):
    """Fully connected layer (matmul + bias), optionally followed by ReLU."""
    weights = tf.Variable(
        tf.truncated_normal(shape=[in_units, out_units], mean=0, stddev=0.08))
    bias = tf.Variable(tf.zeros(shape=out_units))
    outputs = tf.matmul(inputs, weights) + bias
    return tf.nn.relu(outputs) if apply_relu else outputs


def LeNet5(X, training=False):
    """Build the LeNet-5 graph and return the un-normalized class scores.

    Layer layout (spatial sizes follow from 5x5 VALID convolutions and
    2x2 stride-2 pooling on a 32x32 input):
        conv 1->6   : 32x32 -> 28x28, pool -> 14x14, batch norm
        conv 6->16  : 14x14 -> 10x10, pool -> 5x5,   batch norm
        flatten     : 5 * 5 * 16 = 400 features
        dense       : 400 -> 120 -> 84 -> 10

    Args:
        X: float tensor of shape [batch, 32, 32, 1].
        training: forwarded to both batch-normalization layers. The default
            (False) matches the previous behaviour, in which the layers always
            normalise with their moving statistics. Pass True (or a boolean
            tensor) to normalise with batch statistics; in that case the ops
            in tf.GraphKeys.UPDATE_OPS must be run together with the training
            op so the moving statistics get updated.

    Returns:
        Logits tensor of shape [batch, 10].
    """
    # Variables are created in the same order as before (weights, then bias,
    # layer by layer) so automatically generated variable names are unchanged.
    conv1_bn = _lenet_conv_block(X, 1, 6, training)
    conv2_bn = _lenet_conv_block(conv1_bn, 6, 16, training)

    fc0 = tf.contrib.layers.flatten(conv2_bn)
    fc1 = _lenet_dense(fc0, 400, 120)
    fc2 = _lenet_dense(fc1, 120, 84)

    # Final layer has no activation: the loss applies softmax to these logits.
    logits = _lenet_dense(fc2, 84, 10, apply_relu=False)
    return logits
    
    

<h1>Cost and optimization</h1>

In [8]:
# Forward pass: class scores for the images fed through X.
logits = LeNet5(X)

# Loss: softmax cross-entropy per example, averaged over the batch.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=Y,
    logits=logits,
)
loss = tf.reduce_mean(cross_entropy)

# One Adam update of the loss per run of training_operation.
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
training_operation = optimizer.minimize(loss)

# Accuracy: share of examples whose highest-scoring class matches the
# position of the largest entry in the label vector.
correct_prediction = tf.equal(
    tf.argmax(logits, axis=1),
    tf.argmax(Y, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Created last so it also covers the variables added by the optimizer.
init = tf.global_variables_initializer()


<h1>Training, validating, testing</h1>
<h2>1. Print out training and validation accuracy every 100 training steps</h2>
<h2>2. Print out the training time of each reported step</h2>
<h2>3. Print out testing accuracy</h2>

In [12]:
with tf.Session() as sess:
    sess.run(init)

    for step in range(num_epochs):
        # Draw the next mini-batch and pad it from 28x28 to 32x32.
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = padImage(batch_x)

        # Time one optimisation step on that mini-batch.
        start = time.time()
        sess.run(training_operation, feed_dict={X: batch_x, Y: batch_y})
        end = time.time()

        # Only every 100th step is reported.
        if step % 100 != 0:
            continue

        print("step "+str(step)+", Train time = {:.3f}".format(end-start))
        # Accuracy is evaluated on the full training and validation arrays.
        for report_line, images, labels in (
            ("     Train Accuracy= {:.3f}", x_train, y_train),
            ("     Validation Accuracy= {:.3f}", x_validation, y_validation),
        ):
            acc = sess.run(accuracy, feed_dict={X: images, Y: labels})
            print(report_line.format(acc))

    # NOTE(review): this timestamp is not read again in the visible notebook.
    end = time.time()
    print("Training finished!")
    print("Testing ACcuracy:", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))

step 0, Train time = 0.238
     Train Accuracy= 0.100
     Validation Accuracy= 0.096
step 100, Train time = 0.053
     Train Accuracy= 0.954
     Validation Accuracy= 0.960
step 200, Train time = 0.072
     Train Accuracy= 0.972
     Validation Accuracy= 0.973
step 300, Train time = 0.050
     Train Accuracy= 0.972
     Validation Accuracy= 0.972
step 400, Train time = 0.053
     Train Accuracy= 0.980
     Validation Accuracy= 0.980
step 500, Train time = 0.057
     Train Accuracy= 0.981
     Validation Accuracy= 0.982
step 600, Train time = 0.055
     Train Accuracy= 0.982
     Validation Accuracy= 0.980
step 700, Train time = 0.073
     Train Accuracy= 0.983
     Validation Accuracy= 0.979
step 800, Train time = 0.073
     Train Accuracy= 0.987
     Validation Accuracy= 0.985
step 900, Train time = 0.071
     Train Accuracy= 0.986
     Validation Accuracy= 0.983
Training finished!
Testing ACcuracy: 0.9842
