### Importing MNIST Dataset

In [1]:
import tensorflow as tf
import numpy as np

old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data')

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Model Hyperparams

In [2]:
IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10
VALIDATION_SIZE = 5000  # Size of the validation set.
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 64
NUM_EPOCHS = 10
EVAL_BATCH_SIZE = 64
EVAL_FREQUENCY = 100

t_data = (mnist.train.images - (255/2.0))/255 
train_data = t_data.reshape(55000, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)

e_data = (mnist.test.images - (255/2.0))/255
eval_data = e_data.reshape(10000, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS).astype(np.float32)
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

validation_data = train_data[:VALIDATION_SIZE, ...].astype(np.float32)
validation_labels = train_labels[:VALIDATION_SIZE]
train_data = train_data[VALIDATION_SIZE:, ...].astype(np.float32)
train_labels = train_labels[VALIDATION_SIZE:]

### Convnet variables

In [3]:
train_data_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
train_labels_node = tf.placeholder(tf.int32, shape=(BATCH_SIZE,))
eval_data = tf.placeholder(tf.float32, shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

conv1_weights = tf.Variable(tf.truncated_normal([5, 5, NUM_CHANNELS, 32], stddev=0.1, seed=SEED, dtype=tf.float32))
conv1_biases = tf.Variable(tf.zeros([32], dtype=tf.float32))

conv2_weights = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED, dtype=tf.float32))
conv2_biases = tf.Variable(tf.zeros([64], dtype=tf.float32))

fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512], stddev=0.1, seed=SEED, dtype=tf.float32))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32))

fc2_weights = tf.Variable(tf.truncated_normal([512, NUM_LABELS], stddev=0.1, seed=SEED, dtype=tf.float32))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS], dtype=tf.float32))


### LeNet5 Architecture

In [4]:
num_epochs = NUM_EPOCHS
train_size = train_labels.shape[0]

def model(data, train=False):
    conv1 = tf.nn.conv2d(train_data_node, conv1_weights, strides=[1,1,1,1], padding="SAME")
    relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
    pool1 = tf.nn.max_pool(relu1, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")

    conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1,1,1,1], padding="SAME")
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
    pool2 = tf.nn.max_pool(relu2, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")

    pool_shape = pool2.get_shape().as_list()
    reshape = tf.reshape(pool2,[pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    if train:
        hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
    return tf.matmul(hidden, fc2_weights) + fc2_biases

logits = model(train_data_node, True)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=train_labels_node, logits=logits))
regularizers = (tf.nn.l2_loss(fc1_weights)
                + tf.nn.l2_loss(fc1_biases)
                + tf.nn.l2_loss(fc2_weights)
                + tf.nn.l2_loss(fc2_biases))

loss += 5e-4 * regularizers
batch = tf.Variable(0, dtype=tf.float32)
learning_rate = tf.train.exponential_decay(0.01, batch * BATCH_SIZE, train_size,0.95,staircase=True)

optimizer = tf.train.MomentumOptimizer(learning_rate,0.9).minimize(loss,global_step=batch)
train_prediction = tf.nn.softmax(logits)
eval_prediction = tf.nn.softmax(model(eval_data))

In [None]:
import time

def error_rate(predictions, labels):
    return 100.0 - (
      100.0 *
      np.sum(np.argmax(predictions, 1) == labels) /
      predictions.shape[0])

def eval_in_batches(data, sess):
    size = data.shape[0]
    if size < EVAL_BATCH_SIZE:
        raise ValueError("batch size for evals larger than dataset: %d" % size)
    predictions = np.ndarray(shape=(size, NUM_LABELS),dtype=np.float32)
    for begin in range(0, size, EVAL_BATCH_SIZE):
        end = begin + EVAL_BATCH_SIZE
        if end <= size:
            predictions[begin:end, :] = sess.run(eval_prediction,feed_dict={eval_data: data[begin:end, ...]})
        else:
            batch_predictions = sess.run(
            eval_prediction, feed_dict={eval_data: data[-EVAL_BATCH_SIZE:]})
        predictions[begin:, :] = batch_predictions[begin - size:, :]
    return predictions

start_time = time.time()
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(int(num_epochs * train_size) // BATCH_SIZE):
        offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_data = train_data[offset:(offset + BATCH_SIZE)]
        batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
        feed_dict = {train_data_node: batch_data, train_labels_node: batch_labels}
        sess.run(optimizer, feed_dict=feed_dict)
        if step % EVAL_FREQUENCY == 0:
            l, lr, predictions = sess.run([loss, learning_rate, train_prediction],feed_dict=feed_dict)
            elapsed_time = time.time() - start_time
            print('Step %d (epoch %.2f), %.1f ms' % (step, float(step) * BATCH_SIZE / train_size, 1000 * elapsed_time / EVAL_FREQUENCY))
            print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
            print('Minibatch error: %.1f%%' % error_rate(predictions, batch_labels))
#             print('Validation error: %.1f%%' % error_rate(eval_in_batches(validation_data, sess), validation_labels))
    
    test_error = error_rate(eval_in_batches(test_data, sess), test_labels)
    print('Test error: %.1f%%' % test_error)

Step 0 (epoch 0.00), 2.5 ms
Minibatch loss: 7.012, learning rate: 0.010000
Minibatch error: 96.9%
Step 100 (epoch 0.13), 90.6 ms
Minibatch loss: 5.410, learning rate: 0.010000
Minibatch error: 87.5%
Step 200 (epoch 0.26), 177.9 ms
Minibatch loss: 5.378, learning rate: 0.010000
Minibatch error: 89.1%
Step 300 (epoch 0.38), 273.9 ms
Minibatch loss: 5.340, learning rate: 0.010000
Minibatch error: 87.5%
Step 400 (epoch 0.51), 366.9 ms
Minibatch loss: 5.328, learning rate: 0.010000
Minibatch error: 96.9%
Step 500 (epoch 0.64), 460.4 ms
Minibatch loss: 5.280, learning rate: 0.010000
Minibatch error: 98.4%
Step 600 (epoch 0.77), 555.1 ms
Minibatch loss: 5.229, learning rate: 0.010000
Minibatch error: 90.6%
Step 700 (epoch 0.90), 647.7 ms
Minibatch loss: 5.220, learning rate: 0.010000
Minibatch error: 90.6%
Step 800 (epoch 1.02), 744.8 ms
Minibatch loss: 5.174, learning rate: 0.009500
Minibatch error: 89.1%
Step 900 (epoch 1.15), 834.7 ms
Minibatch loss: 5.134, learning rate: 0.009500
Minibatc