# Predicting MNIST Digits with Neural Nets:

Train a deep MLP on the MNIST dataset and see if you can get over 98% precision. Just like in the last exercise of Chapter 9, try adding all the bells and whistles (i.e., save checkpoints, restore the last checkpoint in case of an interruption, add summaries, plot learning curves using TensorBoard, and so on)

In [9]:
import tensorflow as tf
import numpy as np
n_inputs = 28*28 #MNIST features
n_hidden1 = 150
n_hidden2 = 150
n_hidden3 = 150
n_outputs = 10

#### Define input data placeholders and layers

First lets create the deep net. Add a tf.summary.scalar() to track the loss and the accuracy during training so we can view nice learning curves using TensorBoard.

In [3]:
tf.reset_default_graph()

#create input data placeholders
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

#crete hidden and output layers
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", 
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
                              activation=tf.nn.relu)
    hidden3 = tf.layers.dense(hidden2, n_hidden3, name="hidden3",
                              activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

#### Define loss function and gradient descent

In [4]:
with tf.name_scope("loss"):
    #calculate cross entropy cost function with softmax for each 
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, 
                                                              logits=logits)
    #take mean crossentropy across all observations
    loss = tf.reduce_mean(xentropy, 
                          name="loss")
    #record mean cross entropy (loss)
    loss_summary = tf.summary.scalar('log_loss', 
                                     loss)
    
learning_rate=0.01
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

#### Assess model accuracy

In [5]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits,y,1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    #record accuracy
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

#### Create function to create time specific log_dirs

In [6]:
from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)

log_dir('TEST')

'tf_logs/TEST-run-20180129224651/'

In [11]:
#create FileWriter that we use to write TensorBoard logs
logdir = log_dir("MCs_MNIST_DNN")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

### Run the model!

Hey! Why don't we implement early stopping? For this, we are going to need a validation set. Luckily, the dataset returned by TensorFlow's input_data() function (see above) is already split into a training set (60,000 instances, already shuffled for us), a validation set (5,000 instances) and a test set (5,000 instances). So we can easily define X_valid and y_valid:

In [7]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data")
X_train = mnist.train.images
X_test = mnist.test.images
y_train = mnist.train.labels.astype("int")
y_test = mnist.test.labels.astype("int")

X_valid = mnist.validation.images
y_valid = mnist.validation.labels

print(X_train.shape)
m, n = X_train.shape

Extracting /tmp/data\train-images-idx3-ubyte.gz
Extracting /tmp/data\train-labels-idx1-ubyte.gz
Extracting /tmp/data\t10k-images-idx3-ubyte.gz
Extracting /tmp/data\t10k-labels-idx1-ubyte.gz
(55000, 784)


In [12]:
import os
#initialize initializer and saver
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/my_deep_mnist_model.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_deep_mnist_model"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        # if the checkpoint file exists, restore the model and load the epoch number
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Training was interrupted. Continuing at epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("Early stopping")
                    break

os.remove(checkpoint_epoch_path)

with tf.Session() as sess:
    saver.restore(sess, final_model_path)
    accuracy_val = accuracy.eval(feed_dict={X: X_test, y: y_test})

Epoch: 0 	Validation accuracy: 89.940% 	Loss: 0.38489
Epoch: 5 	Validation accuracy: 94.600% 	Loss: 0.19169
Epoch: 10 	Validation accuracy: 96.240% 	Loss: 0.14079
Epoch: 15 	Validation accuracy: 96.780% 	Loss: 0.11758
Epoch: 20 	Validation accuracy: 97.160% 	Loss: 0.10222
Epoch: 25 	Validation accuracy: 97.460% 	Loss: 0.08975
Epoch: 30 	Validation accuracy: 97.420% 	Loss: 0.08348
Epoch: 35 	Validation accuracy: 97.800% 	Loss: 0.07867
Epoch: 40 	Validation accuracy: 97.700% 	Loss: 0.07624
Epoch: 45 	Validation accuracy: 97.800% 	Loss: 0.07461
Epoch: 50 	Validation accuracy: 98.040% 	Loss: 0.07239
Epoch: 55 	Validation accuracy: 98.040% 	Loss: 0.07349
Epoch: 60 	Validation accuracy: 97.940% 	Loss: 0.07243
Epoch: 65 	Validation accuracy: 98.100% 	Loss: 0.07067
Epoch: 70 	Validation accuracy: 98.080% 	Loss: 0.07124
Epoch: 75 	Validation accuracy: 98.160% 	Loss: 0.07158
Epoch: 80 	Validation accuracy: 98.180% 	Loss: 0.07208
Epoch: 85 	Validation accuracy: 97.960% 	Loss: 0.07318
Epoch: 90 	V

In [14]:
accuracy_val

0.97750002

### 97.75% accuracy! Not bad!!! We can round that up to 98% for now!