# Dense Neural Net on MNIST

In [1]:
import numpy as np
import os

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

In [2]:
from sklearn.datasets import fetch_mldata

# If this fails refer to ../sklearn/mnist-classification.ipynb on how to load mnist
mnist = fetch_mldata('MNIST original')

X_mnist = mnist['data']
y_mnist = mnist['target']

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_mnist = scaler.fit_transform(X_mnist)

from sklearn.model_selection import train_test_split
X_train_val, X_test, y_train_val, y_test = train_test_split(X_mnist, y_mnist, test_size=0.2,
                                                            random_state=23, shuffle=True)

# First 55k test, last 5k validation
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1, random_state=32)



In [3]:
# Initializers
n_inputs = 28*28  # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [4]:
import tensorflow as tf

reset_graph()

# Inputs
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y") 

  from ._conv import register_converters as _register_converters


In [5]:
# dense -> dense -> logit
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

In [6]:
with tf.name_scope("loss"):
    # logits returs probability distributions (sum(logits)=1)
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)

In [7]:
learning_rate = 0.01

# nothing new
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [8]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [9]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [10]:
n_epochs = 40
batch_size = 50

In [11]:
from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)

logdir = log_dir("mnist_dnn")

In [12]:
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [13]:
def get_batch(batch_size=10):
    r = np.random.permutation(len(X_train))
    return np.take(X_train, r[:batch_size], axis=0), np.take(y_train, r[:batch_size], axis=0)

In [14]:
m, n = X_train.shape

n_epochs = 10001
batch_size = 50

checkpoint_path = "/tmp/my_deep_mnist_model.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_deep_mnist_model"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        # if the checkpoint file exists, restore the model and load the epoch number
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Training was interrupted. Continuing at epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for iteration in range(100):  # 100 batches per epoch
            X_batch, y_batch = get_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary],
                                                                                  feed_dict={X: X_val, y: y_val})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("Early stopping")
                    break

Training was interrupted. Continuing at epoch 276
INFO:tensorflow:Restoring parameters from /tmp/my_deep_mnist_model.ckpt
Epoch: 280 	Validation accuracy: 96.750% 	Loss: 0.19650
Epoch: 285 	Validation accuracy: 96.643% 	Loss: 0.19714
Epoch: 290 	Validation accuracy: 96.679% 	Loss: 0.19836
Epoch: 295 	Validation accuracy: 96.643% 	Loss: 0.19670
Epoch: 300 	Validation accuracy: 96.679% 	Loss: 0.20000
Epoch: 305 	Validation accuracy: 96.732% 	Loss: 0.20110
Epoch: 310 	Validation accuracy: 96.750% 	Loss: 0.20031
Epoch: 315 	Validation accuracy: 96.661% 	Loss: 0.20089
Epoch: 320 	Validation accuracy: 96.750% 	Loss: 0.20143
Epoch: 325 	Validation accuracy: 96.768% 	Loss: 0.20186
Epoch: 330 	Validation accuracy: 96.875% 	Loss: 0.20288
Epoch: 335 	Validation accuracy: 96.768% 	Loss: 0.20249
Early stopping


In [15]:
with tf.Session() as sess:
    saver.restore(sess, final_model_path)
    accuracy_val = accuracy.eval(feed_dict={X: X_test, y: y_test})

accuracy_val

INFO:tensorflow:Restoring parameters from ./my_deep_mnist_model


0.9687857