In [5]:
import warnings
warnings.filterwarnings('ignore')
import math, sys, time, os
import numpy as np
import tensorflow as tf

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

In [6]:
reset_graph()
height = 28
width = 28
channels = 1
n_inputs = height * width

conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 2
conv2_pad = "SAME"

pool3_fmaps = conv2_fmaps
n_fc1 = 64
n_outputs = 10

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(tf.int32, shape=[None], name="y")
    
conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize, 
                         strides=conv1_stride, padding=conv1_pad, 
                         activation=tf.nn.relu, name="conv1")
conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize, 
                         strides=conv2_stride, padding=conv2_pad, 
                         activation=tf.nn.relu, name="conv2")

with tf.name_scope("pool3"):
    pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 7 * 7])
    
with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool3_flat, n_fc1, activation=tf.nn.relu, name="fc1")
    
with tf.name_scope("output"):
    logits = tf.layers.dense(fc1, n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")
    
with tf.name_scope("train"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

In [7]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [8]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [9]:
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}".format(root_logdir, now)

loss_summary = tf.summary.scalar("Loss", loss)
accuracy_summary = tf.summary.scalar("Accuracy", accuracy)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [11]:
n_epochs = 5
batch_size = 100
n_batch = np.ceil(len(X_train) / batch_size)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        summary_loss, summary_acc = sess.run([loss_summary, accuracy_summary], 
                                             feed_dict={X: X_batch, y: y_batch})
        file_writer.add_summary(summary_loss)
        file_writer.add_summary(summary_acc)
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("Epoch: ", epoch, "Last batch accuracy: ", acc_batch, "Test accuracy: ", acc_test)
        save_path = saver.save(sess, "./checkpoints/my_mnist_model")

file_writer.close()

Epoch:  0 Last batch accuracy:  0.98 Test accuracy:  0.9794
Epoch:  1 Last batch accuracy:  0.99 Test accuracy:  0.9775
Epoch:  2 Last batch accuracy:  0.98 Test accuracy:  0.9862
Epoch:  3 Last batch accuracy:  0.98 Test accuracy:  0.9889
Epoch:  4 Last batch accuracy:  1.0 Test accuracy:  0.9894


In [42]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [43]:
reset_graph()
height = 28
width = 28
inputs = 28 * 28
n_outputs = 10
learning_rate = 0.001

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, height, width], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, 1])
    y = tf.placeholder(tf.int32, shape=[None], name="y")
    training = tf.placeholder_with_default(False, shape=[], name="training")
    
with tf.name_scope("model"):
    conv1 = tf.layers.conv2d(X_reshaped, filters=32, kernel_size=3, strides=[1, 1], 
                            padding="SAME", activation=None, name="conv1")
    bn1 = tf.layers.batch_normalization(conv1, training=training, momentum=0.9)
    relu1 = tf.nn.relu(bn1)
    conv2 = tf.layers.conv2d(relu1, filters=64, kernel_size=3, strides=[1, 1], 
                            padding="SAME", activation=None, name="conv2")
    bn2 = tf.layers.batch_normalization(conv2, training=training, momentum=0.9)
    relu2 = tf.nn.relu(bn2)
    max_pool = tf.nn.max_pool(relu2, ksize=[1,2,2,1], strides=[1,2,2,1], 
                              padding="VALID", name="max_pooling")
    max_pool_flat = tf.reshape(max_pool, shape=[-1, 64 * 14 * 14])
    dense = tf.layers.dense(max_pool_flat, units=128, activation=tf.nn.relu, name="dense")
    logits = tf.layers.dense(dense, units=n_outputs, name="dense_layer")
    logits_bn = tf.layers.batch_normalization(logits, training=training, momentum=0.9)
    
with tf.name_scope("train"):
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_bn, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(extra_update_ops):
        training_op = optimizer.minimize(loss)
        
with tf.name_scope("accuracy"):
    correct = tf.equal(tf.argmax(logits, 1), tf.cast(y, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    init = tf.global_variables_initializer()

In [44]:
class mnist:
    def __init__(self, X, y):
        indices = np.random.permutation(len(X))
        self.X = X[indices]
        self.y = y[indices]
        self.i = 0
        
    def next_batch(self, batch_size):
        X_batch = self.X[self.i * batch_size : (self.i + 1) * batch_size]
        y_batch = self.y[self.i * batch_size : (self.i + 1) * batch_size]
        self.i += 1
        return X_batch, y_batch

In [45]:
def get_model_params():
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}

def restore_model_params(model_params):
    gvar_names = list(model_params.keys())
    assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + "/Assign")
                  for gvar_name in gvar_names}
    init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
    feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)

In [41]:
n_epochs = 10000
batch_size = 128
iteration = 0
best_loss = float("inf")
check_interval = 500
check_since_last_progress = 0
max_checks = 20
best_model_params = None

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        start_time = time.time()
        data = mnist(X_train, y_train)
        n_batches = math.ceil(len(X_train) / batch_size)
        for i in range(n_batches):
            iteration += 1
            X_batch, y_batch = data.next_batch(batch_size)
            print("\r{}%".format(100 * i // n_batches), end="")
            sys.stdout.flush()
            sess.run([training_op, extra_update_ops], feed_dict={X: X_batch, y: y_batch, training:True})
            if iteration % check_interval == 0:
                loss_val = loss.eval({X: X_valid, y: y_valid})
                if loss_val < best_loss:
                    best_loss = loss_val
                    check_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    check_since_last_progress += 1
        
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("\rEpoch: %s" % (epoch + 1),
                "\ttraining accuracy: %.4f" % acc_batch, 
                "\tvalid accuracy: %.4f" % acc_val, 
                "\ttest accuracy: %.4f" % acc_test, 
                "\ttime: %.2fs" % (time.time() - start_time))
        
        if check_since_last_progress > max_checks:
            print("Early Stopping !!!")
            break
            
    if best_model_params:
        restore_model_params(best_model_params)
    acc_test2 = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final Test Accuracy: ", acc_test2)

Epoch: 1 	training accuracy: 0.9205 	valid accuracy: 0.9466 	test accuracy: 0.9471 	time: 9.12s
Epoch: 2 	training accuracy: 0.9773 	valid accuracy: 0.9774 	test accuracy: 0.9776 	time: 9.05s
Epoch: 3 	training accuracy: 0.9886 	valid accuracy: 0.9748 	test accuracy: 0.9760 	time: 8.98s
Epoch: 4 	training accuracy: 1.0000 	valid accuracy: 0.9790 	test accuracy: 0.9814 	time: 9.02s
Epoch: 5 	training accuracy: 1.0000 	valid accuracy: 0.9880 	test accuracy: 0.9879 	time: 9.01s
Epoch: 6 	training accuracy: 1.0000 	valid accuracy: 0.9868 	test accuracy: 0.9879 	time: 9.02s
Epoch: 7 	training accuracy: 1.0000 	valid accuracy: 0.9884 	test accuracy: 0.9870 	time: 9.03s
Epoch: 8 	training accuracy: 1.0000 	valid accuracy: 0.9898 	test accuracy: 0.9885 	time: 8.90s
Epoch: 9 	training accuracy: 1.0000 	valid accuracy: 0.9866 	test accuracy: 0.9862 	time: 9.06s
Epoch: 10 	training accuracy: 1.0000 	valid accuracy: 0.9888 	test accuracy: 0.9903 	time: 9.04s
Epoch: 11 	training accuracy: 1.0000 	v

In [46]:
n_epochs = 10000
batch_size = 128
iteration = 0
best_loss = float("inf")
check_interval = 500
check_since_last_progress = 0
max_checks = 5
best_model_params = None

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        start_time = time.time()
        data = mnist(X_train, y_train)
        n_batches = math.ceil(len(X_train) / batch_size)
        for i in range(n_batches):
            iteration += 1
            X_batch, y_batch = data.next_batch(batch_size)
            print("\r{}%".format(100 * i // n_batches), end="")
            sys.stdout.flush()
            sess.run([training_op, extra_update_ops], feed_dict={X: X_batch, y: y_batch, training:True})
            if iteration % check_interval == 0:
                loss_val = loss.eval({X: X_valid, y: y_valid})
                if loss_val < best_loss:
                    best_loss = loss_val
                    check_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    check_since_last_progress += 1
        
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("\rEpoch: %s" % (epoch + 1),
                "\ttraining accuracy: %.4f" % acc_batch, 
                "\tvalid accuracy: %.4f" % acc_val, 
                "\ttest accuracy: %.4f" % acc_test, 
                "\ttime: %.2fs" % (time.time() - start_time))
        
        if check_since_last_progress > max_checks:
            print("Early Stopping !!!")
            break
            
    if best_model_params:
        restore_model_params(best_model_params)
    acc_test2 = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final Test Accuracy: ", acc_test2)

Epoch: 1 	training accuracy: 0.9432 	valid accuracy: 0.9538 	test accuracy: 0.9549 	time: 9.14s
Epoch: 2 	training accuracy: 0.9886 	valid accuracy: 0.9796 	test accuracy: 0.9794 	time: 9.03s
Epoch: 3 	training accuracy: 0.9773 	valid accuracy: 0.9738 	test accuracy: 0.9748 	time: 8.99s
Epoch: 4 	training accuracy: 0.9886 	valid accuracy: 0.9796 	test accuracy: 0.9805 	time: 8.99s
Epoch: 5 	training accuracy: 0.9886 	valid accuracy: 0.9870 	test accuracy: 0.9866 	time: 8.99s
Epoch: 6 	training accuracy: 1.0000 	valid accuracy: 0.9882 	test accuracy: 0.9883 	time: 9.02s
Epoch: 7 	training accuracy: 1.0000 	valid accuracy: 0.9882 	test accuracy: 0.9882 	time: 9.04s
Epoch: 8 	training accuracy: 1.0000 	valid accuracy: 0.9896 	test accuracy: 0.9899 	time: 8.86s
Epoch: 9 	training accuracy: 0.9886 	valid accuracy: 0.9864 	test accuracy: 0.9870 	time: 9.03s
Epoch: 10 	training accuracy: 1.0000 	valid accuracy: 0.9884 	test accuracy: 0.9901 	time: 9.04s
Epoch: 11 	training accuracy: 1.0000 	v