In [2]:
import warnings
warnings.filterwarnings("ignore")
import math
import sys
import time
import numpy as np
import os
import tensorflow as tf

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

In [17]:
height = 28
width = 28
channels = 1
n_inputs = height * width

conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 2
conv2_pad = "SAME"

pool3_fmaps = conv2_fmaps

n_fc1 = 64
n_outputs = 10

reset_graph()

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(tf.int32, shape=[None], name="y")

conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv1")
conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")

with tf.name_scope("pool3"):
    pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 7 * 7])

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool3_flat, n_fc1, activation=tf.nn.relu, name="fc1")

with tf.name_scope("output"):
    logits = tf.layers.dense(fc1, n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

In [18]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [19]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [20]:
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}".format(root_logdir, now)

loss_summary = tf.summary.scalar("Loss", loss)
accuracy_summary = tf.summary.scalar("Accuracy", accuracy)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [21]:
n_epochs = 5
batch_size = 100
n_batch = np.ceil(len(X_train) / batch_size)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        summary_loss, summary_acc = sess.run([loss_summary, accuracy_summary], feed_dict={X: X_batch, y: y_batch})
        file_writer.add_summary(summary_loss)
        file_writer.add_summary(summary_acc)
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Last batch accuracy:", acc_batch, "Test accuracy:", acc_test)

        save_path = saver.save(sess, "./checkpoints/my_mnist_model")

0 Last batch accuracy: 0.99 Test accuracy: 0.9781
1 Last batch accuracy: 0.97 Test accuracy: 0.9831
2 Last batch accuracy: 0.98 Test accuracy: 0.9799
3 Last batch accuracy: 0.98 Test accuracy: 0.9881
4 Last batch accuracy: 0.99 Test accuracy: 0.9876


In [22]:
file_writer.close()

------

In [24]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [25]:
class mnist:
    def __init__(self, X, y):
        indices = np.random.permutation(len(X))
        self.X = X[indices]
        self.y = y[indices]
        self.i = 0

    def next_batch(self, batch_size):
        X_batch = self.X[self.i * batch_size : (self.i + 1) * batch_size]
        y_batch = self.y[self.i * batch_size : (self.i + 1) * batch_size]
        self.i += 1
        return X_batch, y_batch

In [65]:
reset_graph()

In [69]:
height = 28
width = 28
inputs = 28 * 28
n_outputs = 10
learning_rate = 0.001


with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, height, width], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, 1])
    y = tf.placeholder(tf.int32, shape=[None], name="y")

with tf.name_scope("model"):
    conv1 = tf.layers.conv2d(X_reshaped, filters=32, kernel_size=3, strides=[1, 1], 
                        padding="SAME", activation=tf.nn.relu, name="conv1")
    conv2 = tf.layers.conv2d(conv1, filters=64, kernel_size=3, strides=[1, 1],
                        padding="same", activation=tf.nn.relu, name="conv2")
    max_pool = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], 
                        padding="VALID", name="max_pooling")
    max_pool_flat = tf.reshape(max_pool, shape=[-1, 64 * 14 * 14])
    dense = tf.layers.dense(max_pool_flat, units=128, activation=tf.nn.relu, name="dense")
    logits = tf.layers.dense(dense, units=n_outputs, name="dense_layer")

with tf.name_scope("train"):
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("train"):
    correct = tf.equal(tf.argmax(logits, 1), tf.cast(y, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    init = tf.global_variables_initializer()

In [67]:
n_epochs = 20
batch_size = 128

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        start = time.time()
        data = mnist(X_train, y_train)
        n_batches = math.ceil(len(X_train) / batch_size)
        for i in range(n_batches):
            X_batch, y_batch = data.next_batch(batch_size)
            print("\r{}%".format(100 * i // n_batches), end="")
            sys.stdout.flush()
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        print("\rtraining time: %.2fs" % (time.time() - start))
            
        loss_train, acc = sess.run([loss, accuracy], feed_dict={ 
                                X: X_batch, y: y_batch})
        acc_test = accuracy.eval({X: X_test, y: y_test})
        print("\rEpoch: %s" % (epoch + 1),
                "\ttraining loss: %.4f" % loss_train, 
                "\ttraining accuracy: %.4f" % acc, 
                "\ttest accuracy: %.4f" % acc_test, 
                "\ttime: %.2fs" % (time.time() - start))

     #   batch *= 2  # conv linear + tf.nn.relu  # batchnorm

training time: 57.88s
Epoch: 1 	training loss: 0.0156 	training accuracy: 1.0000 	test accuracy: 0.9854 	time: 60.98s
training time: 57.91s
Epoch: 2 	training loss: 0.0351 	training accuracy: 0.9886 	test accuracy: 0.9881 	time: 61.00s
50%

KeyboardInterrupt: 

In [None]:
class mnist:
    def __init__(self, X, y):
        indices = np.random.permutation(len(X))
        self.X = X[indices]
        self.y = y[indices]
        self.i = 0

    def next_batch(self, batch_size):
        X_batch = self.X[self.i * batch_size : (self.i + 1) * batch_size]
        y_batch = self.y[self.i * batch_size : (self.i + 1) * batch_size]
        self.i += 1
        return X_batch, y_batch

In [68]:
reset_graph()

In [None]:
n_epochs = 20
batch_size = 128

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        start = time.time()
        data = mnist(X_train, y_train)
        n_batches = math.ceil(len(X_train) / batch_size)
        print("batch size: ", batch_size, "n_batches: ", n_batches)
        for i in range(n_batches):
            X_batch, y_batch = data.next_batch(batch_size)
            print("\r{}%".format(100 * i // n_batches), end="")
            sys.stdout.flush()
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        print("\rtraining time: %.2fs" % (time.time() - start))    
        
        loss_train, acc = sess.run([loss, accuracy], feed_dict={ 
                                X: X_batch, y: y_batch})
        acc_test = accuracy.eval({X: X_test, y: y_test})
        batch_size *= 2                                ################ batch * 2,  check test ACCURACY & PAPER
        print("\rEpoch: %s" % (epoch + 1),
                "\ttraining loss: %.4f" % loss_train, 
                "\ttraining accuracy: %.4f" % acc, 
                "\ttest accuracy: %.4f" % acc_test, 
                "\ttime: %.2fs" % (time.time() - start))

batch size:  128 n_batches:  430
training time: 58.05s
Epoch: 1 	training loss: 0.0156 	training accuracy: 1.0000 	test accuracy: 0.9854 	time: 61.19s
batch size:  256 n_batches:  215
training time: 60.10s
Epoch: 2 	training loss: 0.0543 	training accuracy: 0.9769 	test accuracy: 0.9881 	time: 63.25s
batch size:  512 n_batches:  108
training time: 61.80s
Epoch: 3 	training loss: 0.0178 	training accuracy: 0.9954 	test accuracy: 0.9891 	time: 64.98s
batch size:  1024 n_batches:  54
training time: 61.50s
Epoch: 4 	training loss: 0.0088 	training accuracy: 0.9973 	test accuracy: 0.9902 	time: 64.81s
batch size:  2048 n_batches:  27
training time: 61.36s
Epoch: 5 	training loss: 0.0084 	training accuracy: 0.9960 	test accuracy: 0.9898 	time: 65.01s
batch size:  4096 n_batches:  14
training time: 61.10s
Epoch: 6 	training loss: 0.0096 	training accuracy: 0.9977 	test accuracy: 0.9904 	time: 64.65s
batch size:  8192 n_batches:  7
training time: 60.89s
Epoch: 7 	training loss: 0.0071 	trainin

In [59]:
reset_graph()

In [60]:
height = 28   ######### conv linear + relu
width = 28
inputs = 28 * 28
n_outputs = 10
learning_rate = 0.001


with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, height, width], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, 1])
    y = tf.placeholder(tf.int32, shape=[None], name="y")

with tf.name_scope("model"):
    conv1 = tf.layers.conv2d(X_reshaped, filters=32, kernel_size=3, strides=[1, 1], 
                        padding="SAME", activation=None, name="conv1")
    relu1 = tf.nn.relu(conv1)
    conv2 = tf.layers.conv2d(relu1, filters=64, kernel_size=3, strides=[1, 1],
                        padding="same", activation=None, name="conv2")
    relu2 = tf.nn.relu(conv2)
    max_pool = tf.nn.max_pool(relu2, ksize=[1,2,2,1], strides=[1,2,2,1], 
                        padding="VALID", name="max_pooling")
    max_pool_flat = tf.reshape(max_pool, shape=[-1, 64 * 14 * 14])
    dense = tf.layers.dense(max_pool_flat, units=128, activation=tf.nn.relu, name="dense")
    logits = tf.layers.dense(dense, units=n_outputs, name="dense_layer")

with tf.name_scope("train"):
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("train"):
    correct = tf.equal(tf.argmax(logits, 1), tf.cast(y, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    init = tf.global_variables_initializer()

In [61]:
n_epochs = 20
batch_size = 128

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        start = time.time()
        data = mnist(X_train, y_train)
        n_batches = math.ceil(len(X_train) / batch_size)
        for i in range(n_batches):
            X_batch, y_batch = data.next_batch(batch_size)
            print("\r{}%".format(100 * i // n_batches), end="")
            sys.stdout.flush()
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_train, acc = sess.run([loss, accuracy], feed_dict={ 
                                X: X_batch, y: y_batch})
        acc_test = accuracy.eval({X: X_test, y: y_test})
        print("\rEpoch: %s" % (epoch + 1),
                "\ttraining loss: %.4f" % loss_train, 
                "\ttraining accuracy: %.4f" % acc, 
                "\ttest accuracy: %.4f" % acc_test, 
                "\ttime: %.2fs" % (time.time() - start))

Epoch: 1 	training loss: 0.015613079 	training accuracy: 1.0 	test accuracy: 0.9854 	time: 61.22s
Epoch: 2 	training loss: 0.035062652 	training accuracy: 0.9886364 	test accuracy: 0.9881 	time: 60.80s
24%

KeyboardInterrupt: 

In [55]:
reset_graph()

In [56]:
height = 28
width = 28
inputs = 28 * 28
n_outputs = 10
learning_rate = 0.001
conv2_dropout_rate = 0.25
dense_dropout_rate = 0.5


with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, height, width], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, 1])
    y = tf.placeholder(tf.int32, shape=[None], name="y")
    training = tf.placeholder_with_default(False, shape=[], name="training")

with tf.name_scope("model"):
    conv1 = tf.layers.conv2d(X_reshaped, filters=32, kernel_size=3, strides=[1, 1], 
                        padding="SAME", activation=tf.nn.relu, name="conv1")
    conv2 = tf.layers.conv2d(conv1, filters=64, kernel_size=3, strides=[1, 1],
                        padding="same", activation=tf.nn.relu, name="conv2")
    max_pool = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], 
                        padding="VALID", name="max_pooling")
    max_pool_flat = tf.reshape(max_pool, shape=[-1, 64 * 14 * 14])
    max_pool_drop = tf.layers.dropout(max_pool_flat, conv2_dropout_rate, training=training)
    dense = tf.layers.dense(max_pool_flat, units=128, activation=tf.nn.relu, name="dense")
    dense_drop = tf.layers.dropout(dense, dense_dropout_rate, training=training)
    logits = tf.layers.dense(dense, units=n_outputs, name="dense_layer")

with tf.name_scope("train"):
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("train"):
    correct = tf.equal(tf.argmax(logits, 1), tf.cast(y, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    init = tf.global_variables_initializer()

In [57]:
def get_model_params():
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}

def restore_model_params(model_params):
    gvar_names = list(model_params.keys())
    assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + "/Assign")
                  for gvar_name in gvar_names}
    init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
    feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)

In [58]:
n_epochs = 10000
batch_size = 128
iteration = 0
best_loss = np.infty
check_interval = 500
check_since_last_progress = 0
max_checks = 20
best_model_params = None

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        start = time.time()
        data = mnist(X_train, y_train)
        n_batches = math.ceil(len(X_train) / batch_size)
        for i in range(n_batches):
            iteration += 1
            X_batch, y_batch = data.next_batch(batch_size)
            print("\r{}%".format(100 * i // n_batches), end="")
            sys.stdout.flush()
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if iteration % check_interval == 0:
                loss_val = loss.eval({X: X_valid, y: y_valid})
                if loss_val < best_loss:
                    best_loss = loss_val
                    check_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    check_since_last_progress += 1
        
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("\rEpoch: %s" % (epoch + 1),
                "\ttraining accuracy: %.4f" % acc_batch, 
                "\tvalid accuracy: %.4f" % acc_val, 
                "\ttest accuracy: %.4f" % acc_test, 
                "\ttime: %.2fs" % (time.time() - start))
        if check_since_last_progress > max_checks:
            print("Early Stopping")
            break
            
    if best_model_params:
        restore_model_params(best_model_params)
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Final accuracy on test set:", acc_test)

Epoch: 1 	training accuracy: 1.0 	valid accuracy: 0.9842 	test accuracy: 0.9844 	time: 62.87s
Epoch: 2 	training accuracy: 0.9886364 	valid accuracy: 0.9872 	test accuracy: 0.9867 	time: 64.04s
Epoch: 3 	training accuracy: 0.97727275 	valid accuracy: 0.9764 	test accuracy: 0.9766 	time: 64.06s
Epoch: 4 	training accuracy: 1.0 	valid accuracy: 0.988 	test accuracy: 0.9891 	time: 64.08s
Epoch: 5 	training accuracy: 1.0 	valid accuracy: 0.9894 	test accuracy: 0.9886 	time: 64.16s
Epoch: 6 	training accuracy: 1.0 	valid accuracy: 0.9904 	test accuracy: 0.989 	time: 64.08s
Epoch: 7 	training accuracy: 1.0 	valid accuracy: 0.991 	test accuracy: 0.9888 	time: 64.17s
Epoch: 8 	training accuracy: 1.0 	valid accuracy: 0.9906 	test accuracy: 0.9895 	time: 62.59s
Epoch: 9 	training accuracy: 1.0 	valid accuracy: 0.989 	test accuracy: 0.9896 	time: 64.10s
Epoch: 10 	training accuracy: 1.0 	valid accuracy: 0.9878 	test accuracy: 0.9875 	time: 63.69s
Epoch: 11 	training accuracy: 1.0 	valid accuracy:

KeyboardInterrupt: 