In [20]:
import tensorflow as tf
from tensorflow.contrib.layers import batch_norm
from functools import partial
import numpy as np
import os

def reset_graph(seed=42):
    """Start over with an empty default graph and re-seed both RNGs.

    Args:
        seed: Value handed to both ``np.random.seed`` and
            ``tf.set_random_seed``.
    """
    np.random.seed(seed)
    # The TensorFlow seed is set only after the reset, i.e. once the fresh
    # default graph is in place.
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [12]:
# Fetch MNIST through the Keras dataset helper; it returns a (train, test)
# pair of (inputs, labels) tuples.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every example into a row of 28*28 values, convert to float32 and
# divide by 255.
X_train = X_train.reshape(-1, 28*28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28*28).astype(np.float32) / 255.0

# Labels are stored as int32.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# The first 5000 training rows become the validation set; the remainder
# stays as the training set.
X_valid, X_train = np.split(X_train, [5000])
y_valid, y_train = np.split(y_train, [5000])

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [14]:
def selu(z,
         scale=1.0507009873554804934193349852946,
         alpha=1.6732632423543772848170429916717):
    """Scaled ELU activation built from TensorFlow ops.

    Element-wise, the result is ``scale * z`` where ``z >= 0`` and
    ``scale * alpha * elu(z)`` elsewhere.

    Args:
        z: Tensor of pre-activations.
        scale: Multiplier applied to the whole output.
        alpha: Multiplier applied to ``tf.nn.elu(z)`` on the negative side.

    Returns:
        The tensor produced by the final multiplication.
    """
    # Intermediate ops are created in the same order as a single nested
    # expression would create them: comparison, ELU branch, select, scale.
    non_negative = z >= 0.0
    negative_side = alpha * tf.nn.elu(z)
    return scale * tf.where(non_negative, z, negative_side)

def shuffle_batch(X, y, batch_size):
    """Yield randomly shuffled mini-batches that cover ``X`` and ``y`` once.

    A random permutation of the row indices is drawn with
    ``np.random.permutation`` and split into ``len(X) // batch_size`` groups
    with ``np.array_split``. Because the split is by number of groups, a
    batch can hold more than ``batch_size`` rows when ``len(X)`` is not an
    exact multiple of ``batch_size``.

    If there are fewer rows than ``batch_size`` the whole dataset is yielded
    as a single batch (previously this raised ``ValueError`` because the
    number of groups came out as zero). An empty dataset yields nothing.

    Args:
        X: Array of inputs, indexable with an integer index array.
        y: Array of targets; indexed with the same index arrays as ``X``.
        batch_size: Target number of rows per batch.

    Yields:
        ``(X_batch, y_batch)`` tuples sharing the same shuffled row indices.
    """
    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; avoid emitting an empty batch.
        return

    rnd_idx = np.random.permutation(n_samples)
    n_batches = n_samples // batch_size
    if n_batches == 0:
        # Only reachable when batch_size > n_samples > 0: fall back to one
        # batch holding every row instead of asking for zero sections.
        n_batches = 1

    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [19]:
# Build a fully connected classifier with two SELU hidden layers.
# Statement order is kept as-is: it fixes the order in which graph ops are created.
reset_graph()
n_inputs = 28*28  # length of one flattened input row
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10  # width of the logits layer

# Placeholders are filled through feed_dict; None leaves the batch dimension open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# Note: `(None)` is plain None rather than a 1-tuple, so y's shape is left unspecified.
y = tf.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=selu, name='hidden1')
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=selu, name='hidden2')
    # No activation on the output layer: the loss below takes raw logits.
    logits = tf.layers.dense(hidden2, n_outputs, name='outputs')
    
with tf.name_scope('loss'):
    # Per-example cross-entropy from integer labels, averaged into one scalar.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')
    
learning_rate = 0.01

with tf.name_scope('train'):
    # Plain gradient descent on the mean cross-entropy.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope('eval'):
    # With k=1, `correct` marks examples whose label is the top-scoring logit;
    # the mean of the cast values is the accuracy.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
# Created after the whole graph is defined so they see every variable in it.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Training settings for the SELU network.
n_epochs = 40
batch_size = 50

# Standardisation statistics are taken per feature from the training set
# only; the small constant added to the standard deviation keeps the
# division finite for features that never vary.
means = X_train.mean(axis=0, keepdims=True)
stds = X_train.std(axis=0, keepdims=True) + 1e-10
X_val_scaled = (X_valid - means) / stds

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            X_batch_scaled = (X_batch - means) / stds
            sess.run(training_op, feed_dict={X: X_batch_scaled, y: y_batch})

        # Report only on every fifth epoch, using the last mini-batch of the
        # epoch and the full validation set.
        if epoch % 5 != 0:
            continue
        acc_batch = sess.run(accuracy, feed_dict={X: X_batch_scaled, y: y_batch})
        acc_valid = sess.run(accuracy, feed_dict={X: X_val_scaled, y: y_valid})
        print(epoch, 'Batch accuracy:', acc_batch, 'validation accuracy:', acc_valid)

    # Persist the trained variables once all epochs are done.
    save_path = saver.save(sess, './my_model_final_selu.ckpt')

0 Batch accuracy: 0.88 validation accuracy: 0.9232
5 Batch accuracy: 0.98 validation accuracy: 0.9574
10 Batch accuracy: 1.0 validation accuracy: 0.9664
15 Batch accuracy: 0.96 validation accuracy: 0.9682
20 Batch accuracy: 1.0 validation accuracy: 0.9692
25 Batch accuracy: 1.0 validation accuracy: 0.969
30 Batch accuracy: 1.0 validation accuracy: 0.9694
35 Batch accuracy: 1.0 validation accuracy: 0.9698


In [22]:
# Batch normalization: same layer sizes as the SELU network, but every dense
# layer (including the output layer) is followed by batch normalization, and
# the hidden layers use ELU applied after the normalization.
# Statement order is kept as-is: it fixes the order in which graph ops are created.
reset_graph()
n_inputs = 28*28  # length of one flattened input row
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10  # width of the logits layer

batch_norm_momentum = 0.9  # passed as `momentum` to every batch-norm layer below

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# Note: `(None)` is plain None rather than a 1-tuple, so y's shape is left unspecified.
y = tf.placeholder(tf.int32, shape=(None), name='y')
# Defaults to False, so runs that do not feed it use inference mode; the
# training loop feeds True explicitly.
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope('dnn'):
    # NOTE(review): the name suggests He initialization, but the initializer is
    # built with default arguments; He init normally needs scale=2.0 -- confirm
    # against the tf.variance_scaling_initializer defaults.
    he_init = tf.variance_scaling_initializer()
    
    # partial() pins the arguments shared by all layers of the same kind.
    my_batch_norm_layer = partial(
        tf.layers.batch_normalization,
        training=training,
        momentum=batch_norm_momentum)
    my_dense_layer = partial(
        tf.layers.dense, 
        kernel_initializer=he_init)
    
    # Dense layers carry no activation; ELU is applied to the normalized output.
    hidden1 = my_dense_layer(X, n_hidden1, name='hidden1')
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name='hidden2')
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    # The output layer is batch-normalized too; no activation follows it.
    logits_before_bn = my_dense_layer(bn2, n_outputs, name='outputs')
    logits = my_batch_norm_layer(logits_before_bn)
    
with tf.name_scope('loss'):
    # Per-example cross-entropy from integer labels, averaged into one scalar.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')
    
learning_rate = 0.01

with tf.name_scope('train'):
    # Plain gradient descent on the mean cross-entropy.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope('eval'):
    # With k=1, `correct` marks examples whose label is the top-scoring logit;
    # the mean of the cast values is the accuracy.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
# Created after the whole graph is defined so they see every variable in it.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Training settings for the batch-normalized network.
n_epochs = 40
batch_size = 200
# tf.layers.batch_normalization registers its update ops in the UPDATE_OPS
# collection; they are not part of training_op, so they are fetched here and
# run explicitly alongside it at every training step.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            # `means`, `stds` (and `X_val_scaled` below) are the training-set
            # statistics computed in the SELU training cell earlier in the
            # notebook; that cell must have been run first.
            X_batch_scaled = (X_batch - means) / stds
            # training=True is fed only for the training step.
            sess.run([training_op, extra_update_ops],
                     feed_dict={training:True,X:X_batch_scaled, y:y_batch})
        if epoch % 5 == 0:
            # `training` is not fed here, so it takes its default of False.
            # Batch accuracy is measured on the last mini-batch of the epoch.
            acc_batch = accuracy.eval(feed_dict={X:X_batch_scaled, y:y_batch})
            acc_valid = accuracy.eval(feed_dict={X:X_val_scaled, y:y_valid})
            print(epoch, 'Batch accuracy:', acc_batch, 'validation accuracy:', acc_valid)
    # Saved under its own name: the previous path ('./my_model_final_selu.ckpt')
    # was the SELU model's checkpoint and was being overwritten by this model.
    save_path = saver.save(sess, './my_model_final_bn.ckpt')

0 Batch accuracy: 0.915 validation accuracy: 0.8864
5 Batch accuracy: 0.935 validation accuracy: 0.937
10 Batch accuracy: 0.98 validation accuracy: 0.9534
15 Batch accuracy: 0.975 validation accuracy: 0.9598
20 Batch accuracy: 0.98 validation accuracy: 0.9644
25 Batch accuracy: 0.98 validation accuracy: 0.9664
30 Batch accuracy: 1.0 validation accuracy: 0.9684
35 Batch accuracy: 0.975 validation accuracy: 0.9692
