In [1]:
import tensorflow as tf
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [3]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

#### Build Computation Graph
* remove activation from layer, perform batch norm before activation
* use partial function to stay DRY
* drop out regularization

In [4]:
def leaky_relu(z, name=None):
    return tf.maximum(0.01 * z, z, name=name)

```python
# can also use dense layer
with tf.name_scope("dnn"):
    training = tf.placeholder_with_default(False, shape=(), name="training")
    
    # dense layer uses xavier initialization by default, can change to he initialization
    he_init = tf.contrib.layers.variance_scaling_initializer()
    
    # exponential, do not use activation
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", kernel_initializer=he_init)
    bn1 = tf.layers.batch_normalization(hidden1, training=training, momentum=0.9)
    bn1_act = tf.nn.elu(bn1)
    
    # plug in leaky_relu as activation
    hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="hidden2")
    bn2 = tf.layers.batch_normalization(hidden2, training=training, momentum=0.9)
    bn2_act = leaky_relu(bn2)
    
    logits_before_bn = tf.layers.dense(hidden2, n_outputs, name="outputs")
    logits = tf.layers.batch_normalization(logits_before_bn, training=training, momentum=0.9)
    logits = tf.nn.relu(logits)
```

In [5]:
from functools import partial

# drop some of the input
training = tf.placeholder_with_default(False, shape=(), name="training")
drop_rate = 0.1
X_drop = tf.layers.dropout(X, drop_rate, training=training)

# can also use dense layer
with tf.name_scope("dnn"):
    
    bn_layer = partial(tf.layers.batch_normalization, training=training, momentum=0.9)
    
    # dense layer uses xavier initialization by default, can change to he initialization
    he_init = tf.contrib.layers.variance_scaling_initializer()
    
    # exponential, do not use activation
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", kernel_initializer=he_init)
    hidden1_drop = tf.layers.dropout(hidden1, drop_rate, training=training)
    bn1 = bn_layer(hidden1_drop)
    bn1_act = tf.nn.elu(bn1)
    
    # plug in leaky_relu as activation
    hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="hidden2")
    hidden2_drop = tf.layers.dropout(hidden2, drop_rate, training=training)
    bn2 = bn_layer(hidden2_drop)
    bn2_act = leaky_relu(bn2)
    
    logits_before_bn = tf.layers.dense(hidden2, n_outputs, name="outputs")
    logits = bn_layer(logits_before_bn)
    logits = tf.nn.relu(logits)

Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use keras.layers.batch_normalization instead.


In [6]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [7]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
    # for batch norm to keep running average
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

Instructions for updating:
Use tf.cast instead.


In [8]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [9]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

#### Execution Phase

In [10]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [11]:
n_epochs = 5
batch_size = 50

In [12]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for interaction in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            feed = {X: X_batch, y: y_batch, training: True}
            sess.run([training_op, extra_update_ops], feed_dict=feed)
        acc_train = accuracy.eval(feed_dict=feed)
        
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print("epoch %i, train acc %.5f, val acc %.5f"%(epoch, acc_train, acc_val))
    saver.save(sess, "outputs/final_model.ckpt")

epoch 0, train acc 0.86000, val acc 0.90220
epoch 1, train acc 0.82000, val acc 0.92020
epoch 2, train acc 0.90000, val acc 0.93220
epoch 3, train acc 0.90000, val acc 0.94120
epoch 4, train acc 0.96000, val acc 0.94380


#### Test

In [13]:
with tf.Session() as sess:
    saver.restore(sess, "outputs/final_model.ckpt")
    Z = logits.eval(feed_dict={X: mnist.test.images})
    acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
acc_test

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from outputs/final_model.ckpt


0.945