### MNIST DNN with Estimators & Layers

In this example, we will use a deep neural network to classify the MNIST dataset. This code is very similar to the 5th example (deep_neural_network_mnist_layers); however, instead of manually managing the session and TensorBoard, we let Estimators handle both for us.

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

When using Estimators, we do not manage the TensorFlow session directly. Instead, we skip straight to defining our hyperparameters.

In [None]:
# Width (number of neurons) of the first and second hidden layers.
HIDDEN1_SIZE, HIDDEN2_SIZE = 500, 250

# Input and output sizes of the network.
NUM_PIXELS = 28 * 28  # values per image (28 x 28 pixels)
NUM_CLASSES = 10  # one class per digit, 0-9

# Training schedule. Experiment with the number of training
# steps to see the effect.
BATCH_SIZE = 100
TRAIN_STEPS = 2000

# Per the original notes, this notebook uses a different learning
# rate than the previous one, and a new optimizer.
LEARNING_RATE = 0.001

In [None]:
# Define our model
def model_fn(features, labels, mode):
    """Build the two-hidden-layer classifier graph for the Estimator.

    Args:
        features: Batch of input examples, fed directly into the first
            dense layer (presumably flattened images of NUM_PIXELS values
            each -- confirm against the input function).
        labels: One-hot labels, passed as `onehot_labels` to the softmax
            cross-entropy loss.
        mode: A `tf.estimator.ModeKeys` value. Dropout is only applied
            when this is `TRAIN`.

    Returns:
        A `tf.estimator.EstimatorSpec` carrying the loss and the train op.

    Note:
        The loss is always built from `labels`, so modes that supply no
        labels (e.g. prediction) are still not supported by this function.
    """
    # First we'll create 2 fully-connected layers, with ReLU activations.
    fc1 = tf.layers.dense(features, HIDDEN1_SIZE, activation=tf.nn.relu, name="fc1")
    fc2 = tf.layers.dense(fc1, HIDDEN2_SIZE, activation=tf.nn.relu, name="fc2")

    # Next, we'll apply Dropout to the second layer.
    # This can help prevent overfitting, and it is added here for
    # illustration. Dropout must only be active while training: applying
    # it during evaluation would randomly zero activations and distort
    # the reported loss. To disable dropout entirely, replace this
    # if/else with `hidden = fc2`.
    if mode == tf.estimator.ModeKeys.TRAIN:
        hidden = tf.nn.dropout(fc2, keep_prob=0.9, name="dropout1")
    else:
        hidden = fc2

    # Finally, we'll calculate logits. This will be
    # the input to our Softmax function. Notice we
    # don't apply an activation at this layer.
    y = tf.layers.dense(hidden, NUM_CLASSES, name="output")

    # Compute the loss
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=labels, logits=y)

    tf.summary.scalar('loss', loss)

    learning_rate = LEARNING_RATE
    # Alternate learning rate calculation
    #learning_rate = tf.train.exponential_decay(
    #  LEARNING_RATE, tf.train.get_global_step(), 100000, 0.96)

    # Define the optimizer. Passing the global step lets the optimizer
    # increment it on every update.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Return an estimator spec that encapsulates your model.
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, train_op=train_op)


In [None]:
# Define our data input function
def input_fn():
    """Return the tensors for the next (images, labels) training batch.

    Loads the MNIST training split (one-hot labels) via
    `input_data.read_data_sets('/tmp/data', ...)`, pairs images with
    labels, and builds a pipeline that repeats forever, shuffles with a
    buffer as large as the training set, and emits batches of
    BATCH_SIZE examples.
    """
    # Use the new datasets API!
    train_split = input_data.read_data_sets('/tmp/data', one_hot=True).train
    images, labels = train_split.images, train_split.labels
    num_examples = labels.shape[0]
    assert images.shape[0] == num_examples  # Same number of examples

    dataset_cls = tf.contrib.data.Dataset
    pairs = dataset_cls.zip((
        dataset_cls.from_tensor_slices(images),
        dataset_cls.from_tensor_slices(labels),
    ))
    batches = (
        pairs
        .repeat()  # repeat indefinitely
        .shuffle(buffer_size=num_examples)
        .batch(BATCH_SIZE)
    )

    iterator = batches.make_one_shot_iterator()
    return iterator.get_next()

In [None]:
# Create the Estimator, using model_fn as the model definition.
# No model_dir or config is passed here, so the Estimator uses its defaults.
estimator = tf.estimator.Estimator(model_fn=model_fn)

In [None]:
# Train the estimator using our input function.
# `steps=TRAIN_STEPS` limits this call to TRAIN_STEPS training steps.
estimator.train(input_fn=input_fn, steps=TRAIN_STEPS)

Exercise for the reader: support the other Estimator modes (i.e. evaluate and predict).