In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import numpy as np
import pylab
import math

%matplotlib inline
pylab.rcParams['figure.figsize'] = (10, 10)
import matplotlib.pyplot as plt

In [2]:
# Notebook-wide handle to the currently active InteractiveSession (None until
# ResetSession() has been called at least once).
sess = None
def ResetSession():
    """Start from a clean slate: close the old session, reset the graph, open a new session.

    The previous session (if any) is closed *before* the default graph is
    reset. TensorFlow documents calling `tf.reset_default_graph()` while a
    session is still active as undefined behaviour, so the order matters.

    Side effects:
        Rebinds the module-level `sess` to a fresh `tf.InteractiveSession`.
    """
    global sess
    if sess is not None:
        sess.close()
        # Drop the stale handle so a failure below cannot leave a closed session bound.
        sess = None
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

In [9]:
# Train and evaluate a single-hidden-layer softmax classifier on MNIST.
ResetSession()

# Load MNIST from /tmp/data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

NUM_CLASSES = 10
NUM_PIXELS = 28*28
TRAIN_STEPS = 5000
BATCH_SIZE = 100

HIDDEN1_UNITS = 512
# The optimizer below previously hard-coded 0.5 and ignored this constant
# (which was declared as 0.1). The constant now carries the value that is
# actually used, so training behaviour is unchanged.
LEARNING_RATE = 0.5

# Placeholders for a batch of flattened images and their one-hot labels.
x = tf.placeholder(dtype = tf.float32, shape = [None, NUM_PIXELS], name = "pixels")
y_ = tf.placeholder(dtype = tf.float32, shape = [None, NUM_CLASSES], name = "labels")

def weight_variable(num_inputs, num_outputs, name):
    """Return a [num_inputs, num_outputs] weight Variable.

    Values are drawn from a truncated normal with stddev 1/sqrt(num_inputs).
    """
    initial = tf.truncated_normal(shape = [num_inputs, num_outputs], stddev=1.0/ math.sqrt(float(num_inputs)))
    return tf.Variable(initial, name=name)

def bias_variable(shape, name):
    """Return a zero-initialised bias Variable of length `shape`."""
    initial = tf.constant(0.0, shape = [shape])
    return tf.Variable(initial, name=name)

# Input layer -> hidden layer (ReLU activation).
weights1 = weight_variable(NUM_PIXELS, HIDDEN1_UNITS, "weights1")
biases1 = bias_variable(HIDDEN1_UNITS, "biases1")
hidden1 = tf.nn.relu(tf.matmul(x, weights1) + biases1, name="hidden1")

# Hidden layer -> output layer. `y` holds unnormalised logits; the softmax is
# applied inside the loss op below.
weights2 = weight_variable(HIDDEN1_UNITS, NUM_CLASSES, "weights2")
biases2 = bias_variable(NUM_CLASSES, "biases2")
y = tf.add(tf.matmul(hidden1, weights2), biases2)

# Dump the graph definition for TensorBoard. tf.summary.FileWriter is the
# non-deprecated name for tf.train.SummaryWriter (same interface).
summary_writer = tf.summary.FileWriter("summaries/single_hidden_layer", graph=tf.get_default_graph())
summary_writer.close()

# Loss: mean softmax cross-entropy over the batch. Keyword arguments make the
# logits/labels roles explicit and independent of positional order.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
train_step = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE).minimize(cross_entropy)

# Initialise all variables (non-deprecated replacement for
# tf.initialize_all_variables).
sess.run(tf.global_variables_initializer())

# Training loop: each run of train_step updates the model parameters.
for i in range(TRAIN_STEPS):
    batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
    _, loss = sess.run([train_step, cross_entropy], feed_dict={x: batch_xs, y_: batch_ys})
    if i%200 == 0:
        print("loss is %f" % loss)

# A prediction is correct when the arg-max logit agrees with the arg-max label.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

# Accuracy is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Evaluate on the full test split.
print("\nAccuracy of trained model on test images is %f" % sess.run(accuracy, feed_dict = {x:mnist.test.images, y_: mnist.test.labels}))

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please switch to tf.summary.FileWriter. The interface and behavior is the same; this is just a rename.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
loss is 2.302305
loss is 0.161721
loss is 0.201341
loss is 0.185893
loss is 0.064358
loss is 0.104824
loss is 0.056118
loss is 0.059488
loss is 0.079860
loss is 0.037278
loss is 0.046379
loss is 0.022636
loss is 0.022660
loss is 0.024213
loss is 0.016777
loss is 0.046433
loss is 0.037938
loss is 0.008680
loss is 0.003901
loss is 0.025421
loss is 0.017159
loss is 0.018320
loss is 0.013975
loss is 0.026688
loss is 0.007928

Accuracy of trained model is 0.982300


### Exercise

Add a second hidden layer with 64 units to the code above. Experiment with the hyperparameters (batch size, number of training steps, learning rate, units per layer) to see whether you can achieve higher accuracy than the single-hidden-layer model. Keep in mind that results vary from run to run because the weights are initialized randomly.

In [14]:
def run(train_steps, batch_size, units1, units2, learning_rate):
    """Train a two-hidden-layer MNIST classifier and return its test accuracy.

    Generalizes the single-hidden-layer setup by adding a second ReLU layer
    and exposing the main hyperparameters as arguments.

    Args:
        train_steps: number of gradient-descent steps to run.
        batch_size: number of training examples fed per step.
        units1: width of the first hidden layer.
        units2: width of the second hidden layer.
        learning_rate: step size passed to GradientDescentOptimizer.

    Returns:
        The accuracy on `mnist.test`, as returned by `sess.run(accuracy, ...)`.

    Side effects:
        Resets the global graph/session via ResetSession(), reads MNIST from
        /tmp/data/, writes the graph to summaries/two_hidden_layers and prints
        the loss every 200 steps plus the final accuracy.
    """
    ResetSession()

    mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

    NUM_CLASSES = 10
    NUM_PIXELS = 28*28
    TRAIN_STEPS = train_steps
    BATCH_SIZE = batch_size

    HIDDEN1_UNITS = units1
    HIDDEN2_UNITS = units2
    LEARNING_RATE = learning_rate

    x = tf.placeholder(dtype = tf.float32, shape = [None, NUM_PIXELS], name = "pixels")
    y_ = tf.placeholder(dtype = tf.float32, shape = [None, NUM_CLASSES], name = "labels")

    def weight_variable(inputs, outputs, name):
        """Weight Variable of shape [inputs, outputs], truncated-normal init with stddev 1/sqrt(inputs)."""
        initial = tf.truncated_normal(shape = [inputs, outputs], stddev=1.0/ math.sqrt(float(inputs)))
        return tf.Variable(initial, name=name)

    def bias_variable(shape, name):
        """Zero-initialised bias Variable of length `shape`."""
        initial = tf.constant(0.0, shape = [shape])
        return tf.Variable(initial, name=name)

    # Input layer -> first hidden layer.
    weights1 = weight_variable(NUM_PIXELS, HIDDEN1_UNITS, "weights1")
    biases1 = bias_variable(HIDDEN1_UNITS, "biases1")
    hidden1 = tf.nn.relu(tf.matmul(x, weights1) + biases1, name="hidden1")

    # First hidden layer -> second hidden layer.
    weights2 = weight_variable(HIDDEN1_UNITS, HIDDEN2_UNITS, "weights2")
    biases2 = bias_variable(HIDDEN2_UNITS, "biases2")
    hidden2 = tf.nn.relu(tf.matmul(hidden1, weights2) + biases2, name = "hidden2")

    # Second hidden layer -> output logits.
    weights3 = weight_variable(HIDDEN2_UNITS, NUM_CLASSES, "weights3")
    biases3 = bias_variable(NUM_CLASSES, "biases3")
    y = tf.add(tf.matmul(hidden2, weights3), biases3)

    # Write the graph to its own directory. The previous path
    # ("summaries/single_hidden_layer") was copied from the one-layer model, so
    # both models' graphs landed in the same TensorBoard log directory.
    summary_writer = tf.summary.FileWriter("summaries/two_hidden_layers", graph=tf.get_default_graph())
    summary_writer.close()

    # Loss: mean softmax cross-entropy; keyword arguments keep logits/labels unambiguous.
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))

    # Train op.
    train_step = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE).minimize(cross_entropy)

    # Init op (non-deprecated replacement for tf.initialize_all_variables).
    sess.run(tf.global_variables_initializer())

    # Training loop: parameters are updated on every train_step run.
    for i in range(TRAIN_STEPS):
        batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
        _, loss = sess.run([train_step, cross_entropy], feed_dict={x: batch_xs, y_: batch_ys})
        if i%200 == 0:
            print("At step %d, loss is %f" % (i, loss))

    # A prediction is correct when the arg-max logit agrees with the arg-max label.
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

    # Accuracy is the fraction of correct predictions.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Evaluate the test split once and reuse the value for both the log line
    # and the return value (it used to be computed twice).
    test_accuracy = sess.run(accuracy, feed_dict = {x:mnist.test.images, y_: mnist.test.labels})
    print("Accuracy is %f" % test_accuracy)

    return test_accuracy

In [15]:
class GsRunner():
    """Exhaustive grid search over the hyperparameters of the `run` function.

    Attributes:
        HP: dict mapping each hyperparameter name to the list of candidate
            values to try.
        best: dict holding the highest score seen so far under "val" together
            with the hyperparameter values that produced it (None until some
            run scores above 0).
    """

    def __init__(
        self,
        train_steps = None,
        batch_size = None,
        units1 = None,
        units2 = None,
        learning_rate = None
                ):
        """Store the candidate values for every hyperparameter.

        Each argument is an iterable of candidate values. Omitted arguments
        fall back to the historical defaults: train_steps [100, 500, 1000],
        batch_size [100], units1/units2 [256, 512], learning_rate
        [.1, .3, .5]. A fresh list is built per instance so that instances
        never share (or mutate) a common default list.
        """
        self.best = {
            "val": 0,
            "train_steps": None,
            "batch_size": None,
            "units1": None,
            "units2": None,
            "learning_rate": None
        }
        self.HP = {
            "train_steps": [100, 500, 1000] if train_steps is None else list(train_steps),
            "batch_size": [100] if batch_size is None else list(batch_size),
            "units1": [256, 512] if units1 is None else list(units1),
            "units2": [256, 512] if units2 is None else list(units2),
            "learning_rate": [.1, .3, .5] if learning_rate is None else list(learning_rate),
        }

    def update(self, val, **kwargs):
        """Record `kwargs` as the best configuration if `val` strictly beats the current best."""
        if val > self.best["val"]:
            self.best["val"] = val
            self.best.update(kwargs)

    def runner(self):
        """Call `run` for every hyperparameter combination and return the best result.

        Combinations are visited in the same order as nested loops over
        train_steps, batch_size, units1, units2, learning_rate (outermost to
        innermost).

        Returns:
            The `best` dict (also printed).
        """
        from itertools import product

        # Order matters: it is both the loop nesting order and the positional
        # argument order of `run`.
        names = ("train_steps", "batch_size", "units1", "units2", "learning_rate")
        for combo in product(*(self.HP[name] for name in names)):
            self.update(run(*combo), **dict(zip(names, combo)))
        print("\n\nThe best model trained was :", self.best)
        return self.best

In [18]:
# Run the grid search: 3 * 1 * 3 * 3 * 3 = 81 full training runs.
# The instance is deliberately not named `x`: that name is already bound at
# notebook level to the "pixels" placeholder by the single-hidden-layer cell.
grid_search = GsRunner(
    train_steps=[1000, 2000, 5000],
    batch_size=[100],
    units1=[256, 512, 1024],
    units2=[256, 512, 1024],
    learning_rate=[.1, .2, .5],
)
# Last expression of the cell, so the returned best-configuration dict is displayed.
grid_search.runner()

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please switch to tf.summary.FileWriter. The interface and behavior is the same; this is just a rename.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
At step 0, loss is 2.301068
At step 200, loss is 0.295217
At step 400, loss is 0.436825
At step 600, loss is 0.248036
At step 800, loss is 0.146709
Accuracy is 0.947500
Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please switch to tf.summary.FileWriter. The interface and behavior is the same; this is just a rename.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
At step 0, loss is 2.305903
At step 200, lo

{'batch_size': 100,
 'learning_rate': 0.5,
 'train_steps': 5000,
 'units1': 512,
 'units2': 512,
 'val': 0.98430002}