# Chapter 11 Exercises

In [22]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

def save_fig(fig_id, tight_layout=True):
    path = os.path.join("images", fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [2]:
import tensorflow as tf

Build a DNN with five hidden layers of 100 neurons each, He initialization, and the ELU activation function.

Using Adam optimization and early stopping, try training it on MNIST but only on digits 0 to 4, as we will use transfer learning for digits 5 to 9 in the next exercise. You will need a softmax output layer with five neurons, and as always make sure to save checkpoints at regular intervals and save the final model so you can reuse it later.

Tune the hyperparameters using cross-validation and see what precision you can achieve.

Now try adding Batch Normalization and compare the learning curves: is it converging faster than before? Does it produce a better model?

Is the model overfitting the training set? Try adding dropout to every layer and try again. Does it help?

In [3]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [25]:
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

In [6]:
reset_graph()

In [5]:
n_inputs = 28*28  # MNIST
n_hidden1 = 100
n_hidden2 = 100
n_hidden3 = 100
n_hidden4 = 100
n_hidden5 = 100
n_outputs = 5

In [31]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y") 

In [32]:
training = tf.placeholder_with_default(False, shape=(), name='training')

dropout_rate = 0.5
X_drop = tf.layers.dropout(X, dropout_rate, training=training)

In [33]:
he_init = tf.contrib.layers.variance_scaling_initializer()

In [34]:
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1", kernel_initializer=he_init,
            activation=tf.nn.elu)
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)
    
    hidden2 = tf.layers.dense(hidden1_drop, n_hidden2, name="hidden2", kernel_initializer=he_init,
            activation=tf.nn.elu)
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)
    
    hidden3 = tf.layers.dense(hidden2_drop, n_hidden3, name="hidden3", kernel_initializer=he_init,
            activation=tf.nn.elu)
    hidden3_drop = tf.layers.dropout(hidden3, dropout_rate, training=training)
    
    hidden4 = tf.layers.dense(hidden3_drop, n_hidden4, name="hidden4", kernel_initializer=he_init,
            activation=tf.nn.elu)
    hidden4_drop = tf.layers.dropout(hidden4, dropout_rate, training=training)
    
    hidden5 = tf.layers.dense(hidden4_drop, n_hidden5, name="hidden5", kernel_initializer=he_init,
            activation=tf.nn.elu)
    hidden5_drop = tf.layers.dropout(hidden5, dropout_rate, training=training)
    
    logits = tf.layers.dense(hidden5_drop, n_outputs, name="outputs", kernel_initializer=he_init)
    
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

In [35]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)

In [36]:
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

In [37]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [38]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [39]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, len(X_train1) // batch_size):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training:True})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1, training:False})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.158070	Best loss: 0.158070	Accuracy: 95.00%
1	Validation loss: 0.119034	Best loss: 0.119034	Accuracy: 96.13%
2	Validation loss: 0.106546	Best loss: 0.106546	Accuracy: 96.91%
3	Validation loss: 0.112165	Best loss: 0.106546	Accuracy: 96.64%
4	Validation loss: 0.098725	Best loss: 0.098725	Accuracy: 97.26%
5	Validation loss: 0.091709	Best loss: 0.091709	Accuracy: 97.58%
6	Validation loss: 0.083497	Best loss: 0.083497	Accuracy: 97.65%
7	Validation loss: 0.069394	Best loss: 0.069394	Accuracy: 98.05%
8	Validation loss: 0.076006	Best loss: 0.069394	Accuracy: 97.97%
9	Validation loss: 0.067357	Best loss: 0.067357	Accuracy: 98.32%
10	Validation loss: 0.067301	Best loss: 0.067301	Accuracy: 98.08%
11	Validation loss: 0.060715	Best loss: 0.060715	Accuracy: 98.32%
12	Validation loss: 0.060734	Best loss: 0.060715	Accuracy: 98.51%
13	Validation loss: 0.058280	Best loss: 0.058280	Accuracy: 98.36%
14	Validation loss: 0.057121	Best loss: 0.057121	Accuracy: 98.40%
15	Validation loss: 

Transfer learning

Create a new DNN that reuses all the pretrained hidden layers of the previous model, freezes them, and replaces the softmax output layer with a fresh new one.

Train this new DNN on digits 5 to 9, using only 100 images per digit, and time how long it takes. Despite this small number of examples, can you achieve high precision?

Try caching the frozen layers, and train the model again: how much faster is it now?

Try again reusing just four hidden layers instead of five. Can you achieve a higher precision?

Now unfreeze the top two hidden layers and continue training: can you get the model to perform even better?

In [41]:
reset_graph()

restore_saver = tf.train.import_meta_graph("./my_mnist_model_0_to_4.ckpt.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]


KeyError: "The name 'accuracy:0' refers to a Tensor which does not exist. The operation, 'accuracy', does not exist in the graph."