## Prepare

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start over with an empty default TensorFlow graph and fixed seeds.

    The same `seed` is applied to NumPy's global RNG and to the graph-level
    random seed of the new default graph.
    """
    np.random.seed(seed)
    # The graph-level seed is set on the current default graph, so the old
    # graph has to be discarded first.
    tf.reset_default_graph()
    tf.set_random_seed(seed)


reset_graph()

In [2]:
# Load MNIST and keep only the digits 5 to 9, relabelled as 0 to 4 because
# TensorFlow expects integer labels in the range 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# One boolean mask per split, shared by the images and the labels.
train_mask = mnist.train.labels >= 5
valid_mask = mnist.validation.labels >= 5
test_mask = mnist.test.labels >= 5

X_train2_full = mnist.train.images[train_mask]
y_train2_full = mnist.train.labels[train_mask] - 5
X_valid2_full = mnist.validation.images[valid_mask]
y_valid2_full = mnist.validation.labels[valid_mask] - 5
X_test2 = mnist.test.images[test_mask]
y_test2 = mnist.test.labels[test_mask] - 5

# Report the shape of every split (images first, then labels).
for split_array in (X_train2_full, y_train2_full,
                    X_valid2_full, y_valid2_full,
                    X_test2, y_test2):
    print(split_array.shape)
train_num = X_train2_full.shape[0]

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
(26962, 784)
(26962,)
(2442, 784)
(2442,)
(4861, 784)
(4861,)


In [3]:
# Keep only the first 100 instances per class in the training set
# and the first 30 instances per class in the validation set.
# The test set is already loaded above and is used in full.
def sample_n_instances_per_class(X, y, n=100):
    """Keep at most the first `n` instances of every class.

    Args:
        X: array-like of instances, indexed along its first axis.
        y: 1-D array-like of labels, one per row of `X`.
        n: maximum number of instances to keep per distinct label.

    Returns:
        A tuple `(X_sub, y_sub)` of NumPy arrays. Rows are grouped by label in
        the sorted order given by `np.unique(y)`; within a label the original
        order is kept. Empty input yields empty arrays instead of raising.
    """
    X, y = np.asarray(X), np.asarray(y)
    if y.size == 0:
        # np.concatenate() rejects an empty list, so short-circuit here.
        return X[:0], y[:0]

    Xs, ys = [], []
    for label in np.unique(y):
        # Select the first n row indices of this class up front, so only the
        # rows that are kept get copied.
        keep = np.flatnonzero(y == label)[:n]
        Xs.append(X[keep])
        ys.append(y[keep])
    return np.concatenate(Xs), np.concatenate(ys)

# Small transfer-learning sets: 100 instances per class for training,
# 30 instances per class for validation.
X_train2, y_train2 = sample_n_instances_per_class(
    X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(
    X_valid2_full, y_valid2_full, n=30)

## Part 1
### Freeze the pretrained hidden layers: exclude their variables from the optimizer's list of trainable variables, keeping only the output layer's trainable variables

### Load the checkpoint weights from HW2, then look up the tensors and operations by name

In [4]:
reset_graph()

# Rebuild the HW2 graph from its MetaGraph file; the saver returned here is
# used later to restore the HW2 checkpoint.
restore_saver = tf.train.import_meta_graph("./Team06_HW2.meta")

# Fetch the tensors we need from the imported graph by name.
graph = tf.get_default_graph()
X = graph.get_tensor_by_name("X:0")
y = graph.get_tensor_by_name("y:0")
loss = graph.get_tensor_by_name("loss:0")
Y_proba = graph.get_tensor_by_name("pred:0")
accuracy = graph.get_tensor_by_name("accuracy:0")
# The logits are taken to be the first input of the op that produces "pred:0".
logits = Y_proba.op.inputs[0]

### Make only the 'FC' (softmax) layer trainable; freeze the other 5 layers

In [5]:
learning_rate = 0.01

# Only the trainable variables whose names match the "FC" scope are handed to
# the optimizer; every other variable is left untouched by training_op.
output_layer_vars = tf.get_collection(
    tf.GraphKeys.TRAINABLE_VARIABLES, scope="FC")
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

In [6]:
# Rebuild the accuracy node from the logits: an instance counts as correct
# when its true label is the top-1 prediction. This rebinds `accuracy`, which
# was previously looked up from the imported graph.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# Keep these two after every variable-creating statement: both are built from
# the variables that exist in the graph at this point.
# NOTE(review): this presumably includes the slot variables of the "Adam2"
# optimizer created in the previous cell - confirm.
init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

### Apply early stopping during training, then save the model as HW3_1 (test accuracy: 0.53)

In [7]:
import time

n_epochs = 1000
batch_size = 20

# Early stopping: stop once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf  # np.infty is a deprecated alias that NumPy 2.0 removed

# Mini-batches per epoch. Clamped to at least 1 because np.array_split()
# raises when asked for 0 sections (training set smaller than one batch).
n_batches = max(1, len(X_train2) // batch_size)

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./Team06_HW2")
    # Restoring overwrote the output layer with the HW2 weights; re-run its
    # initializers so that the layer being trained starts from scratch.
    for var in output_layer_vars:
        var.initializer.run()

    t0 = time.time()

    for epoch in range(n_epochs):
        # Fresh shuffle every epoch, split into roughly equal mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            # New best validation loss: checkpoint and reset the patience counter.
            save_path = five_frozen_saver.save(sess, "./Team06_HW3_1")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

# Evaluate the best checkpoint (not the last epoch) on the test set.
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./Team06_HW3_1")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./Team06_HW2
0	Validation loss: 2.914337	Best loss: 2.914337	Accuracy: 34.67%
1	Validation loss: 2.186097	Best loss: 2.186097	Accuracy: 34.67%
2	Validation loss: 2.610965	Best loss: 2.186097	Accuracy: 36.00%
3	Validation loss: 2.514665	Best loss: 2.186097	Accuracy: 29.33%
4	Validation loss: 2.171296	Best loss: 2.171296	Accuracy: 36.00%
5	Validation loss: 3.188393	Best loss: 2.171296	Accuracy: 42.00%
6	Validation loss: 2.123855	Best loss: 2.123855	Accuracy: 42.00%
7	Validation loss: 2.242008	Best loss: 2.123855	Accuracy: 40.67%
8	Validation loss: 2.226936	Best loss: 2.123855	Accuracy: 38.00%
9	Validation loss: 1.853105	Best loss: 1.853105	Accuracy: 45.33%
10	Validation loss: 1.978021	Best loss: 1.853105	Accuracy: 38.00%
11	Validation loss: 1.769349	Best loss: 1.769349	Accuracy: 36.00%
12	Validation loss: 2.105936	Best loss: 1.769349	Accuracy: 42.67%
13	Validation loss: 2.169199	Best loss: 1.769349	Accuracy: 46.67%
14	Validation loss: 2.335070	Be

## Part 2
### Compute the output of the top frozen layer once at the beginning (for both the training set and the validation set) and cache it. This makes training faster, because the frozen layers no longer have to be re-evaluated on every epoch.

In [8]:
# Output tensor "h5:0" of the imported graph; its values are cached below and
# fed back in place of X during training.
graph = tf.get_default_graph()
hidden5_out = graph.get_tensor_by_name("h5:0")

### Test accuracy: 0.5, although training is faster

In [9]:
import time

n_epochs = 1000
batch_size = 20

# Early stopping: stop once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf  # np.infty is a deprecated alias that NumPy 2.0 removed

# Mini-batches per epoch. Clamped to at least 1 because np.array_split()
# raises when asked for 0 sections (training set smaller than one batch).
n_batches = max(1, len(X_train2) // batch_size)

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./Team06_HW2")
    # Restoring overwrote the output layer with the HW2 weights; re-run its
    # initializers so that the layer being trained starts from scratch.
    for var in output_layer_vars:
        var.initializer.run()

    # The timer starts before the caching step, so the cost of computing the
    # cached activations is included in the reported training time.
    t0 = time.time()

    # Run the frozen layers once and cache their outputs for both sets.
    hidden5_train = hidden5_out.eval(feed_dict={X: X_train2, y: y_train2})
    hidden5_valid = hidden5_out.eval(feed_dict={X: X_valid2, y: y_valid2})

    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            h5_batch, y_batch = hidden5_train[rnd_indices], y_train2[rnd_indices]
            # Feed the cached activations directly into hidden5_out instead of
            # feeding X, so the layers below it are not re-evaluated.
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: hidden5_valid, y: y_valid2})
        if loss_val < best_loss:
            # New best validation loss: checkpoint and reset the patience counter.
            save_path = five_frozen_saver.save(sess, "./Team06_HW3_2")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

# Evaluate the best checkpoint on the test set, this time through the whole
# network (X is fed, nothing is cached).
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./Team06_HW3_2")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./Team06_HW2
0	Validation loss: 3.262501	Best loss: 3.262501	Accuracy: 35.33%
1	Validation loss: 2.061656	Best loss: 2.061656	Accuracy: 34.67%
2	Validation loss: 2.407110	Best loss: 2.061656	Accuracy: 35.33%
3	Validation loss: 2.271895	Best loss: 2.061656	Accuracy: 30.67%
4	Validation loss: 2.096857	Best loss: 2.061656	Accuracy: 38.67%
5	Validation loss: 2.535693	Best loss: 2.061656	Accuracy: 35.33%
6	Validation loss: 2.027433	Best loss: 2.027433	Accuracy: 34.67%
7	Validation loss: 2.627248	Best loss: 2.027433	Accuracy: 45.33%
8	Validation loss: 2.194199	Best loss: 2.027433	Accuracy: 36.67%
9	Validation loss: 1.899158	Best loss: 1.899158	Accuracy: 41.33%
10	Validation loss: 1.783677	Best loss: 1.783677	Accuracy: 45.33%
11	Validation loss: 1.738483	Best loss: 1.738483	Accuracy: 44.00%
12	Validation loss: 1.756335	Best loss: 1.738483	Accuracy: 45.33%
13	Validation loss: 1.816615	Best loss: 1.738483	Accuracy: 46.00%
14	Validation loss: 2.183896	Be

## Part 3
### create a new softmax output layer on top of the 4th hidden layer
### freeze all the layers except for the new output layer
### Test accuracy: 0.58 (improved over Parts 1 and 2)

In [11]:
# Start from a clean graph, re-import the HW2 model and attach a new output
# layer to the tensor "h4:0". Keep the statement order as is: it determines
# the order in which ops are added to the graph.
reset_graph()

n_outputs = 5

he_init = tf.contrib.layers.variance_scaling_initializer()

# This saver is created before the new layer exists.
# NOTE(review): it presumably covers only the variables of the imported HW2
# graph, leaving "new_logits" to be set by the initializer - confirm.
restore_saver = tf.train.import_meta_graph("./Team06_HW2.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

# New dense layer with n_outputs units on top of "h4:0", followed by a fresh
# cross-entropy loss and top-1 accuracy computed from its logits.
hidden4_out = tf.get_default_graph().get_tensor_by_name("h4:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [12]:
learning_rate = 0.01

# Train only the variables of the new output layer (scope "new_logits").
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

# Keep these after the optimizer has been built: both are constructed from
# the variables that exist in the graph at this point.
init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()

In [13]:
n_epochs = 1000
batch_size = 20

# Early stopping: stop once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf  # np.infty is a deprecated alias that NumPy 2.0 removed

# Mini-batches per epoch. Clamped to at least 1 because np.array_split()
# raises when asked for 0 sections (training set smaller than one batch).
n_batches = max(1, len(X_train2) // batch_size)

with tf.Session() as sess:
    # Initialize everything first, then overwrite the pretrained variables
    # with the HW2 checkpoint.
    init.run()
    restore_saver.restore(sess, "./Team06_HW2")

    for epoch in range(n_epochs):
        # Fresh shuffle every epoch, split into roughly equal mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            # New best validation loss: checkpoint and reset the patience counter.
            save_path = four_frozen_saver.save(sess, "./Team06_HW3_3")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Evaluate the best checkpoint (not the last epoch) on the test set.
with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./Team06_HW3_3")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./Team06_HW2
0	Validation loss: 2.064406	Best loss: 2.064406	Accuracy: 41.33%
1	Validation loss: 1.420861	Best loss: 1.420861	Accuracy: 50.00%
2	Validation loss: 1.235759	Best loss: 1.235759	Accuracy: 60.00%
3	Validation loss: 1.188183	Best loss: 1.188183	Accuracy: 57.33%
4	Validation loss: 1.089760	Best loss: 1.089760	Accuracy: 61.33%
5	Validation loss: 1.201103	Best loss: 1.089760	Accuracy: 59.33%
6	Validation loss: 1.104514	Best loss: 1.089760	Accuracy: 63.33%
7	Validation loss: 1.110676	Best loss: 1.089760	Accuracy: 60.67%
8	Validation loss: 1.191903	Best loss: 1.089760	Accuracy: 62.67%
9	Validation loss: 1.202659	Best loss: 1.089760	Accuracy: 62.00%
10	Validation loss: 1.152949	Best loss: 1.089760	Accuracy: 63.33%
11	Validation loss: 1.202276	Best loss: 1.089760	Accuracy: 62.00%
12	Validation loss: 1.121616	Best loss: 1.089760	Accuracy: 66.00%
13	Validation loss: 1.296584	Best loss: 1.089760	Accuracy: 61.33%
14	Validation loss: 1.147583	Be

## Part 4
### unfreeze the top two hidden layers and continue training
### test accuracy:0.59

In [14]:
learning_rate = 0.01

# Select the trainable variables whose names match the regular expression
# "h[34]|new_logits" and train them with a new optimizer.
# NOTE(review): the hidden layers are only seen here as tensors "h4:0"/"h5:0";
# confirm that the HW2 variables are really named with an "h3"/"h4" prefix.
# If the pattern matches nothing but "new_logits", this trains the output
# layer only and the hidden layers stay frozen.
unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="h[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam3")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

# Keep these after the optimizer has been built: both are constructed from
# the variables that exist in the graph at this point.
init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

In [16]:
n_epochs = 1000
batch_size = 20

# Early stopping: stop once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf  # np.infty is a deprecated alias that NumPy 2.0 removed

# Mini-batches per epoch. Clamped to at least 1 because np.array_split()
# raises when asked for 0 sections (training set smaller than one batch).
n_batches = max(1, len(X_train2) // batch_size)

with tf.Session() as sess:
    # Initialize everything first, then continue from the Part 3 checkpoint.
    init.run()
    four_frozen_saver.restore(sess, "./Team06_HW3_3")

    for epoch in range(n_epochs):
        # Fresh shuffle every epoch, split into roughly equal mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            # New best validation loss: checkpoint and reset the patience counter.
            save_path = two_frozen_saver.save(sess, "./Team06_HW3_4")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Evaluate the best checkpoint (not the last epoch) on the test set.
with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./Team06_HW3_4")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./Team06_HW3_3
0	Validation loss: 1.241230	Best loss: 1.241230	Accuracy: 59.33%
1	Validation loss: 1.075959	Best loss: 1.075959	Accuracy: 64.00%
2	Validation loss: 1.114527	Best loss: 1.075959	Accuracy: 61.33%
3	Validation loss: 1.268440	Best loss: 1.075959	Accuracy: 55.33%
4	Validation loss: 1.149581	Best loss: 1.075959	Accuracy: 64.00%
5	Validation loss: 1.116969	Best loss: 1.075959	Accuracy: 65.33%
6	Validation loss: 1.203748	Best loss: 1.075959	Accuracy: 62.00%
7	Validation loss: 1.158077	Best loss: 1.075959	Accuracy: 61.33%
8	Validation loss: 1.241382	Best loss: 1.075959	Accuracy: 56.67%
9	Validation loss: 1.164335	Best loss: 1.075959	Accuracy: 64.00%
10	Validation loss: 1.193445	Best loss: 1.075959	Accuracy: 63.33%
11	Validation loss: 1.216349	Best loss: 1.075959	Accuracy: 58.00%
12	Validation loss: 1.245532	Best loss: 1.075959	Accuracy: 62.00%
13	Validation loss: 1.135479	Best loss: 1.075959	Accuracy: 66.67%
14	Validation loss: 1.165663	