In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf

# Make this notebook's output stable across runs.
def reset_graph(seed=42):
    """Start from a fresh default graph and seed both NumPy and TensorFlow.

    Args:
        seed: integer passed to `np.random.seed` and `tf.set_random_seed`.
    """
    np.random.seed(seed)
    # The TensorFlow seed is set after the reset, i.e. on the new default graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

reset_graph()

In [2]:
# Load data: keep digits 5 to 9 only, but relabel them 0 to 4,
# because TensorFlow expects integer labels from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")


def _digits_5_to_9(split):
    """Return (images, labels - 5) for the instances of `split` whose label is >= 5."""
    keep = split.labels >= 5
    return split.images[keep], split.labels[keep] - 5


X_train2_full, y_train2_full = _digits_5_to_9(mnist.train)
X_valid2_full, y_valid2_full = _digits_5_to_9(mnist.validation)
X_test2, y_test2 = _digits_5_to_9(mnist.test)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# Keep only 100 instances per class in the training set
# and only 30 instances per class in the validation set.
# The testing set is already loaded above.
def sample_n_instances_per_class(X, y, n=100):
    """Keep at most `n` instances of every class.

    For each distinct label in `y` (visited in the sorted order returned by
    `np.unique`), the first `n` matching rows are kept in their original order.

    Args:
        X: array of instances, indexable by a boolean mask built from `y`.
        y: array of labels aligned with `X`.
        n: maximum number of instances kept per class.

    Returns:
        A tuple `(X_sampled, y_sampled)` with the per-class selections
        concatenated class after class.
    """
    class_masks = [y == label for label in np.unique(y)]
    X_parts = [X[mask][:n] for mask in class_masks]
    y_parts = [y[mask][:n] for mask in class_masks]
    return np.concatenate(X_parts), np.concatenate(y_parts)

# Cap the training set at 100 instances per class and the validation set at 30.
X_train2, y_train2 = sample_n_instances_per_class(X=X_train2_full, y=y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X=X_valid2_full, y=y_valid2_full, n=30)

In [4]:
################# 3-1 Softmax only #################

# Step 1: fetch tensors from our HW2 model, using the names given in HW2.
# Import the meta graph of our HW2 checkpoint into the default graph.
restore_saver = tf.train.import_meta_graph("./Team60_HW2.ckpt.meta")

# Look up, by name, every tensor needed for HW3 training.
hw2_graph = tf.get_default_graph()
X, y, dropout_rate2, loss, softY = [
    hw2_graph.get_tensor_by_name(tensor_name)
    for tensor_name in ("X:0", "y:0", "dropout_rate:0", "loss:0", "softmax:0")
]
# The first input of the op producing "softmax:0" is used as the logits.
logits = softY.op.inputs[0]

In [5]:
# Step 2: build a training op that only updates the softmax layer.
learning_rate = 0.01

# output_layer_vars holds the variables to train. In HW3-1 only the softmax
# layer is trained, so only scope "logits" (the HW2 softmax layer) is collected.
output_layer_vars = tf.get_collection(
    tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")

# The optimizer is named 'Adam2' rather than left at its default name.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam2')
training_op = optimizer.minimize(loss=loss, var_list=output_layer_vars)

In [6]:
# Step 3: accuracy op, variable initializer and checkpoint saver.
# A prediction is correct when the true label is the top-1 entry of the logits.
predY = tf.nn.in_top_k(predictions=logits, targets=y, k=1)  # boolean per instance
# Cast booleans to 0/1 and average them to obtain the accuracy.
accuracy = tf.reduce_mean(tf.cast(predY, dtype=tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

In [7]:
# Step 4: train the softmax layer and report validation metrics after every epoch.
# `time` is imported here because the training time is measured below.
import time

# Training settings and early-stopping state
num_epochs = 1000
early_stop = 20
batch_size = 20
best_loss = np.inf
step_without_progress = 0

with tf.Session() as sess:
    sess.run(init)
    restore_saver.restore(sess, "./Team60_HW2.ckpt")

    # Run the softmax-layer initializers again after the restore, so the
    # trainable layer does not keep any values the restore loaded for it.
    for var in output_layer_vars:
        sess.run(var.initializer)

    time_start = time.time()
    n_batches = len(X_train2) // batch_size
    # dropout_rate2 is fed 0.5 while training and 1.0 while evaluating
    # (presumably a keep probability -- confirm against the HW2 graph).
    valid_feed = {X: X_valid2, y: y_valid2, dropout_rate2: 1.0}

    for epoch in range(num_epochs):
        # Visit the training instances in a fresh random order each epoch
        rnd_idx = list(range(len(X_train2)))
        np.random.shuffle(rnd_idx)
        # One optimizer step per mini-batch
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch = X_train2[rnd_indices]
            y_batch = y_train2[rnd_indices]
            train_feed = {X: X_batch, y: y_batch, dropout_rate2: 0.5}
            sess.run(training_op, feed_dict=train_feed)

        # Validation loss and accuracy for this epoch
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict=valid_feed)

        # Early stopping: checkpoint on every new best validation loss and stop
        # once more than `early_stop` consecutive epochs bring no improvement.
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./Team60_HW3_1.ckpt")
            best_loss = loss_val
            step_without_progress = 0
        else:
            step_without_progress += 1
        if step_without_progress > early_stop:
            print("Early stop!")
            break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    time_end = time.time()
    # Kept for the 3-1 versus 3-2 timing comparison
    time_3_1 = time_end - time_start

    print("Total training time: {:.1f}s".format(time_3_1))

# Testing: reload the best checkpoint and evaluate on the test set
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./Team60_HW3_1.ckpt")
    test_feed = {X: X_test2, y: y_test2, dropout_rate2: 1.0}
    acc_test = sess.run(accuracy, feed_dict=test_feed)
    # Kept for the 3-1 versus 3-3 accuracy comparison
    acc_3_1 = acc_test * 100
    print("Testing accuracy: {:.2f}%".format(acc_3_1))

INFO:tensorflow:Restoring parameters from ./Team60_HW2.ckpt
0	Validation loss: 1.651778	Best loss: 1.651778	Accuracy: 41.33%
1	Validation loss: 1.759042	Best loss: 1.651778	Accuracy: 41.33%
2	Validation loss: 1.172509	Best loss: 1.172509	Accuracy: 52.67%
3	Validation loss: 1.338535	Best loss: 1.172509	Accuracy: 41.33%
4	Validation loss: 1.391729	Best loss: 1.172509	Accuracy: 47.33%
5	Validation loss: 1.286124	Best loss: 1.172509	Accuracy: 51.33%
6	Validation loss: 1.227214	Best loss: 1.172509	Accuracy: 52.00%
7	Validation loss: 1.085998	Best loss: 1.085998	Accuracy: 59.33%
8	Validation loss: 1.189639	Best loss: 1.085998	Accuracy: 51.33%
9	Validation loss: 1.341168	Best loss: 1.085998	Accuracy: 44.00%
10	Validation loss: 1.202298	Best loss: 1.085998	Accuracy: 50.67%
11	Validation loss: 1.282820	Best loss: 1.085998	Accuracy: 44.00%
12	Validation loss: 1.158277	Best loss: 1.085998	Accuracy: 44.67%
13	Validation loss: 1.235707	Best loss: 1.085998	Accuracy: 54.00%
14	Validation loss: 1.6318

In [8]:
################# 3-2 Cache 5th layer #################

# Step 3.5: cache the output of hidden layer 5 before training.
# Output tensor of our HW2 hidden layer 5
hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden_layer5/Elu:0")

# Step 4: train and report every epoch, feeding the cached activations
num_epochs = 1000
early_stop = 20
batch_size = 20
best_loss = np.inf
step_without_progress = 0

with tf.Session() as sess:
    sess.run(init)
    restore_saver.restore(sess, "./Team60_HW2.ckpt")

    # Run the softmax-layer initializers again after the restore
    for var in output_layer_vars:
        sess.run(var.initializer)

    time_start = time.time()
    # Evaluate hidden layer 5 once on the whole training and validation sets;
    # the loops below feed these arrays in place of hidden5_out.
    # NOTE(review): the training cache is computed with dropout_rate2=0.5. If
    # dropout acts below hidden layer 5, a single random mask is baked into the
    # cache -- confirm against the HW2 graph whether 1.0 should be fed here.
    train_cached = hidden5_out.eval(feed_dict={X: X_train2, y: y_train2, dropout_rate2: 0.5})
    valid_cached = hidden5_out.eval(feed_dict={X: X_valid2, y: y_valid2, dropout_rate2: 1.0})

    n_batches = len(X_train2) // batch_size
    valid_feed = {hidden5_out: valid_cached, y: y_valid2, dropout_rate2: 1.0}

    for epoch in range(num_epochs):
        # Fresh random order of the training instances for this epoch
        rnd_idx = list(range(len(X_train2)))
        np.random.shuffle(rnd_idx)
        # Mini-batches are sliced from the cached activations, not from the raw images
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch = train_cached[rnd_indices]
            y_batch = y_train2[rnd_indices]
            train_feed = {hidden5_out: X_batch, y: y_batch, dropout_rate2: 0.5}
            sess.run(training_op, feed_dict=train_feed)

        loss_val, acc_val = sess.run([loss, accuracy], feed_dict=valid_feed)

        # Early stopping on the validation loss
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./Team60_HW3_2.ckpt")
            best_loss = loss_val
            step_without_progress = 0
        else:
            step_without_progress += 1
        if step_without_progress > early_stop:
            print("Early stop!")
            break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    time_end = time.time()
    print("Total training time of 3-1: {:.1f}s".format(time_3_1))
    print("Total training time of 3-2: {:.1f}s".format(time_end - time_start))
    print("Compare the training time of 3-1 and 3-2, we know that cache 5th layer really helps improving training speed!")

# Testing: the full network is evaluated from the raw test images
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./Team60_HW3_2.ckpt")
    test_feed = {X: X_test2, y: y_test2, dropout_rate2: 1.0}
    acc_test = sess.run(accuracy, feed_dict=test_feed)
    print("Testing accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./Team60_HW2.ckpt
0	Validation loss: 1.721285	Best loss: 1.721285	Accuracy: 44.67%
1	Validation loss: 1.730600	Best loss: 1.721285	Accuracy: 40.00%
2	Validation loss: 1.364526	Best loss: 1.364526	Accuracy: 50.00%
3	Validation loss: 1.352564	Best loss: 1.352564	Accuracy: 52.00%
4	Validation loss: 1.792190	Best loss: 1.352564	Accuracy: 32.67%
5	Validation loss: 1.584202	Best loss: 1.352564	Accuracy: 41.33%
6	Validation loss: 1.248564	Best loss: 1.248564	Accuracy: 51.33%
7	Validation loss: 1.393138	Best loss: 1.248564	Accuracy: 52.67%
8	Validation loss: 1.397118	Best loss: 1.248564	Accuracy: 38.67%
9	Validation loss: 1.743944	Best loss: 1.248564	Accuracy: 34.00%
10	Validation loss: 1.381750	Best loss: 1.248564	Accuracy: 50.00%
11	Validation loss: 1.652308	Best loss: 1.248564	Accuracy: 44.67%
12	Validation loss: 1.575054	Best loss: 1.248564	Accuracy: 39.33%
13	Validation loss: 1.488151	Best loss: 1.248564	Accuracy: 51.33%
14	Validation loss: 1.5613

In [17]:
################# 3-3 4 layers instead #################
# Rebuild the model on a fresh graph: reuse HW2 hidden layers 1-4 (frozen) and
# train a brand-new output layer on top of hidden layer 4.
reset_graph()

num_classes = 5

# Import the HW2 meta graph into the fresh default graph
restore_saver = tf.train.import_meta_graph("./Team60_HW2.ckpt.meta")

he_init = tf.contrib.layers.variance_scaling_initializer()
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
dropout_rate2 = tf.get_default_graph().get_tensor_by_name("dropout_rate:0")

# Get output of our HW2 hidden layer 4
hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden_layer4/Elu:0")

# Build new dense and softmax layer
logits = tf.layers.dense(hidden4_out, num_classes, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)

# Compute loss and accuracy
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

learning_rate = 0.01

# Freeze the parameters of the 4 hidden layers: only the "new_logits" layer is trained
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
# Due to the same name "Adam" in Team60_HW2.ckpt.meta, we need to rename the optimizer.
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
# Pass the trainable parameters to the minimize function
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
# Saver for the graph built in this cell; it is used for every save/restore of
# the 3-3 checkpoint below.
four_frozen_saver = tf.train.Saver()

num_epochs = 1000
early_stop = 20
batch_size = 20
# np.inf rather than the np.infty alias, which newer NumPy releases no longer provide
best_loss = np.inf
step_without_progress = 0

with tf.Session() as sess:
    sess.run(init)
    restore_saver.restore(sess, "./Team60_HW2.ckpt")

    # Run the new output layer's initializers again after the restore
    for var in output_layer_vars:
        var.initializer.run()

    # Retrain the model
    for epoch in range(num_epochs):
        # shuffle the data
        rnd_idx = list(range(len(X_train2)))
        np.random.shuffle(rnd_idx)

        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, dropout_rate2: 0.5})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2, dropout_rate2: 1.0})

        # Early stopping: checkpoint on every new best validation loss
        if loss_val < best_loss:
            # Save with this graph's saver. five_frozen_saver was created for
            # the graph that reset_graph() discarded at the top of this cell.
            save_path = four_frozen_saver.save(sess, "./Team60_HW3_3.ckpt")
            best_loss = loss_val
            step_without_progress = 0
        else:
            step_without_progress += 1
            if step_without_progress > early_stop:
                print("Early stop!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Testing: reload the best 3-3 checkpoint with the same saver that wrote it
with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./Team60_HW3_3.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2, dropout_rate2: 1.0})
    print("Testing accuracy: {:.2f}%".format(acc_test * 100))


    print("Accuracy of 3-1: {:.2f}%".format(acc_3_1))
    print("Accuracy of 3-3: {:.2f}%".format(acc_test * 100))
    print("Compare the results of 3-1 and 3-3, we know that adding a new softmax layer can improve accuracy!")

    # Kept for the 3-3 versus 3-4 accuracy comparison
    acc_3_3 = acc_test * 100

INFO:tensorflow:Restoring parameters from ./Team60_HW2.ckpt
0	Validation loss: 293.591766	Best loss: 293.591766	Accuracy: 54.00%
1	Validation loss: 308.124908	Best loss: 293.591766	Accuracy: 54.67%
2	Validation loss: 207.326416	Best loss: 207.326416	Accuracy: 62.00%
3	Validation loss: 199.797241	Best loss: 199.797241	Accuracy: 62.00%
4	Validation loss: 252.381683	Best loss: 199.797241	Accuracy: 58.00%
5	Validation loss: 226.818054	Best loss: 199.797241	Accuracy: 61.33%
6	Validation loss: 213.641235	Best loss: 199.797241	Accuracy: 57.33%
7	Validation loss: 192.438065	Best loss: 192.438065	Accuracy: 60.00%
8	Validation loss: 247.706940	Best loss: 192.438065	Accuracy: 58.00%
9	Validation loss: 165.158127	Best loss: 165.158127	Accuracy: 55.33%
10	Validation loss: 192.968582	Best loss: 165.158127	Accuracy: 60.67%
11	Validation loss: 185.270111	Best loss: 165.158127	Accuracy: 56.00%
12	Validation loss: 234.035950	Best loss: 165.158127	Accuracy: 54.00%
13	Validation loss: 244.649048	Best loss

In [None]:
################# 3-4 Bonus #################
# In 3-4, hidden layers 3 and 4 stay frozen;
# hidden layers 1 and 2 and the new softmax layer are retrained.

learning_rate = 0.01

# Variables to retrain: everything under the "hidden_layer1", "hidden_layer2"
# and "new_logits" scopes
unfrozen_vars = tf.get_collection(
    tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden_layer[12]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name="Adam2")
# Only the unfrozen variables are handed to the minimize function
training_op = optimizer.minimize(loss=loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf

with tf.Session() as sess:
    sess.run(init)
    # Start from the 3-3 checkpoint (restored after the initializer has run)
    four_frozen_saver.restore(sess, "./Team60_HW3_3.ckpt")

    n_batches = len(X_train2) // batch_size
    valid_feed = {X: X_valid2, y: y_valid2, dropout_rate2: 1.0}

    for epoch in range(n_epochs):
        # Fresh random order of the training instances for this epoch
        rnd_idx = list(range(len(X_train2)))
        np.random.shuffle(rnd_idx)

        # One optimizer step per mini-batch
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch = X_train2[rnd_indices]
            y_batch = y_train2[rnd_indices]
            train_feed = {X: X_batch, y: y_batch, dropout_rate2: 0.5}
            sess.run(training_op, feed_dict=train_feed)

        loss_val, acc_val = sess.run([loss, accuracy], feed_dict=valid_feed)

        # Early stopping on the validation loss
        if loss_val < best_loss:
            save_path = two_frozen_saver.save(sess, "./Team60_HW3_4.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
        if checks_without_progress > max_checks_without_progress:
            print("Early stop!")
            break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Testing: reload the best 3-4 checkpoint and compare with 3-3
with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./Team60_HW3_4.ckpt")
    test_feed = {X: X_test2, y: y_test2, dropout_rate2: 1.0}
    acc_test = sess.run(accuracy, feed_dict=test_feed)
    print("Testing accuracy: {:.2f}%".format(acc_test * 100))

    print("Accuracy of 3-3: {:.2f}%".format(acc_3_3))
    print("Accuracy of 3-4: {:.2f}%".format(acc_test * 100))
    print("Compare the results of 3-3 and 3-4, we know that unfreeze hidden 1 and 2 layers can get better!")