In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
import time

# to make sure the graph is refresh
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

  return f(*args, **kwds)


In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [4]:
# one_hot encoding 5, 6, 7, 8, 9 for all labels
def one_hot_encoding(y):
    tmp_y = np.zeros([y.shape[0], 5])
    for i in range(y.shape[0]):
        tmp_y[i][y[i]] = 1
    return tmp_y

y_train2 = one_hot_encoding(y_train2)
y_valid2 = one_hot_encoding(y_valid2)
y_test2 = one_hot_encoding(y_test2)

In [5]:
# define hyper-parameters for all networks structures
learning_rate = 0.01
batch_size = 32
epoch_bound = 1000
stop_threshold = 20

In [6]:
# HW3-1: Softmax Only
reset_graph()
pretrained_model_path = "./saved_model/Team35_HW2"
new_model_path = "./saved_model/Team35_HW3_1"
# get graph from pretrained model
pretrained_saver = tf.train.import_meta_graph(pretrained_model_path + ".ckpt.meta")
# new saver for HW3-1
new_saver = tf.train.Saver()

In [7]:
# get variables from pretrained model
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")

# create new training layers
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")

# define new Adam optimizer and training steps
optimizer = tf.train.AdamOptimizer(learning_rate, name="AdamOp_3-1")
training_op = optimizer.minimize(loss, var_list=output_layer_vars, name="training_op_3-1")

init = tf.global_variables_initializer()

In [8]:
def train_with_only_softmax(X_train, y_train, X_validate, y_validate, train_op, epoch_bound, stop_threshold, batch_size, testing=False, new_saver=None, new_model_path=None):
    
    early_stop = 0
    winner_loss = np.infty
    winner_accuracy = 0.0
    
    t0 = time.time()
    
    for epoch in range(epoch_bound):

        # randomize training set
        indices_training = np.random.permutation(X_train.shape[0])
        X_train, y_train = X_train[indices_training,:], y_train[indices_training,:]

        # split training set into multiple mini-batches and start training
        total_batches = int(X_train.shape[0] / batch_size)
        for batch in range(total_batches):
            if batch == total_batches - 1:
                sess.run(train_op, feed_dict={X: X_train[batch*batch_size:], y: y_train[batch*batch_size:]})
            else:
                sess.run(train_op, feed_dict={X: X_train[batch*batch_size : (batch+1)*batch_size], y: y_train[batch*batch_size : (batch+1)*batch_size]})

        # compute validation accuracy
        cur_accuracy, cur_loss = evaluate_with_only_softmax(X_validate, y_validate)

        # If the accuracy rate does not increase for many times, it will early stop epochs-loop 
        if winner_loss > cur_loss:
            early_stop = 0
            winner_loss = cur_loss
            winner_accuracy = cur_accuracy
            # save best model in testing phase
            if testing == True:
                save_path = new_saver.save(sess, new_model_path + ".ckpt")
        else:
            early_stop += 1
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(epoch, cur_loss, winner_loss, cur_accuracy * 100))
        if early_stop == stop_threshold:
            print("Early Stop.")
            break
    t1 = time.time()
    print("Total training time of HW3-1: {:.1f}s".format(t1 - t0))
    
    return winner_accuracy, winner_loss

# evaluate model: compute accuracy and loss
def evaluate_with_only_softmax(Inputs, Labels):
    global accuracy, loss
    acc = sess.run(accuracy, feed_dict={X: Inputs, y:Labels})
    loss_val = sess.run(loss, feed_dict={X: Inputs, y:Labels})        
    return acc, loss_val

In [None]:
with tf.Session() as sess:
    
    # init weights
    sess.run(init)
    
    # restore value from pretrained model
    pretrained_saver.restore = (sess, pretrained_model_path + ".ckpt")

    # initialize value for softmax layer
    for var in output_layer_vars:
        sess.run(var.initializer)
        
    # training phase
    winner_accuracy, winner_loss = train_with_only_softmax(X_train2, y_train2, X_valid2, y_valid2, training_op, epoch_bound, stop_threshold, batch_size, testing=True, new_saver=new_saver, new_model_path=new_model_path)

    # testing phase
    new_saver.restore(sess, new_model_path + ".ckpt")
    test_accuracy, test_loss = evaluate_with_only_softmax(X_test2, y_test2)
    print("Test accuracy: {:.2f}%".format(test_accuracy * 100))

0	Validation loss: 0.858134	Best loss: 0.858134	Accuracy: 94.46%
1	Validation loss: 0.715841	Best loss: 0.715841	Accuracy: 94.46%
2	Validation loss: 0.602619	Best loss: 0.602619	Accuracy: 94.46%
3	Validation loss: 0.579836	Best loss: 0.579836	Accuracy: 94.46%
4	Validation loss: 0.562980	Best loss: 0.562980	Accuracy: 94.46%
5	Validation loss: 0.555397	Best loss: 0.555397	Accuracy: 94.46%
6	Validation loss: 0.580309	Best loss: 0.555397	Accuracy: 94.46%
7	Validation loss: 0.540856	Best loss: 0.540856	Accuracy: 94.46%
8	Validation loss: 0.518206	Best loss: 0.518206	Accuracy: 94.46%
9	Validation loss: 0.524833	Best loss: 0.518206	Accuracy: 94.46%
10	Validation loss: 0.513265	Best loss: 0.513265	Accuracy: 94.46%
11	Validation loss: 0.525739	Best loss: 0.513265	Accuracy: 94.46%
12	Validation loss: 0.516617	Best loss: 0.513265	Accuracy: 94.46%
13	Validation loss: 0.505502	Best loss: 0.505502	Accuracy: 94.46%
14	Validation loss: 0.519397	Best loss: 0.505502	Accuracy: 94.46%
15	Validation loss: 

In [None]:
# HW3-2: cache 5th hidden layer
# use the function, tf.get_default_graph().get_operations(), to find right tensor name
# print(tf.get_default_graph().get_operations())
reset_graph()
pretrained_model_path = "./saved_model/Team35_HW2"
new_model_path = "./saved_model/Team35_HW3_2"
# get graph from pretrained model
pretrained_saver = tf.train.import_meta_graph(pretrained_model_path + ".ckpt.meta")
# new saver for HW3-2
new_saver = tf.train.Saver()

In [None]:
# get variables from pretrained model
# print(tf.get_default_graph().get_operations())
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")
# get 5th hidden layer tensor
h5_out = tf.get_default_graph().get_tensor_by_name("dnn_h5/Elu:0")

# create new training layers
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")

# define new Adam optimizer and training steps
optimizer = tf.train.AdamOptimizer(learning_rate, name="AdamOp_3-2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars, name="training_op_3-2")

init = tf.global_variables_initializer()

In [None]:
def train_with_softmax_and_cache_h5(X_train, y_train, X_validate, y_validate, train_op, epoch_bound, stop_threshold, batch_size, testing=False, new_saver=None, new_model_path=None):
        
    early_stop = 0
    winner_loss = np.infty
    winner_accuracy = 0.0
    
    t0 = time.time()
    
    for epoch in range(epoch_bound):

        # randomize training set
        indices_training = np.random.permutation(X_train.shape[0])
        X_train, y_train = X_train[indices_training,:], y_train[indices_training,:]

        # split training set into multiple mini-batches and start training
        total_batches = int(X_train.shape[0] / batch_size)
        for batch in range(total_batches):
            if batch == total_batches - 1:
                sess.run(train_op, feed_dict={h5_out: X_train[batch*batch_size:], y: y_train[batch*batch_size:]})
            else:
                sess.run(train_op, feed_dict={h5_out: X_train[batch*batch_size : (batch+1)*batch_size], y: y_train[batch*batch_size : (batch+1)*batch_size]})

        # compute validation accuracy
        cur_accuracy, cur_loss = evaluate_with_softmax_and_cache_h5(X_validate, y_validate)

        # If the accuracy rate does not increase for many times, it will early stop epochs-loop 
        if winner_loss > cur_loss:
            early_stop = 0
            winner_loss = cur_loss
            winner_accuracy = cur_accuracy
            # save best model in testing phase
            if testing == True:
                save_path = new_saver.save(sess, new_model_path + ".ckpt")
        else:
            early_stop += 1
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(epoch, cur_loss, winner_loss, cur_accuracy * 100))
        if early_stop == stop_threshold:
            print("Early Stop.")
            break
    t1 = time.time()
    print("Total training time of HW3-2: {:.1f}s".format(t1 - t0))
    
    return winner_accuracy, winner_loss

# evaluate model: compute accuracy and loss
def evaluate_with_softmax_and_cache_h5(Inputs, Labels):
    global accuracy, loss
    acc = sess.run(accuracy, feed_dict={h5_out: Inputs, y:Labels})
    loss_val = sess.run(loss, feed_dict={h5_out: Inputs, y:Labels})        
    return acc, loss_val

In [None]:
with tf.Session() as sess:
    
    # init weights
    sess.run(init)
    
    # restore value from pretrained model
    pretrained_saver.restore = (sess, pretrained_model_path + ".ckpt")

    # initialize value for softmax layer
    for var in output_layer_vars:
        sess.run(var.initializer)
        
    # Feed training set and validation set into 5th layer
    h5_train = sess.run(h5_out, feed_dict={X: X_train2, y: y_train2})
    h5_valid = sess.run(h5_out, feed_dict={X: X_valid2, y: y_valid2})

    # training phase
    winner_accuracy, winner_loss = train_with_softmax_and_cache_h5(h5_train, y_train2, h5_valid, y_valid2, training_op, epoch_bound, stop_threshold, batch_size, testing=True, new_saver=new_saver, new_model_path=new_model_path)

    # testing phase
    new_saver.restore(sess, new_model_path + ".ckpt")
    test_accuracy, test_loss = evaluate_with_only_softmax(X_test2, y_test2)
    print("Test accuracy: {:.2f}%".format(test_accuracy * 100))

In [None]:
# HW3-3: 4 hidden layers instead and create new softmax layer
reset_graph()
pretrained_model_path = "./saved_model/Team35_HW2"
new_model_path = "./saved_model/Team35_HW3_3"
# get graph from pretrained model
pretrained_saver = tf.train.import_meta_graph(pretrained_model_path + ".ckpt.meta")
# new saver for HW3-3
new_saver = tf.train.Saver()

In [None]:
# get graph for 1~4 layers (transfer layers) 
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

# get 4th hidden layer tensor
h4_out = tf.get_default_graph().get_tensor_by_name("dnn_h4/Elu:0")

# create new training layers
# add outputs softmax layer with 5 neurals
logits = tf.layers.dense(inputs=h4_out, units=5, kernel_initializer=tf.contrib.layers.variance_scaling_initializer(), name="logits_3-3")
outputs = tf.nn.softmax(logits, name="Y_probability_3-3")
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits_3-3")

# cross entropy loss function
cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y), name="loss_3-3")

# accuracy
correct_prediction = tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy_3-3")

# training iteration and define Adam optimizer with learning rate
optimizer = tf.train.AdamOptimizer(learning_rate, name="AdamOp_3-3")
training_op = optimizer.minimize(cross_entropy_loss, var_list=output_layer_vars, name="training_op_3-3")

init = tf.global_variables_initializer()

In [None]:
def train_with_4_hidden_and_new_softmax(X_train, y_train, X_validate, y_validate, train_op, epoch_bound, stop_threshold, batch_size, testing=False, new_saver=None, new_model_path=None):
    
    early_stop = 0
    winner_loss = np.infty
    winner_accuracy = 0.0
    
    t0 = time.time()
    
    for epoch in range(epoch_bound):

        # randomize training set
        indices_training = np.random.permutation(X_train.shape[0])
        X_train, y_train = X_train[indices_training,:], y_train[indices_training,:]

        # split training set into multiple mini-batches and start training
        total_batches = int(X_train.shape[0] / batch_size)
        for batch in range(total_batches):
            if batch == total_batches - 1:
                sess.run(train_op, feed_dict={X: X_train[batch*batch_size:], y: y_train[batch*batch_size:]})
            else:
                sess.run(train_op, feed_dict={X: X_train[batch*batch_size : (batch+1)*batch_size], y: y_train[batch*batch_size : (batch+1)*batch_size]})

        # compute validation accuracy
        cur_accuracy, cur_loss = evaluate_with_4_hidden_and_new_softmax(X_validate, y_validate)

        # If the accuracy rate does not increase for many times, it will early stop epochs-loop 
        if winner_loss > cur_loss:
            early_stop = 0
            winner_loss = cur_loss
            winner_accuracy = cur_accuracy
            # save best model in testing phase
            if testing == True:
                save_path = new_saver.save(sess, new_model_path + ".ckpt")
        else:
            early_stop += 1
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(epoch, cur_loss, winner_loss, cur_accuracy * 100))
        if early_stop == stop_threshold:
            print("Early Stop.")
            break
    t1 = time.time()
    print("Total training time of HW3-3: {:.1f}s".format(t1 - t0))
    
    return winner_accuracy, winner_loss

# evaluate model: compute accuracy, precision, recall
def evaluate_with_4_hidden_and_new_softmax(Inputs, Labels):
    global accuracy, cross_entropy_loss
    acc = sess.run(accuracy, feed_dict={X: Inputs, y:Labels})
    loss_val = sess.run(cross_entropy_loss, feed_dict={X: Inputs, y:Labels})
    return acc, loss_val

In [None]:
with tf.Session() as sess:
    
    # init weights
    sess.run(init)
    
    # restore value from pretrained model
    pretrained_saver.restore = (sess, pretrained_model_path + ".ckpt")

    # initialize value for softmax layer
    for var in output_layer_vars:
        sess.run(var.initializer)
    
    # training phase
    winner_accuracy, winner_loss = train_with_4_hidden_and_new_softmax(X_train2, y_train2, X_valid2, y_valid2, training_op, epoch_bound, stop_threshold, batch_size, testing=True, new_saver=new_saver, new_model_path=new_model_path)

    # testing phase
    new_saver.restore(sess, new_model_path + ".ckpt")
    test_accuracy, test_loss = evaluate_with_4_hidden_and_new_softmax(X_test2, y_test2)
    print("Test accuracy: {:.2f}%".format(test_accuracy * 100))

In [None]:
# HW3-4: unfreeze the top two hidden layers (1st and 2nd hidden layers) and continue training
reset_graph()
pretrained_model_path = "./saved_model/Team35_HW2"
new_model_path = "./saved_model/Team35_HW3_4"
# get graph from pretrained model
pretrained_saver = tf.train.import_meta_graph(pretrained_model_path + ".ckpt.meta")
# new saver for HW3-4
new_saver = tf.train.Saver()

In [None]:
# get graph for 1~4 layers (transfer layers) 
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

# get 4th hidden layer tensor
h4_out = tf.get_default_graph().get_tensor_by_name("dnn_h4/Elu:0")

# create new training layers
# add outputs softmax layer with 5 neurals
logits = tf.layers.dense(inputs=h4_out, units=5, kernel_initializer=tf.contrib.layers.variance_scaling_initializer(), name="logits_3-4")
outputs = tf.nn.softmax(logits, name="Y_probability_3-4")

unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="dnn_h1|dnn_h2|logits_3-4")

# cross entropy loss function
cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y), name="loss_3-4")

# accuracy
correct_prediction = tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy_3-4")

# training iteration and define Adam optimizer with learning rate
optimizer = tf.train.AdamOptimizer(learning_rate, name="AdamOp_3-4")
training_op = optimizer.minimize(cross_entropy_loss, var_list=unfrozen_vars, name="training_op_3-4")

init = tf.global_variables_initializer()

In [None]:
def train_with_top_2_hidden_and_new_softmax(X_train, y_train, X_validate, y_validate, train_op, epoch_bound, stop_threshold, batch_size, testing=False, new_saver=None, new_model_path=None):
    
    early_stop = 0
    winner_loss = np.infty
    winner_accuracy = 0.0
    
    t0 = time.time()
    
    for epoch in range(epoch_bound):

        # randomize training set
        indices_training = np.random.permutation(X_train.shape[0])
        X_train, y_train = X_train[indices_training,:], y_train[indices_training,:]

        # split training set into multiple mini-batches and start training
        total_batches = int(X_train.shape[0] / batch_size)
        for batch in range(total_batches):
            if batch == total_batches - 1:
                sess.run(train_op, feed_dict={X: X_train[batch*batch_size:], y: y_train[batch*batch_size:]})
            else:
                sess.run(train_op, feed_dict={X: X_train[batch*batch_size : (batch+1)*batch_size], y: y_train[batch*batch_size : (batch+1)*batch_size]})

        # compute validation accuracy
        cur_accuracy, cur_loss = evaluate_with_top_2_hidden_and_new_softmax(X_validate, y_validate)

        # If the accuracy rate does not increase for many times, it will early stop epochs-loop 
        if winner_loss > cur_loss:
            early_stop = 0
            winner_loss = cur_loss
            winner_accuracy = cur_accuracy
            # save best model in testing phase
            if testing == True:
                save_path = new_saver.save(sess, new_model_path + ".ckpt")
        else:
            early_stop += 1
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(epoch, cur_loss, winner_loss, cur_accuracy * 100))
        if early_stop == stop_threshold:
            print("Early Stop.")
            break
    t1 = time.time()
    print("Total training time of HW3-4: {:.1f}s".format(t1 - t0))
    
    return winner_accuracy, winner_loss

# evaluate model: compute accuracy, precision, recall
def evaluate_with_top_2_hidden_and_new_softmax(Inputs, Labels):
    global accuracy, cross_entropy_loss
    acc = sess.run(accuracy, feed_dict={X: Inputs, y:Labels})
    loss_val = sess.run(cross_entropy_loss, feed_dict={X: Inputs, y:Labels})
    return acc, loss_val

In [None]:
with tf.Session() as sess:
    
    # init weights
    sess.run(init)
    
    # restore value from pretrained model
    pretrained_saver.restore = (sess, pretrained_model_path + ".ckpt")

    # initialize value for softmax layer
    for var in unfrozen_vars:
        sess.run(var.initializer)
    
    # training phase
    winner_accuracy, winner_loss = train_with_top_2_hidden_and_new_softmax(X_train2, y_train2, X_valid2, y_valid2, training_op, epoch_bound, stop_threshold, batch_size, testing=True, new_saver=new_saver, new_model_path=new_model_path)

    # testing phase
    new_saver.restore(sess, new_model_path + ".ckpt")
    test_accuracy, test_loss = evaluate_with_top_2_hidden_and_new_softmax(X_test2, y_test2)
    print("Test accuracy: {:.2f}%".format(test_accuracy * 100))