In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

In [2]:
# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

This is the function that I will use to transfer integer to one_hot array

In [4]:
import time

#function that transform integer to one_hot array
def one_hot(input_data):
    one_hot = []
    for item in input_data:
        if item == 0.:
            one_h = [1.,0.,0.,0.,0.]
        elif item == 1.:
            one_h = [0.,1.,0.,0.,0.]
        elif item == 2.:
            one_h = [0.,0.,1.,0.,0.]
        elif item == 3.:
            one_h = [0.,0.,0.,1.,0.]
        elif item == 4.:
            one_h = [0.,0.,0.,0.,1.]

        one_hot.append(one_h)
    one_hot = np.array(one_hot)
    return one_hot

3.1 restore HW2 and train the softmax layer only 

In [5]:
# Define training parameters
training_epochs = 1000
batch_size = 20
train_data_size = np.prod(X_train2.shape[0]) 
total_batch = int(train_data_size/batch_size)

# Training cycle
current_best = 100.0
stopping_step = 0
epoch_number = 0
  
# First, load meta graph so that we can get the structure of HW2
saver = tf.train.import_meta_graph('./checkpointFile/my_test_model.meta')

# Now, let's access and load all the tensor and function that we need
# :0 is the index of the tensor

graph = tf.get_default_graph()
x = graph.get_tensor_by_name("X:0")
true_labels = graph.get_tensor_by_name("TL:0")
dropout_keep_prob = graph.get_tensor_by_name("dropout:0")
loss = graph.get_tensor_by_name("loss:0")
accuracy = graph.get_tensor_by_name("accuracy:0")

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="output")
Optimizer = tf.train.AdamOptimizer(0.01, name='Opt')
train_op = Optimizer.minimize(loss, var_list=output_layer_vars)
five_frozen_saver = tf.train.Saver()

# Transform true labels to one hot representation
y_train2 = one_hot(y_train2)
y_valid2 = one_hot(y_valid2)
y_test2 = one_hot(y_test2)
 
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Load and restore HW2 checkpoint into sess
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('./checkpointFile/checkpoint'))
    saver.restore(sess,ckpt.model_checkpoint_path)
    # Initialize the variable that will be trained
    for var in output_layer_vars:
        sess.run(var.initializer)

    t0_3_1 = time.time()
    for epoch in range(training_epochs):
        epoch_number+=1

        # Generate random indexes
        indexes = np.random.permutation(train_data_size)

        # Run all datas for one epoch
        for position in range(0, train_data_size, batch_size):
            # Generate mini batch ids
            ids = indexes[position:(position+batch_size) if (position+batch_size) < train_data_size else train_data_size]
            batch_xs = X_train2[ids]
            batch_ts = y_train2[ids]
            sess.run(train_op,feed_dict={x:batch_xs, true_labels:batch_ts, dropout_keep_prob: 1})

        loss_val, acc_val = sess.run([loss, accuracy],feed_dict={x:X_valid2, true_labels:y_valid2, dropout_keep_prob: 1})
        
        print("{}\tValidation loss: {:.6f} \tAccuracy: {:.2f}%".format(epoch+1, loss_val, acc_val * 100))
        if(loss_val < current_best):
            # Save the best model so far into Team46_HW3_1 checkpoint
            save_path = five_frozen_saver.save(sess, "./Team46_HW3_1/Team46_HW3_1.ckpt")
            current_best = loss_val
            stopping_step = 0
        else:
            stopping_step +=1

        if(stopping_step >= 20):
            break

    print ('Early stop after %d epoches, best loss is %f'% (epoch_number,current_best))
    t1_3_1 = time.time()
    print ('Training time is %f sec.'% (t1_3_1-t0_3_1))
    
# Restore Team46_HW3_1 checkpoint and test the model
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./Team46_HW3_1/Team46_HW3_1.ckpt")
    acc_test = accuracy.eval(feed_dict={x: X_test2, true_labels: y_test2, dropout_keep_prob: 1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./checkpointFile/my_test_model
1	Validation loss: 1.044013 	Accuracy: 61.33%
2	Validation loss: 0.983115 	Accuracy: 68.67%
3	Validation loss: 0.858689 	Accuracy: 72.00%
4	Validation loss: 0.799455 	Accuracy: 71.33%
5	Validation loss: 0.815953 	Accuracy: 70.00%
6	Validation loss: 0.833939 	Accuracy: 72.67%
7	Validation loss: 0.816728 	Accuracy: 72.67%
8	Validation loss: 0.757690 	Accuracy: 73.33%
9	Validation loss: 0.854479 	Accuracy: 66.67%
10	Validation loss: 0.770712 	Accuracy: 74.00%
11	Validation loss: 0.750588 	Accuracy: 76.67%
12	Validation loss: 0.812793 	Accuracy: 70.00%
13	Validation loss: 0.674246 	Accuracy: 76.67%
14	Validation loss: 0.757171 	Accuracy: 74.67%
15	Validation loss: 0.782226 	Accuracy: 73.33%
16	Validation loss: 0.728165 	Accuracy: 76.00%
17	Validation loss: 0.726828 	Accuracy: 74.00%
18	Validation loss: 0.737237 	Accuracy: 71.33%
19	Validation loss: 0.745196 	Accuracy: 72.00%
20	Validation loss: 0.693489 	Accuracy: 76.

3.2 restore HW2 and cache 5th layer before training

In [6]:
# Get layer5 of HW2
layer5 = graph.get_tensor_by_name("Layer5:0")

# Training cycle
current_best = 100.0
stopping_step = 0
epoch_number = 0
cache_five_frozen_saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('./checkpointFile/checkpoint'))
    saver.restore(sess,ckpt.model_checkpoint_path)
    for var in output_layer_vars:
        sess.run(var.initializer)
    
    # Feed X_train2 and X_valid2 into layer5 before training to accelerate 
    layer5_train = layer5.eval(feed_dict={x:X_train2})
    layer5_valid = layer5.eval(feed_dict={x:X_valid2})
    
    t0_3_2 = time.time()
    for epoch in range(training_epochs):
        epoch_number+=1

        # Generate random indexes
        indexes = np.random.permutation(train_data_size)

        # Run all datas for one epoch
        for position in range(0, train_data_size, batch_size):
            # Generate mini batch ids
            ids = indexes[position:(position+batch_size) if (position+batch_size) < train_data_size else train_data_size]
            batch_xs = layer5_train[ids]
            batch_ts = y_train2[ids]
            # Feed layer5 into training process 
            sess.run(train_op,feed_dict={layer5:batch_xs, true_labels:batch_ts, dropout_keep_prob: 1})

        loss_val, acc_val = sess.run([loss, accuracy],feed_dict={layer5:layer5_valid, true_labels:y_valid2, dropout_keep_prob: 1})

        print("{}\tValidation loss: {:.6f} \tAccuracy: {:.2f}%".format(epoch+1, loss_val, acc_val * 100))
        if(loss_val < current_best):
            save_path = five_frozen_saver.save(sess, "./Team46_HW3_2/Team46_HW3_2.ckpt")
            current_best = loss_val
            stopping_step = 0
        else:
            stopping_step +=1

        if(stopping_step >= 20):
            break

    print ('Early stop after %d epoches, best loss is %f'% (epoch_number,current_best))
    t1_3_2 = time.time()
    print ('Training time is %f sec.'% (t1_3_2-t0_3_2))
    
with tf.Session() as sess:
    cache_five_frozen_saver.restore(sess, "./Team46_HW3_2/Team46_HW3_2.ckpt")
    acc_test = accuracy.eval(feed_dict={x: X_test2, true_labels: y_test2, dropout_keep_prob: 1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./checkpointFile/my_test_model
1	Validation loss: 1.027873 	Accuracy: 64.00%
2	Validation loss: 0.937901 	Accuracy: 64.00%
3	Validation loss: 0.906457 	Accuracy: 70.00%
4	Validation loss: 0.820263 	Accuracy: 68.00%
5	Validation loss: 0.796498 	Accuracy: 70.67%
6	Validation loss: 0.870613 	Accuracy: 64.67%
7	Validation loss: 0.773138 	Accuracy: 69.33%
8	Validation loss: 0.750011 	Accuracy: 72.67%
9	Validation loss: 0.809875 	Accuracy: 71.33%
10	Validation loss: 0.843827 	Accuracy: 68.67%
11	Validation loss: 0.744980 	Accuracy: 75.33%
12	Validation loss: 0.767153 	Accuracy: 73.33%
13	Validation loss: 0.782638 	Accuracy: 73.33%
14	Validation loss: 0.750746 	Accuracy: 75.33%
15	Validation loss: 0.700559 	Accuracy: 75.33%
16	Validation loss: 0.796858 	Accuracy: 72.67%
17	Validation loss: 0.730536 	Accuracy: 75.33%
18	Validation loss: 0.792971 	Accuracy: 73.33%
19	Validation loss: 0.701393 	Accuracy: 75.33%
20	Validation loss: 0.699928 	Accuracy: 76.

3.3 Train 4 layers instead and add new softmax layer

In [7]:
# Get layer4 of HW2
layer4 = graph.get_tensor_by_name("Layer4:0")

# Training cycle
current_best = 100.0
stopping_step = 0
epoch_number = 0

OUTPUT_NODE = 5

# Since the structure of our new model is different form HW2, we should define our new output layer,
# loss&accuracy function, and optimizer ourselves.
initializer=tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_AVG', uniform=True)

output_3 = tf.layers.dense(inputs=layer4, units=OUTPUT_NODE, kernel_initializer=initializer, name='output_3')
output_after_softmax_3 = tf.nn.softmax(output_3, name='output_after_softmax_3')

loss_3 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits= output_3,labels= tf.argmax(true_labels, 1)), name='loss_3')
correct_prediction_3 = tf.equal(tf.argmax(output_after_softmax_3, 1), tf.argmax(true_labels, 1))
accuracy_3 = tf.reduce_mean(tf.cast(correct_prediction_3, tf.float32), name='accuracy_3')

output_layer_vars_3 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="output_3")
Optimizer_3 = tf.train.AdamOptimizer(0.01, name='Opt_3')
train_op_3 = Optimizer_3.minimize(loss_3, var_list=output_layer_vars_3)

four_frozen_saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('./checkpointFile/checkpoint'))
    saver.restore(sess,ckpt.model_checkpoint_path)
    for var in output_layer_vars_3:
        sess.run(var.initializer)
    
    # We cache 4th layer of HW2 to accelerate the training process.
    # We can do this because the first four layers of the new model is the same as which in HW2.
    layer4_train = layer4.eval(feed_dict={x:X_train2,true_labels:y_train2})
    layer4_valid = layer4.eval(feed_dict={x:X_valid2,true_labels:y_train2})
    
    t0_3_3 = time.time()
    for epoch in range(training_epochs):
        epoch_number+=1

        # Generate random indexes
        indexes = np.random.permutation(train_data_size)

        # Run all datas for one epoch
        for position in range(0, train_data_size, batch_size):
            # Generate mini batch ids
            ids = indexes[position:(position+batch_size) if (position+batch_size) < train_data_size else train_data_size]
            batch_xs = layer4_train[ids]
            batch_ts = y_train2[ids]
            # Feed layer4 into training process
            sess.run(train_op_3,feed_dict={layer4:batch_xs, true_labels:batch_ts, dropout_keep_prob: 1})

        loss_val, acc_val = sess.run([loss_3, accuracy_3],feed_dict={layer4:layer4_valid, true_labels:y_valid2, dropout_keep_prob: 1})

        print("{}\tValidation loss: {:.6f} \tAccuracy: {:.2f}%".format(epoch+1, loss_val, acc_val * 100))
        if(loss_val < current_best):
            save_path = four_frozen_saver.save(sess, "./Team46_HW3_3/Team46_HW3_3.ckpt")
            current_best = loss_val
            stopping_step = 0
        else:
            stopping_step +=1

        if(stopping_step >= 20):
            break

    print ('Early stop after %d epoches, best loss is %f'% (epoch_number,current_best))
    t1_3_3 = time.time()
    print ('Training time is %f sec.'% (t1_3_3-t0_3_3))

with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./Team46_HW3_3/Team46_HW3_3.ckpt")
    acc_test = accuracy.eval(feed_dict={x: X_test2, true_labels: y_test2, dropout_keep_prob: 1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./checkpointFile/my_test_model
1	Validation loss: 0.928629 	Accuracy: 63.33%
2	Validation loss: 0.799891 	Accuracy: 72.67%
3	Validation loss: 0.784990 	Accuracy: 75.33%
4	Validation loss: 0.706666 	Accuracy: 78.00%
5	Validation loss: 0.669182 	Accuracy: 78.67%
6	Validation loss: 0.682850 	Accuracy: 79.33%
7	Validation loss: 0.669138 	Accuracy: 77.33%
8	Validation loss: 0.722017 	Accuracy: 72.00%
9	Validation loss: 0.697509 	Accuracy: 75.33%
10	Validation loss: 0.664611 	Accuracy: 78.67%
11	Validation loss: 0.687688 	Accuracy: 74.67%
12	Validation loss: 0.645189 	Accuracy: 76.00%
13	Validation loss: 0.680613 	Accuracy: 78.00%
14	Validation loss: 0.645354 	Accuracy: 79.33%
15	Validation loss: 0.704536 	Accuracy: 73.33%
16	Validation loss: 0.670711 	Accuracy: 76.67%
17	Validation loss: 0.746355 	Accuracy: 74.67%
18	Validation loss: 0.697532 	Accuracy: 71.33%
19	Validation loss: 0.693980 	Accuracy: 76.00%
20	Validation loss: 0.687959 	Accuracy: 78.

3.4 Base on 3.3 but freeze only layer3 and layer4 while making layer1 and layer2 trainable

In [8]:
# Get TRAINABLE_VARIABLES within scope output_3 or Layer1 or Layer2
output_layer_vars_4 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="output_3|Layer1|Layer2")

# Feed new vaberiable set into new optimizer
Optimizer = tf.train.AdamOptimizer(0.01, name='Opt_4')
train_op = Optimizer.minimize(loss_3, var_list=output_layer_vars_4)

two_frozen_saver = tf.train.Saver()

# Training cycle
current_best = 100.0
stopping_step = 0
epoch_number = 0


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('./checkpointFile/checkpoint'))
    saver.restore(sess,ckpt.model_checkpoint_path)
    for var in output_layer_vars_4:
        sess.run(var.initializer)
    
    t0_3_4 = time.time()
    for epoch in range(training_epochs):
        epoch_number+=1

        # Generate random indexes
        indexes = np.random.permutation(train_data_size)

        # Run all datas for one epoch
        for position in range(0, train_data_size, batch_size):
            # Generate mini batch ids
            ids = indexes[position:(position+batch_size) if (position+batch_size) < train_data_size else train_data_size]
            batch_xs = X_train2[ids]
            batch_ts = y_train2[ids]
            # We have to feed X_train2 into the train process and can not use cache since layer1 and lsayer2 are trainable
            sess.run(train_op,feed_dict={x:batch_xs, true_labels:batch_ts, dropout_keep_prob: 1})
            
        loss_val, acc_val = sess.run([loss_3, accuracy_3],feed_dict={x:X_valid2, true_labels:y_valid2, dropout_keep_prob: 1})

        print("{}\tValidation loss: {:.6f} \tAccuracy: {:.2f}%".format(epoch+1, loss_val, acc_val * 100))
        if(loss_val < current_best):
            save_path = two_frozen_saver.save(sess, "./Team46_HW3_4/Team46_HW3_4.ckpt")
            current_best = loss_val
            stopping_step = 0
        else:
            stopping_step +=1

        if(stopping_step >= 20):
            break

    print ('Early stop after %d epoches, best loss is %f'% (epoch_number,current_best))
    t1_3_4 = time.time()
    print ('Training time is %f sec.'% (t1_3_4-t0_3_4))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./Team46_HW3_4/Team46_HW3_4.ckpt")
    acc_test = accuracy.eval(feed_dict={x: X_test2, true_labels: y_test2, dropout_keep_prob: 1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./checkpointFile/my_test_model
1	Validation loss: 0.438192 	Accuracy: 87.33%
2	Validation loss: 0.371540 	Accuracy: 90.67%
3	Validation loss: 0.471534 	Accuracy: 90.00%
4	Validation loss: 0.548758 	Accuracy: 88.00%
5	Validation loss: 0.523636 	Accuracy: 90.67%
6	Validation loss: 0.584493 	Accuracy: 90.00%
7	Validation loss: 0.574390 	Accuracy: 86.67%
8	Validation loss: 0.569987 	Accuracy: 93.33%
9	Validation loss: 0.514868 	Accuracy: 92.00%
10	Validation loss: 0.787381 	Accuracy: 86.00%
11	Validation loss: 0.654636 	Accuracy: 90.67%
12	Validation loss: 0.658456 	Accuracy: 92.67%
13	Validation loss: 0.687839 	Accuracy: 92.67%
14	Validation loss: 0.621362 	Accuracy: 92.67%
15	Validation loss: 0.613981 	Accuracy: 92.67%
16	Validation loss: 0.613642 	Accuracy: 92.67%
17	Validation loss: 0.612441 	Accuracy: 92.67%
18	Validation loss: 0.613361 	Accuracy: 92.67%
19	Validation loss: 0.614865 	Accuracy: 92.67%
20	Validation loss: 0.615448 	Accuracy: 92.