In [2]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
from time import time

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

'''
Homework 3-1
'''

#Early stop mechanism
class early_stop():
    def __init__(self, stop_margin, max_epochs):
        self.val_not_better_counter = 0 # Count epoch after best validation error > stop_margin then stop
        self.stop_margin = stop_margin # Count epochs if no progress
        self.last_val_acc = 0 # Record accuracy of last epoch
        self.epoch_counter = 0 # Record epoch count
        self.max_epochs = max_epochs
        
    def monitor(self, val_acc):        
        if(val_acc <= self.last_val_acc):
            # If no progress, counter++
            self.val_not_better_counter += 1
        else:
            self.val_not_better_counter = 0
        self.last_val_acc = val_acc # Record accuracy
        
        self.epoch_counter += 1 # Count epochs
            
    def continue_training(self):
        # Continue training if no progress epochs < stop_margin epochs and current epoch < max_epochs
        return self.val_not_better_counter < self.stop_margin and self.epoch_counter < self.max_epochs
    
num_class = 5

# Convert label to one-hot encoding matrix
def label_to_onehot(label):
    result = np.empty([label.shape[0], num_class])
    for i in range(label.shape[0]):
        enc = np.zeros(5)
        enc[label[i]] = 1.0
        result[i] = enc
    return result

y_train2 = label_to_onehot(y_train2)
y_valid2 = label_to_onehot(y_valid2)
y_test2 = label_to_onehot(y_test2)

# Load model from homework2
restore_saver = tf.train.import_meta_graph('./Team32_HW2.ckpt.meta')

# Get necessary variables/tensors
x = tf.get_default_graph().get_tensor_by_name('x:0')
y = tf.get_default_graph().get_tensor_by_name('y:0')
loss_func = tf.get_default_graph().get_tensor_by_name('loss:0')
y_prob = tf.get_default_graph().get_tensor_by_name('y_prob:0')
output_layer_vars_W = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Wo')
output_layer_vars_b = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bo')
# Train only on selected variables
training_op = tf.train.GradientDescentOptimizer(1).minimize(loss_func, var_list=[output_layer_vars_W, output_layer_vars_b])
    
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer()) #init
    els = early_stop(stop_margin = 20,
                    max_epochs = 1000)
    
    # Record total training time
    totalTime = 0        
    while(els.continue_training()):
        tStart = time()
        _, loss = sess.run([training_op, loss_func], feed_dict={x: X_train2, y: y_train2})
        totalTime += time() - tStart
        loss = np.mean(loss)
        
        #validate
        y_pre = sess.run(y_prob, feed_dict={x: X_valid2})
        correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_valid2, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_acc = sess.run(accuracy, feed_dict={x: X_valid2})
        
        els.monitor(val_acc)
        print('Epoch: {:3}, Loss: {:.8f}, Accuracy: {:.3f}%'.format(els.epoch_counter, loss, val_acc * 100))
    
    tElapse = time() - tStart
    saver.save(sess, './Team32_HW3_1.ckpt')
    print('Model saved!')
    
    # Run test
    y_pre = sess.run(y_prob, feed_dict={x: X_test2})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_test2, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    test_acc = sess.run(accuracy, feed_dict={x: X_test2})
    print('Test accuracy: {:.3f}%'.format(test_acc * 100))
    print('Total training time: {:.3f}ms'.format(totalTime * 1000))

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch:   1, Loss: 1.79848671, Accuracy: 26.667%
Epoch:   2, Loss: 1.65778697, Accuracy: 24.667%
Epoch:   3, Loss: 1.51020598, Accuracy: 52.667%
Epoch:   4, Loss: 1.43087661, Accuracy: 48.667%
Epoch:   5, Loss: 1.36191952, Accuracy: 62.000%
Epoch:   6, Loss: 1.30241823, Accuracy: 63.333%
Epoch:   7, Loss: 1.25005722, Accuracy: 65.333%
Epoch:   8, Loss: 1.20372045, Accuracy: 66.000%
Epoch:   9, Loss: 1.16237962, Accuracy: 66.667%
Epoch:  10, Loss: 1.12527347, Accuracy: 66.667%
Epoch:  11, Loss: 1.09177649, Accuracy: 67.333%
Epoch:  12, Loss: 1.06138396, Accuracy: 67.333%
Epoch:  13, Loss: 1.03367972, Accuracy: 68.000%
Epoch:  14, Loss: 1.00831890, Accuracy: 68.000%
Epoch:  15, Loss: 0.98501265, Accuracy: 68.667%
Epoch:  16, Loss: 0.96351731, Accuracy: 69.333%
Epoch:  17, Loss: 0.94362593, Accuracy: 69

In [3]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
from time import time

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

'''
Homework 3-2
'''

#Early stop mechanism
class early_stop():
    def __init__(self, stop_margin, max_epochs):
        self.val_not_better_counter = 0 # Count epoch after best validation error > stop_margin then stop
        self.stop_margin = stop_margin # Count epochs if no progress
        self.last_val_acc = 0 # Record accuracy of last epoch
        self.epoch_counter = 0 # Record epoch count
        self.max_epochs = max_epochs
        
    def monitor(self, val_acc):        
        if(val_acc <= self.last_val_acc):
            # If no progress, counter++
            self.val_not_better_counter += 1
        else:
            self.val_not_better_counter = 0
        self.last_val_acc = val_acc # Record accuracy
        
        self.epoch_counter += 1 # Count epochs
            
    def continue_training(self):
        # Continue training if no progress epochs < stop_margin epochs and current epoch < max_epochs
        return self.val_not_better_counter < self.stop_margin and self.epoch_counter < self.max_epochs
    
num_class = 5

# Convert label to one-hot encoding matrix
def label_to_onehot(label):
    result = np.empty([label.shape[0], num_class])
    for i in range(label.shape[0]):
        enc = np.zeros(5)
        enc[label[i]] = 1.0
        result[i] = enc
    return result

y_train2 = label_to_onehot(y_train2)
y_valid2 = label_to_onehot(y_valid2)
y_test2 = label_to_onehot(y_test2)

# Load model from homework2
restore_saver = tf.train.import_meta_graph('./Team32_HW2.ckpt.meta')

# Get necessary variables/tensors
x = tf.get_default_graph().get_tensor_by_name('x:0')
y = tf.get_default_graph().get_tensor_by_name('y:0')
loss_func = tf.get_default_graph().get_tensor_by_name('loss:0')
y_prob = tf.get_default_graph().get_tensor_by_name('y_prob:0')
output_layer_vars_W = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Wo')
output_layer_vars_b = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bo')
# Get 5th hidden layer's tensor for compute
out5 = tf.get_default_graph().get_tensor_by_name('out5:0')
# Train only on selected variables
training_op = tf.train.GradientDescentOptimizer(1).minimize(loss_func, var_list=[output_layer_vars_W, output_layer_vars_b])
    
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer()) #init
    els = early_stop(stop_margin = 20,
                    max_epochs = 1000)
    
    # Record total training time
    totalTime = 0
    # Get 5th hidden layer's output in order to cache
    X_cache = sess.run(out5, feed_dict={x: X_train2})
    while(els.continue_training()):
        tStart = time()
        # Feed cached set directly into softmax
        _, loss = sess.run([training_op, loss_func], feed_dict={out5: X_cache, y: y_train2})
        totalTime += time() - tStart
        loss = np.mean(loss)
        
        #validate
        y_pre = sess.run(y_prob, feed_dict={x: X_valid2})
        correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_valid2, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_acc = sess.run(accuracy, feed_dict={x: X_valid2})
        els.monitor(val_acc)
        print('Epoch: {:3}, Loss: {:.8f}, Accuracy: {:.3f}%'.format(els.epoch_counter, loss, val_acc * 100))
    
    tElapse = time() - tStart
    saver.save(sess, './Team32_HW3_2.ckpt')
    print('Model saved!')
    
    # Run test
    y_pre = sess.run(y_prob, feed_dict={x: X_test2})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_test2, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    test_acc = sess.run(accuracy, feed_dict={x: X_test2})
    print('Test accuracy: {:.3f}%'.format(test_acc * 100))
    print('Total training time: {:.3f}ms'.format(tElapse * 1000))
    
"""
Much faster than HW3.1(162.935ms vs 372.966ms)
"""
    

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch:   1, Loss: 1.79848647, Accuracy: 26.667%
Epoch:   2, Loss: 1.65778697, Accuracy: 24.667%
Epoch:   3, Loss: 1.51020586, Accuracy: 52.667%
Epoch:   4, Loss: 1.43087649, Accuracy: 48.667%
Epoch:   5, Loss: 1.36191952, Accuracy: 62.000%
Epoch:   6, Loss: 1.30241823, Accuracy: 63.333%
Epoch:   7, Loss: 1.25005722, Accuracy: 65.333%
Epoch:   8, Loss: 1.20372045, Accuracy: 66.000%
Epoch:   9, Loss: 1.16237962, Accuracy: 66.667%
Epoch:  10, Loss: 1.12527347, Accuracy: 66.667%
Epoch:  11, Loss: 1.09177649, Accuracy: 67.333%
Epoch:  12, Loss: 1.06138396, Accuracy: 67.333%
Epoch:  13, Loss: 1.03367960, Accuracy: 68.000%
Epoch:  14, Loss: 1.00831890, Accuracy: 68.000%
Epoch:  15, Loss: 0.98501265, Accuracy: 68.667%
Epoch:  16, Loss: 0.96351725, Accuracy: 69.333%
Epoch:  17, Loss: 0.94362587, Accuracy: 69

In [4]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
from time import time
from tensorflow.contrib.layers import variance_scaling_initializer

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

'''
Homework 3-3
'''

#Early stop mechanism
class early_stop():
    def __init__(self, stop_margin, max_epochs):
        self.val_not_better_counter = 0 # Count epoch after best validation error > stop_margin then stop
        self.stop_margin = stop_margin # Count epochs if no progress
        self.last_val_acc = 0 # Record accuracy of last epoch
        self.epoch_counter = 0 # Record epoch count
        self.max_epochs = max_epochs
        
    def monitor(self, val_acc):        
        if(val_acc <= self.last_val_acc):
            # If no progress, counter++
            self.val_not_better_counter += 1
        else:
            self.val_not_better_counter = 0
        self.last_val_acc = val_acc # Record accuracy
        
        self.epoch_counter += 1 # Count epochs
            
    def continue_training(self):
        # Continue training if no progress epochs < stop_margin epochs and current epoch < max_epochs
        return self.val_not_better_counter < self.stop_margin and self.epoch_counter < self.max_epochs
    
num_class = 5
hidden_neuron_size = 128

# Convert label to one-hot encoding matrix
def label_to_onehot(label):
    result = np.empty([label.shape[0], num_class])
    for i in range(label.shape[0]):
        enc = np.zeros(5)
        enc[label[i]] = 1.0
        result[i] = enc
    return result

y_train2 = label_to_onehot(y_train2)
y_valid2 = label_to_onehot(y_valid2)
y_test2 = label_to_onehot(y_test2)

# Load model from homework2
restore_saver = tf.train.import_meta_graph('./Team32_HW2.ckpt.meta')

# Get necessary variables/tensors
x = tf.get_default_graph().get_tensor_by_name('x:0')
y = tf.get_default_graph().get_tensor_by_name('y:0')
y_prob = tf.get_default_graph().get_tensor_by_name('y_prob:0')
# Get 4th hidden layer's tensor
out4 = tf.get_default_graph().get_tensor_by_name('out4:0')

def init_var(name, shape):
    return tf.get_variable(name, shape, dtype=tf.float32, 
                           initializer=variance_scaling_initializer(factor=1.0))
# Build a new softmax layer connected by 4th hidden layer
Wn = init_var("Wn", [hidden_neuron_size, num_class])
bn = init_var("bn", [num_class])
logits = tf.add(tf.matmul(out4, Wn), bn)
y_prob = tf.nn.softmax(logits)
loss_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = logits))

# Train only on selected variables
# Only train new softmax layer
training_op = tf.train.GradientDescentOptimizer(1).minimize(loss_func, var_list=[Wn, bn])
    
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer()) #init
    els = early_stop(stop_margin = 20,
                    max_epochs = 1000)
    
    # Record total training time
    totalTime = 0        
    while(els.continue_training()):
        tStart = time()
        _, loss = sess.run([training_op, loss_func], feed_dict={x: X_train2, y: y_train2})
        totalTime += time() - tStart
        loss = np.mean(loss)
        
        #validate
        y_pre = sess.run(y_prob, feed_dict={x: X_valid2})
        correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_valid2, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_acc = sess.run(accuracy, feed_dict={x: X_valid2})
        
        els.monitor(val_acc)
        print('Epoch: {:3}, Loss: {:.8f}, Accuracy: {:.3f}%'.format(els.epoch_counter, loss, val_acc * 100))
    
    tElapse = time() - tStart
    saver.save(sess, './Team32_HW3_3.ckpt')
    print('Model saved!')
    
    # Run test
    y_pre = sess.run(y_prob, feed_dict={x: X_test2})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_test2, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    test_acc = sess.run(accuracy, feed_dict={x: X_test2})
    print('Test accuracy: {:.3f}%'.format(test_acc * 100))
    print('Total training time: {:.3f}ms'.format(tElapse * 1000))
    
"""
Better than HW3.2(79.757% vs 77.227%)
"""

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch:   1, Loss: 1.73934174, Accuracy: 39.333%
Epoch:   2, Loss: 1.51412582, Accuracy: 52.000%
Epoch:   3, Loss: 1.40322530, Accuracy: 60.000%
Epoch:   4, Loss: 1.31406987, Accuracy: 67.333%
Epoch:   5, Loss: 1.23960829, Accuracy: 68.000%
Epoch:   6, Loss: 1.17652893, Accuracy: 68.000%
Epoch:   7, Loss: 1.12244976, Accuracy: 68.667%
Epoch:   8, Loss: 1.07559156, Accuracy: 69.333%
Epoch:   9, Loss: 1.03460133, Accuracy: 69.333%
Epoch:  10, Loss: 0.99843538, Accuracy: 70.000%
Epoch:  11, Loss: 0.96627951, Accuracy: 72.000%
Epoch:  12, Loss: 0.93749028, Accuracy: 72.667%
Epoch:  13, Loss: 0.91155291, Accuracy: 73.333%
Epoch:  14, Loss: 0.88805163, Accuracy: 74.667%
Epoch:  15, Loss: 0.86664677, Accuracy: 74.667%
Epoch:  16, Loss: 0.84705877, Accuracy: 74.667%
Epoch:  17, Loss: 0.82905573, Accuracy: 74

In [5]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import tensorflow as tf
from time import time
from tensorflow.contrib.layers import variance_scaling_initializer

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

# load data: digits 5 to 9, but still label with 0 to 4, 
# because TensorFlow expects label's integers from 0 to n_classes-1.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

# we want to keep only 100 instances per class in the training set 
# and let's keep only 30 instances per class in the validation set
# tesing set is already loaded above
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

'''
Homework 3-4
'''

#Early stop mechanism
class early_stop():
    def __init__(self, stop_margin, max_epochs):
        self.val_not_better_counter = 0 # Count epoch after best validation error > stop_margin then stop
        self.stop_margin = stop_margin # Count epochs if no progress
        self.last_val_acc = 0 # Record accuracy of last epoch
        self.epoch_counter = 0 # Record epoch count
        self.max_epochs = max_epochs
        
    def monitor(self, val_acc):        
        if(val_acc <= self.last_val_acc):
            # If no progress, counter++
            self.val_not_better_counter += 1
        else:
            self.val_not_better_counter = 0
        self.last_val_acc = val_acc # Record accuracy
        
        self.epoch_counter += 1 # Count epochs
            
    def continue_training(self):
        # Continue training if no progress epochs < stop_margin epochs and current epoch < max_epochs
        return self.val_not_better_counter < self.stop_margin and self.epoch_counter < self.max_epochs
    
num_class = 5
hidden_neuron_size = 128

# Convert label to one-hot encoding matrix
def label_to_onehot(label):
    result = np.empty([label.shape[0], num_class])
    for i in range(label.shape[0]):
        enc = np.zeros(5)
        enc[label[i]] = 1.0
        result[i] = enc
    return result

y_train2 = label_to_onehot(y_train2)
y_valid2 = label_to_onehot(y_valid2)
y_test2 = label_to_onehot(y_test2)

# Load model from homework2
restore_saver = tf.train.import_meta_graph('./Team32_HW2.ckpt.meta')

# Get necessary variables/tensors
x = tf.get_default_graph().get_tensor_by_name('x:0')
y = tf.get_default_graph().get_tensor_by_name('y:0')
y_prob = tf.get_default_graph().get_tensor_by_name('y_prob:0')
# Get variables of first and second layers to train
W1 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='W1')
b1 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='b1')
W2 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='W2')
b2 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='b2')
out4 = tf.get_default_graph().get_tensor_by_name('out4:0')

def init_var(name, shape):
    return tf.get_variable(name, shape, dtype=tf.float32, 
                           initializer=variance_scaling_initializer(factor=1.0))
# Let new weight and bias variables for new softmax layer
Wn = init_var("Wn", [hidden_neuron_size, num_class])
bn = init_var("bn", [num_class])
logits = tf.add(tf.matmul(out4, Wn), bn)
y_prob = tf.nn.softmax(logits)
loss_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = logits))

# Train only on selected variables
# Only train first, second hidden layers, and new softmax layer
training_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss_func, var_list=[W1, b1, W2, b2, Wn, bn])
    
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer()) #init
    els = early_stop(stop_margin = 20,
                    max_epochs = 1000)
    
    # Record total training time
    totalTime = 0        
    while(els.continue_training()):
        tStart = time()
        _, loss = sess.run([training_op, loss_func], feed_dict={x: X_train2, y: y_train2})
        totalTime += time() - tStart
        loss = np.mean(loss)
        
        #validate
        y_pre = sess.run(y_prob, feed_dict={x: X_valid2})
        correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_valid2, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_acc = sess.run(accuracy, feed_dict={x: X_valid2})
        
        els.monitor(val_acc)
        print('Epoch: {:3}, Loss: {:.8f}, Accuracy: {:.3f}%'.format(els.epoch_counter, loss, val_acc * 100))
    
    tElapse = time() - tStart
    saver.save(sess, './Team32_HW3_4.ckpt')
    print('Model saved!')
    
    # Run test
    y_pre = sess.run(y_prob, feed_dict={x: X_test2})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_test2, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    test_acc = sess.run(accuracy, feed_dict={x: X_test2})
    print('Test accuracy: {:.3f}%'.format(test_acc * 100))
    print('Total training time: {:.3f}ms'.format(tElapse * 1000))
    
"""
Much better than HW3.2(89.961% vs 79.757%)
"""

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch:   1, Loss: 1.73934162, Accuracy: 16.000%
Epoch:   2, Loss: 1.61377740, Accuracy: 27.333%
Epoch:   3, Loss: 1.51546288, Accuracy: 48.667%
Epoch:   4, Loss: 1.43313193, Accuracy: 64.667%
Epoch:   5, Loss: 1.35915387, Accuracy: 70.667%
Epoch:   6, Loss: 1.28832805, Accuracy: 74.667%
Epoch:   7, Loss: 1.21826220, Accuracy: 76.667%
Epoch:   8, Loss: 1.14852190, Accuracy: 77.333%
Epoch:   9, Loss: 1.07970595, Accuracy: 78.000%
Epoch:  10, Loss: 1.01282287, Accuracy: 78.000%
Epoch:  11, Loss: 0.94894958, Accuracy: 80.667%
Epoch:  12, Loss: 0.88893229, Accuracy: 82.000%
Epoch:  13, Loss: 0.83328021, Accuracy: 82.000%
Epoch:  14, Loss: 0.78219539, Accuracy: 82.667%
Epoch:  15, Loss: 0.73561192, Accuracy: 82.667%
Epoch:  16, Loss: 0.69328332, Accuracy: 83.333%
Epoch:  17, Loss: 0.65487736, Accuracy: 83