In [None]:
import numpy as np
import os
import tensorflow as tf

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Reset the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Args:
        seed: Value passed to both ``tf.set_random_seed`` and
            ``np.random.seed``. Defaults to 42.
    """
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# start from a fresh default graph with fixed TensorFlow / NumPy seeds
reset_graph()

from tensorflow.examples.tutorials.mnist import input_data
# read the MNIST data sets from /tmp/data/
# NOTE(review): labels are compared with 5 below and later used as column
# indices, so they are presumably plain digit ids (not one-hot) -- confirm
mnist = input_data.read_data_sets("/tmp/data/")

# training on MNIST but only on digits 0 to 4
# each split is filtered with the same boolean mask (label < 5) applied to
# both its images and its labels, so rows stay aligned
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

###### Do not modify here ###### 

# one_hot encoding 0, 1, 2, 3, 4 for all labels
def one_hot_encoding(y, num_classes=5):
    """Return the one-hot encoding of a 1-D array of integer class labels.

    Args:
        y: 1-D NumPy array of integer labels; each value selects the column
            that is set to 1 in the corresponding output row.
        num_classes: Number of columns of the result. Defaults to 5, the
            class count that used to be hard-coded here (digits 0-4).

    Returns:
        A float array of shape ``(y.shape[0], num_classes)`` with exactly one
        1 per row and 0 everywhere else.

    Raises:
        IndexError: If a label does not fit into ``num_classes`` columns.
    """
    n_samples = y.shape[0]
    encoded = np.zeros([n_samples, num_classes])
    # one vectorized assignment: row k gets a 1 in column y[k]
    encoded[np.arange(n_samples), y] = 1
    return encoded

# split dataset into training set and one validation set
def split_folds(indices, Inputs, Labels, cross_validation, fold):
    """Split a dataset into the training and validation parts of one fold.

    The positions listed in ``indices`` are cut into ``cross_validation``
    consecutive chunks of ``n // cross_validation`` entries each; chunk number
    ``fold`` (1-based) becomes the validation set and all other entries form
    the training set. The last fold additionally takes the leftover entries
    when ``n`` is not divisible by ``cross_validation``.

    Args:
        indices: 1-D NumPy array of row indices (typically a permutation of
            ``range(n)``) that defines the sample order.
        Inputs: 2-D array of samples, one row per sample.
        Labels: 2-D array of labels, row-aligned with ``Inputs``.
        cross_validation: Total number of folds.
        fold: 1-based number of the fold used for validation.

    Returns:
        Tuple ``(X_train, y_train, X_validate, y_validate)`` of NumPy arrays.

    Raises:
        ValueError: If ``fold`` is not in ``1..cross_validation``.
    """
    if not 1 <= fold <= cross_validation:
        raise ValueError(
            "fold must be between 1 and cross_validation (%r), got %r"
            % (cross_validation, fold)
        )

    n = Inputs.shape[0]
    fold_size = n // cross_validation
    start = fold_size * (fold - 1)

    if fold == cross_validation:
        # the last fold runs to the end so the remainder is not dropped
        train_idx, validate_idx = indices[:start], indices[start:]
    else:
        stop = start + fold_size
        validate_idx = indices[start:stop]
        train_idx = np.concatenate((indices[:start], indices[stop:]), axis=0)

    # inputs and labels share the same index sets, so rows stay aligned
    X_train, X_validate = np.array(Inputs[train_idx, :]), np.array(Inputs[validate_idx, :])
    y_train, y_validate = np.array(Labels[train_idx, :]), np.array(Labels[validate_idx, :])
    return X_train, y_train, X_validate, y_validate

# split training data into multiple mini-batches
def split_batches(X_train, y_train, batch_size):
    """Split training inputs and labels into row-aligned mini-batches.

    The data is cut into ``X_train.shape[0] // batch_size`` nearly equal
    batches (at least one), so individual batches can be slightly larger than
    ``batch_size`` when the sample count is not a multiple of it.

    Args:
        X_train: Array of training samples, one row per sample.
        y_train: Array of labels, row-aligned with ``X_train``.
        batch_size: Target number of samples per batch; must be positive.

    Returns:
        Tuple ``(X_batches, y_batches)``. Each is a regular NumPy array of
        shape ``(n_batches, rows, ...)`` when all batches have the same size,
        otherwise a 1-D object array holding one array per batch. Either way
        ``result.shape[0]`` is the batch count and ``result[k]`` is batch k.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))

    # array_split needs a positive integer section count; with fewer samples
    # than batch_size everything goes into a single batch
    n_batches = max(1, X_train.shape[0] // batch_size)

    X_batches = _pack_batches(np.array_split(X_train, n_batches))
    y_batches = _pack_batches(np.array_split(y_train, n_batches))
    return X_batches, y_batches


def _pack_batches(parts):
    """Pack a list of batch arrays into a single NumPy array of batches."""
    if len({part.shape for part in parts}) == 1:
        # equally sized batches stack into a regular (n_batches, ...) array
        return np.array(parts)
    # unequal batches: fill an object array slot by slot, because np.array()
    # on arrays of different shapes is an error in current NumPy
    packed = np.empty(len(parts), dtype=object)
    for position, part in enumerate(parts):
        packed[position] = part
    return packed

# adding layer function
def add_layer(name, inputs, in_size, out_size, Weights_initializer, activation, dropout_rate):
    """Build one fully connected layer: activation(dropout(inputs . W) + b).

    Args:
        name: Suffix appended to "Weights" / "biases" to form the names of
            the two variables created for this layer.
        inputs: Tensor fed into the layer; it is matrix-multiplied with a
            weight variable of shape ``[in_size, out_size]``.
        in_size: Number of input features.
        out_size: Number of units in the layer.
        Weights_initializer: Initializer used for the weight variable.
        activation: Callable applied to the pre-activation tensor, or None
            to return the pre-activation tensor unchanged.
        dropout_rate: Fraction of values to drop; ``1 - dropout_rate`` is
            passed to ``tf.nn.dropout`` as the keep probability.

    Returns:
        The output tensor of the layer.
    """
    # trainable parameters of the layer
    kernel = tf.get_variable(
        "Weights" + name,
        shape=[in_size, out_size],
        dtype=tf.float32,
        initializer=Weights_initializer,
    )
    bias_initializer = tf.random_uniform_initializer(-1.0, 1.0, dtype=tf.float32)
    bias = tf.get_variable(
        "biases" + name,
        shape=[1, out_size],
        dtype=tf.float32,
        initializer=bias_initializer,
    )

    keep_prob = 1 - dropout_rate
    projected = tf.matmul(inputs, kernel)
    # dropout acts on the matmul result; the bias is added afterwards.
    # The dropout op has no train/eval switch, so it is part of every run
    # of this layer.
    pre_activation = tf.nn.dropout(projected, keep_prob) + bias

    if activation is not None:
        return activation(pre_activation)
    return pre_activation

def train(Inputs, Labels, train_step, epoch_bound=500, stop_threshold=20, cross_validation=1):
    """Train the network with early stopping, optionally with k-fold cross validation.

    Uses the module-level ``sess``, ``init`` and the placeholders ``x`` / ``y``,
    and calls ``split_folds`` and ``evaluate`` from this file. Each epoch is a
    single full-batch update on the fold's training set.

    Args:
        Inputs: 2-D array of samples, one row per sample.
        Labels: 2-D array of one-hot labels, row-aligned with ``Inputs``.
        train_step: TensorFlow op run once per epoch to update the model.
        epoch_bound: Maximum number of epochs per fold.
        stop_threshold: A fold stops early once the validation accuracy has
            failed to improve this many epochs in a row.
        cross_validation: Number of folds to train (at least 1). With 1, a
            single run is made that holds out the first of 10 folds for
            validation.

    Returns:
        The best validation accuracy of each fold, averaged over all folds.
    """
    # one fixed shuffle of the whole dataset; every fold is a slice of it
    indices = np.random.permutation(Inputs.shape[0])
    # a single run still needs a validation set: hold out fold 1 of 10
    n_splits = cross_validation if cross_validation > 1 else 10
    avg_accuracy = 0.0

    for fold in range(1, cross_validation + 1):
        # re-initialize all variables so folds do not share learned weights
        sess.run(init)
        X_train, y_train, X_validate, y_validate = split_folds(indices, Inputs, Labels, n_splits, fold)

        early_stop = 0
        epochs_run = 0  # stays 0 when epoch_bound is 0, so the report below never fails
        winner_accuracy = 0.0
        winner_precision = 0.0
        winner_recall = 0.0

        for epoch in range(epoch_bound):
            epochs_run = epoch + 1

            # randomize training set
            indices_training = np.random.permutation(X_train.shape[0])
            X_train, y_train = X_train[indices_training, :], y_train[indices_training, :]

            # mini-batching is switched off: one update on the whole training set
            sess.run(train_step, feed_dict={x: X_train, y: y_train})

            # compute validation accuracy
            cur_accuracy, cur_precision, cur_recall = evaluate(X_validate, y_validate)

            # early stopping: count consecutive epochs without a new best accuracy
            if winner_accuracy < cur_accuracy:
                early_stop = 0
                winner_accuracy = cur_accuracy
                winner_precision = cur_precision
                winner_recall = cur_recall
            else:
                early_stop += 1
            if early_stop == stop_threshold:
                break

        avg_accuracy += winner_accuracy
        print("Fold:", fold, "epoch:", epochs_run)
        print("  accuracy:", winner_accuracy)
        print("  precision:", winner_precision)
        print("  recall:", winner_recall)

    avg_accuracy /= cross_validation
    return avg_accuracy

# evaluate model: compute accuracy, precision, recall
def evaluate(Inputs, Labels):
    """Run the model on ``Inputs`` and score its predictions against ``Labels``.

    Uses the module-level ``sess``, the ``outputs`` tensor and the ``x``
    placeholder. The predicted / true class of a row is the argmax of its
    model output / label row. All metrics are computed with NumPy, so calling
    this function does not add any ops to the TensorFlow graph.

    NOTE(review): ``outputs`` is run exactly as it was built. The layers in
    this file apply dropout without a train/eval switch, so predictions made
    here are still subject to dropout -- confirm whether that is intended.

    Args:
        Inputs: 2-D array of samples, one row per sample.
        Labels: 2-D array of one-hot labels, row-aligned with ``Inputs``.

    Returns:
        Tuple ``(accuracy, all_precision, all_recall)``: the overall accuracy
        as a float32 scalar, and two lists with one precision / recall value
        per label column. A value is NaN when its denominator is zero (class
        never predicted for precision, class absent for recall).
    """
    y_predict = sess.run(outputs, feed_dict={x: Inputs})
    pred_y = np.argmax(y_predict, axis=1)
    true_y = np.argmax(Labels, axis=1)
    accuracy = np.float32(np.mean(pred_y == true_y))

    undefined = np.float64(np.nan)
    all_precision = []
    all_recall = []
    # one class per label column
    for i in range(Labels.shape[1]):
        pred_is_i = pred_y == i
        true_is_i = true_y == i

        true_positives = np.float64(np.count_nonzero(pred_is_i & true_is_i))
        predicted_count = np.count_nonzero(pred_is_i)  # TP + FP
        actual_count = np.count_nonzero(true_is_i)  # TP + FN

        all_precision.append(true_positives / predicted_count if predicted_count else undefined)
        all_recall.append(true_positives / actual_count if actual_count else undefined)

    return accuracy, all_precision, all_recall

########## Data Processing ##########
# turn the digit labels of every split into one-hot rows
y_train1, y_valid1, y_test1 = (
    one_hot_encoding(labels) for labels in (y_train1, y_valid1, y_test1)
)

# stack the train and validation splits into one pool of samples and labels
Inputs = np.concatenate([X_train1, X_valid1])
Labels = np.concatenate([y_train1, y_valid1])
########## Data Processing ##########

########## Define hyper-parameters ##########
learning_rate = 0.01   # learning rate given to the optimizer
dropout_rate = 0.3     # fraction of values dropped in each hidden layer
batch_size = 128       # mini-batch size
epoch_bound = 500      # maximum number of epochs
cross_validation = 10  # number of folds for cross validation
stop_threshold = 20    # epochs without improvement before early stopping
########## Define hyper-parameters ##########

########## Build DNN ##########
# placeholders for the input samples and their one-hot target labels
x = tf.placeholder(tf.float32, [None, Inputs.shape[1]])
y = tf.placeholder(tf.float32, [None, 5])

# add 5 hidden layers with 128 neurons each
h1 = add_layer("h1", x, Inputs.shape[1], 128, tf.contrib.layers.variance_scaling_initializer(), tf.nn.elu, dropout_rate)
h2 = add_layer("h2", h1, 128, 128, tf.contrib.layers.variance_scaling_initializer(), tf.nn.elu, dropout_rate)
h3 = add_layer("h3", h2, 128, 128, tf.contrib.layers.variance_scaling_initializer(), tf.nn.elu, dropout_rate)
h4 = add_layer("h4", h3, 128, 128, tf.contrib.layers.variance_scaling_initializer(), tf.nn.elu, dropout_rate)
h5 = add_layer("h5", h4, 128, 128, tf.contrib.layers.variance_scaling_initializer(), tf.nn.elu, dropout_rate)

# output layer with 5 neurons, kept linear (no activation, no dropout) so
# that the loss below receives raw logits
logits = add_layer("outputs", h5, 128, 5, tf.contrib.layers.variance_scaling_initializer(), None, 0.0)

# class probabilities, used for prediction
outputs = tf.nn.softmax(logits)

# cross entropy loss function; softmax_cross_entropy_with_logits applies the
# softmax itself, so it must be fed the logits and not the softmax output
cross_entropy = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))

# one training iteration: Adam optimizer with the configured learning rate
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
########## Build DNN ##########

# create the variable initializer op and open the TF session
init = tf.global_variables_initializer()
sess = tf.Session()

# The cross-validated training run (train(...) with cross_validation passed
# as last argument), saving a checkpoint to "saved_model/DNN.ckpt" with
# tf.train.Saver and printing the average fold accuracy are disabled here.

########## Test ##########
sess.run(init)
# single training run; cross_validation keeps its default value
train(Inputs, Labels, train_step, epoch_bound, stop_threshold)

# score the trained model on the held-out test split
test_accuracy, test_precision, test_recall = evaluate(X_test1, y_test1)
print("Test result")
for label, value in (
    ("  accuracy:", test_accuracy),
    ("  precision:", test_precision),
    ("  recall:", test_recall),
):
    print(label, value)
########## Test ##########

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
