In [None]:
import numpy as np
import os
import tensorflow as tf

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Reset the default TensorFlow graph and reseed TensorFlow and NumPy.

    Args:
        seed: Value passed to both ``tf.set_random_seed`` and
            ``np.random.seed``.
    """
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# Start from a fresh default graph and the default seed before anything is built.
reset_graph()

from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets, using /tmp/data/ as the data directory.
mnist = input_data.read_data_sets("/tmp/data/")

# training on MNIST but only on digits 0 to 4
# Each split (train / validation / test) is filtered with a boolean mask built
# from its own labels. The labels are compared against 5 directly, so they are
# presumably integer digit ids rather than one-hot vectors -- verify against
# the read_data_sets defaults.
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

###### Do not modify here ###### 

# one_hot encoding 0, 1, 2, 3, 4 for all labels
def one_hot_encoding(y, num_classes=5):
    """Return the one-hot encoding of integer class labels.

    Args:
        y: Array-like of integer labels; normally 1-D with one label per
            sample, each in ``[0, num_classes)``.
        num_classes: Number of columns in the result. Defaults to 5, which
            matches the digits 0-4 used in this file.

    Returns:
        A float64 array of shape ``(len(y), num_classes)`` that is zero
        everywhere except for a 1 in the column named by each sample's label.

    Raises:
        IndexError: If a label is greater than or equal to ``num_classes``.
    """
    labels = np.asarray(y)
    n_samples = labels.shape[0]
    encoded = np.zeros([n_samples, num_classes])
    # Row index i is paired with labels[i]; the extra singleton axes keep that
    # pairing intact when each sample carries more than one label (2-D input).
    rows = np.arange(n_samples).reshape((n_samples,) + (1,) * (labels.ndim - 1))
    # One vectorized assignment replaces the per-sample Python loop.
    encoded[rows, labels] = 1
    return encoded

# split dataset into mini-batches of training set and one validation set
def split_batches(indices, Inputs, Labels, folds_validation, fold, batch_size):
    """Split one cross-validation fold into training mini-batches and a validation set.

    The (already shuffled) ``indices`` are cut into ``folds_validation``
    consecutive folds of ``n // folds_validation`` rows each; the last fold
    also takes whatever rows are left over. Fold number ``fold`` becomes the
    validation set and every other row is used for training.

    Args:
        indices: Row indices into ``Inputs`` / ``Labels`` (e.g. a permutation
            of ``range(n)``).
        Inputs: Array of samples, one per row.
        Labels: Array of labels whose rows line up with ``Inputs``.
        folds_validation: Total number of folds.
        fold: 1-based number of the fold to hold out for validation.
        batch_size: Number of rows per training mini-batch.

    Returns:
        A tuple ``(training_X, training_y, validation_X, validation_y)``.
        ``training_X`` and ``training_y`` are lists of arrays (one entry per
        mini-batch, in matching order); the validation items are single arrays.
        There are ``n_training // batch_size`` mini-batches (at least one), so
        the final mini-batch also holds the rows that do not fill a batch.
    """
    n = Inputs.shape[0]
    fold_size = n // folds_validation
    start = fold_size * (fold - 1)

    if fold == folds_validation:
        # The last fold runs to the end so the division remainder is not lost.
        validation_idx = indices[start:]
        training_idx = indices[:start]
    else:
        stop = fold_size * fold
        validation_idx = indices[start:stop]
        training_idx = np.concatenate((indices[:start], indices[stop:]), axis=0)

    # Inputs and labels share one set of row indices, which keeps them aligned.
    training_X, validation_X = Inputs[training_idx], Inputs[validation_idx]
    training_y, validation_y = Labels[training_idx], Labels[validation_idx]

    # Cut points are multiples of batch_size (previously a hard-coded 128,
    # which produced wrong-sized or empty batches for any other batch size).
    n_batches = len(training_idx) // batch_size
    batch_section = [batch_size * i for i in range(1, n_batches)]
    training_X = np.split(training_X, batch_section)
    training_y = np.split(training_y, batch_section)

    return training_X, training_y, validation_X, validation_y
    

# adding layer function
def add_layer(name, inputs, in_size, out_size, Weights_initializer, activation, dropout):
    """Append one fully connected layer to the graph and return its output tensor.

    The layer computes ``inputs @ Weights + biases``, passes the result
    through ``tf.nn.dropout`` and then through ``activation`` (if any).

    Args:
        name: Suffix for the variable names ("Weights<name>", "biases<name>").
        inputs: Tensor feeding the layer; multiplied by an
            ``[in_size, out_size]`` weight matrix.
        in_size: Number of input features.
        out_size: Number of units in the layer.
        Weights_initializer: Initializer used for the weight matrix.
        activation: Callable applied to the layer output, or ``None`` to
            return the linear (post-dropout) output unchanged.
        dropout: Drop rate; ``1 - dropout`` is handed to ``tf.nn.dropout``
            as the keep probability.

    Returns:
        The layer's output tensor.
    """
    kernel = tf.get_variable(
        "Weights" + name,
        shape=[in_size, out_size],
        dtype=tf.float32,
        initializer=Weights_initializer,
    )
    # Biases are drawn uniformly from [-1, 1).
    bias_initializer = tf.random_uniform_initializer(-1.0, 1.0, dtype=tf.float32)
    bias = tf.get_variable(
        "biases" + name,
        shape=[1, out_size],
        dtype=tf.float32,
        initializer=bias_initializer,
    )

    linear = tf.matmul(inputs, kernel) + bias
    # Dropout is applied to the pre-activation values.
    dropped = tf.nn.dropout(linear, (1 - dropout))

    if activation is None:
        return dropped
    return activation(dropped)

# computing accuracy function
def compute_accuracy(Valid_X, Valid_y):
    """Return the fraction of samples whose predicted class matches its label.

    Runs the module-level ``outputs`` tensor in the module-level session
    ``sess`` and compares the arg-max of each prediction row with the arg-max
    of the matching one-hot label row.

    The comparison is done in NumPy on the fetched predictions. Building
    ``tf.equal`` / ``tf.argmax`` / ``tf.reduce_mean`` ops here would add new
    nodes to the default graph on every call, so the graph would keep growing
    for as long as training runs.

    NOTE(review): ``add_layer`` in this file applies dropout unconditionally,
    so these predictions are computed with dropout still active in the hidden
    layers.

    Args:
        Valid_X: Input rows to evaluate.
        Valid_y: One-hot label rows aligned with ``Valid_X``.

    Returns:
        The accuracy as a ``numpy.float32`` scalar in ``[0, 1]``.
    """
    predictions = sess.run(outputs, feed_dict={x: Valid_X, y: Valid_y})
    hits = np.argmax(predictions, 1) == np.argmax(Valid_y, 1)
    # Keep the float32 scalar type the previous TensorFlow-based result had.
    return np.float32(np.mean(hits))

# Replace the integer labels of every split by their one-hot rows (classes 0-4).
y_train1, y_valid1, y_test1 = (
    one_hot_encoding(split_labels)
    for split_labels in (y_train1, y_valid1, y_test1)
)

# Pool the training and validation splits; the cross-validation loop below
# draws its own validation folds from this pooled data.
Inputs = np.concatenate([X_train1, X_valid1], axis=0)
Labels = np.concatenate([y_train1, y_valid1], axis=0)

# Report the shape of each label split and of the pooled arrays, one per line.
print(
    *(array.shape for array in (y_train1, y_valid1, y_test1, Inputs, Labels)),
    sep="\n"
)


# Training hyper-parameters.
learning_rate = 0.01   # handed to the Adam optimizer
batch_size = 128       # rows per training mini-batch
num_epochs = 500       # number of passes of the outer training loop
folds_validation = 5   # folds rotated through inside every epoch

# Best validation accuracy observed so far.
winner_accuracy = 0.0

# Graph inputs: one flattened sample per row for x, one 5-column one-hot label
# row per sample for y. The leading dimension is left open for any batch size.
x = tf.placeholder(dtype=tf.float32, shape=[None, Inputs.shape[1]])
y = tf.placeholder(dtype=tf.float32, shape=[None, 5])

# add 5 hidden layers with 128 neurons each; every layer uses the same fan-in
# variance-scaling initializer, ELU activation and a dropout rate of 0.3
weights_init = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode="FAN_IN", uniform=True)
h1 = add_layer("h1", x, Inputs.shape[1], 128, weights_init, tf.nn.elu, 0.3)
h2 = add_layer("h2", h1, 128, 128, weights_init, tf.nn.elu, 0.3)
h3 = add_layer("h3", h2, 128, 128, weights_init, tf.nn.elu, 0.3)
h4 = add_layer("h4", h3, 128, 128, weights_init, tf.nn.elu, 0.3)
h5 = add_layer("h5", h4, 128, 128, weights_init, tf.nn.elu, 0.3)

# add the output layer with 5 neurons. The raw logits are kept separately
# because softmax_cross_entropy_with_logits applies softmax itself; feeding it
# the softmax output would apply softmax twice and flatten the gradients.
logits = add_layer("outputs", h5, 128, 5, weights_init, None, 0.0)
# class probabilities, used for prediction / accuracy
outputs = tf.nn.softmax(logits)

# cross entropy loss function, summed over the samples of the batch
cross_entropy = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))

# one training step: Adam optimizer with the configured learning rate
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# Keep a checkpoint of the best model seen so far. Training always runs for
# all epochs; only the checkpoint is tied to the validation accuracy.
saver = tf.train.Saver()

# Make sure the checkpoint directory exists before the first save.
checkpoint_path = "saved_model/DNN.ckpt"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# start TF session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for epoch in range(num_epochs):
    print("epoch:", epoch)
    # reshuffle the dataset at the start of every epoch
    indices = np.random.permutation(Inputs.shape[0])
    # rotate through the folds_validation folds (fold numbers are 1-based)
    # NOTE(review): the same model keeps training across folds, so each
    # validation fold contains rows the model was already trained on in
    # earlier folds/epochs; the reported accuracy is therefore optimistic.
    for fold in range(1, folds_validation+1):
        training_X, training_y, validation_X, validation_y = split_batches(indices, Inputs, Labels, folds_validation, fold, batch_size)
        # `batch` is the index of the mini-batch; it is reported below
        for batch, (batch_X, batch_y) in enumerate(zip(training_X, training_y)):
            sess.run(train_step, feed_dict={x: batch_X, y: batch_y})
        cur_accuracy = compute_accuracy(validation_X, validation_y)
        # checkpoint only when the validation accuracy improves
        if winner_accuracy < cur_accuracy:
            winner_accuracy = cur_accuracy
            save_path = saver.save(sess, checkpoint_path)
            print("epoch:", epoch, "fold:", fold, "batch:", batch, "winner_accuracy:", winner_accuracy)
        
        