In [103]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split, KFold
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.framework import arg_scope
from tensorflow.contrib.layers import dropout

In [104]:
mnist = fetch_mldata('MNIST original')
mnist_X = mnist.data/255 #normalized
mnist_y = mnist.target

mnist_04_X = []
mnist_04_y = []

for i in range(mnist_X.shape[0]):
    if mnist_y[i] >= 0 and mnist_y[i] <= 4: #filtering out digits from 0 to 4 inclusively
        mnist_04_X.append(mnist_X[i])
        mnist_04_y.append(mnist_y[i])
    
mnist_04_X = np.array(mnist_04_X)
mnist_04_y = np.array(mnist_04_y)

In [105]:
train_X, test_X, train_y, test_y = train_test_split(mnist_04_X, mnist_04_y, test_size=0.15, shuffle=True)
train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.18, shuffle=True)

In [106]:
print('train_X.shape',train_X.shape)
print('train_y.shape',train_y.shape, "----->", round(train_X.shape[0]/mnist_04_X.shape[0], 3), "%")
print('val_X.shape',val_X.shape)
print('val_y.shape',val_y.shape, "----->", round(val_X.shape[0]/mnist_04_X.shape[0], 3), "%")
print('test_X.shape',test_X.shape)
print('test_y.shape',test_y.shape, "----->", round(test_X.shape[0]/mnist_04_X.shape[0], 3), "%")

train_X.shape (24906, 784)
train_y.shape (24906,) -----> 0.697 %
val_X.shape (5468, 784)
val_y.shape (5468,) -----> 0.153 %
test_X.shape (5361, 784)
test_y.shape (5361,) -----> 0.15 %


In [121]:
n = train_X.shape[1] #num of features
n_classes = 5 #number of classes to predict
num_neurons = 100 #num of neurons in each hidden layer
learning_rate = 0.002
n_epochs = 200
batch_size = 64
keep_prob = 0.65
max_no_winner = 6000 #if no validation loss improvement within max_no_winner steps, stop learning

In [108]:
def random_batch(X, y, batch_size):
    
    idx = np.random.randint(0, X.shape[0], batch_size)
    batch_X = X[idx]
    batch_y = y[idx]
    
    return batch_X, batch_y

In [109]:
from datetime import datetime

def logdir_generate(mode):
    root_logdir = "tf_logs"
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    logdir = "{}/{}/run-{}/".format(root_logdir, mode, now)
    
    return logdir

In [122]:
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n), name='X')
y = tf.placeholder(tf.int64, shape=(None), name='y')
is_training = tf.placeholder(tf.bool, shape=(), name="is_training")

bn_params = {
    'is_training': is_training,
    'decay': 0.99,
    'updates_collections': None,
    'scale': True
}

with tf.name_scope('DNN'):
    with arg_scope([fully_connected], activation_fn=tf.nn.elu, 
                   weights_initializer=tf.contrib.layers.xavier_initializer(),
                   normalizer_fn=tf.contrib.layers.batch_norm,
                   normalizer_params=bn_params):
        X_drop = dropout(X, keep_prob, is_training=is_training)
        
        hidden1 = fully_connected(X_drop, num_neurons)
        hidden1_drop = dropout(hidden1, keep_prob, is_training=is_training)
        
        hidden2 = fully_connected(hidden1_drop, num_neurons)
        hidden2_drop = dropout(hidden2, keep_prob, is_training=is_training)
        
        hidden3 = fully_connected(hidden2_drop, num_neurons)
        hidden3_drop = dropout(hidden3, keep_prob, is_training=is_training)
        
        hidden4 = fully_connected(hidden3_drop, num_neurons)
        hidden4_drop = dropout(hidden4, keep_prob, is_training=is_training)
        
        hidden5 = fully_connected(hidden4_drop, num_neurons)
        hidden5_drop = dropout(hidden5, keep_prob, is_training=is_training)
        
        logits = fully_connected(hidden5_drop, n_classes, activation_fn=None)

with tf.name_scope('loss'):
    xentrophy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentrophy, name='loss')

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

#exporting useful values to Tensorboard
loss_summary = tf.summary.scalar("loss", loss)
accuracy_summary = tf.summary.scalar("accuracy", accuracy)
file_writer_train = tf.summary.FileWriter(logdir_generate("train"), tf.get_default_graph())
file_writer_test = tf.summary.FileWriter(logdir_generate("test"), tf.get_default_graph())

init = tf.global_variables_initializer()

#creating a saver to save our model
saver = tf.train.Saver()

In [123]:
step = 0
no_winner = 0
stop_learning = False
a = 0

with tf.Session() as sess:
    n_batches = int(np.ceil(train_X.shape[0] / batch_size))
    
    print("STARTING TO TRAIN A NEW MODEL!")

    sess.run(init)
    
    #getting initial losses and accuracies
    summary_loss_train = loss_summary.eval(feed_dict={is_training: False, X: train_X, y: train_y})
    summary_loss_test = loss_summary.eval(feed_dict={is_training: False, X: val_X, y: val_y})
    summary_accuracy_train = accuracy_summary.eval(feed_dict={is_training: False, X: train_X, y: train_y})
    summary_accuracy_val = accuracy_summary.eval(feed_dict={is_training: False, X: val_X, y: val_y})
    file_writer_train.add_summary(summary_loss_train, 0)
    file_writer_train.add_summary(summary_accuracy_train, 0)
    file_writer_test.add_summary(summary_loss_test, 0)
    file_writer_test.add_summary(summary_accuracy_val, 0)
    
    #early stopping, initializing winner
    winner = loss.eval(feed_dict={is_training: False, X: val_X, y: val_y})

    for epoch in range(n_epochs):
        
        for i in range(n_batches):
            batch_X, batch_y = random_batch(train_X, train_y, batch_size)
            sess.run(training_op, feed_dict={is_training: True, X: batch_X, y: batch_y})
            step += 1
            
            #early stopping
            if step % 100 == 0:
                loss_val = loss.eval(feed_dict={is_training: False, X: val_X, y: val_y})
                if winner > loss_val:
                    winner = loss_val
                    a = step
                    no_winner = 0
                    save_path = saver.save(sess, "tmp/winner_model")
                else:
                    no_winner += 100
                if max_no_winner < no_winner:
                    stop_learning = True
                    break
            #-----------------
        if stop_learning: #if there was early stopping
            break

        if epoch % 1 == 0:
            summary_loss_train = loss_summary.eval(feed_dict={is_training: False, is_training: False, X: batch_X, y: batch_y})
            summary_loss_test = loss_summary.eval(feed_dict={is_training: False, X: test_X, y: test_y})
            summary_accuracy_train = accuracy_summary.eval(feed_dict={is_training: False, X: train_X, y: train_y})
            summary_accuracy_val = accuracy_summary.eval(feed_dict={is_training: False, X: val_X, y: val_y})
            file_writer_train.add_summary(summary_loss_train, step)
            file_writer_train.add_summary(summary_accuracy_train, step)
            file_writer_test.add_summary(summary_loss_test, step)
            file_writer_test.add_summary(summary_accuracy_val, step)
            
        if epoch % 10 == 0:
            save_path = saver.save(sess, "tmp/my_model")

    tr_acc = accuracy.eval(feed_dict={is_training: False, X: train_X, y: train_y})
    val_acc = accuracy.eval(feed_dict={is_training: False, X: val_X, y: val_y})
    
print("------------------")
print("End of training!")
print("------------------")
print("Training accuracy:",tr_acc)
print("Validation accuracy:",val_acc)

STARTING TO TRAIN A NEW MODEL!
------------------
End of training!
------------------
Training accuracy: 0.998073
Validation accuracy: 0.991222


In [124]:
winner #winner model's loss

0.024811031

In [129]:
with tf.Session() as sess:
    saver.restore(sess, "tmp/winner_model")
    acc = accuracy.eval(feed_dict={is_training: False, X: val_X, y: val_y})

INFO:tensorflow:Restoring parameters from tmp/winner_model


In [130]:
acc

0.99213606