# Import Statements

In [1]:
from numpy.random import seed
seed(888)
import tensorflow as tf
tf.random.set_seed(404)

In [2]:
import os
import numpy as np
import tensorflow as tf

from time import strftime

# Constants

In [3]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'
LOGGING_PATH = 'tensorbaord_mnist_digits_logs/'

NR_CLASSES = 10
VALIDATION_SIZE = 10000

IMAGE_HEIGHT = 28
IMAGE_WIDTH = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_HEIGHT*IMAGE_WIDTH*CHANNELS

# Gather Data

In [4]:
%%time
y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)

CPU times: total: 62.5 ms
Wall time: 183 ms


In [5]:
y_train_all.shape

(60000,)

In [6]:
y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)

In [7]:
%%time
x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)


CPU times: total: 27.6 s
Wall time: 28.5 s


In [8]:
%%time
x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)

CPU times: total: 2.8 s
Wall time: 3.01 s


# Data Exploration

In [9]:
x_train_all.shape
# 784 => 28(width) * 28(height) * 1(nr. of channels)

(60000, 784)

In [10]:
x_train_all[0]
# 0 => very white, 255 => very black,in-between => shades of gray

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [11]:
x_test.shape

(10000, 784)

In [12]:
y_train_all.shape

(60000,)

In [13]:
y_train_all[:5]

array([5, 0, 4, 1, 9])

# Data Preprocessing

#### Re-Scaling

In [14]:
x_train_all, x_test = x_train_all / 255.0, x_test / 255.0

## Converting Target Values to One-Hot Encoding

In [15]:
np.eye(10)[y_train_all[:5]]
# the np.eye() function takes nr. of rows as a parameter and then creates an array with the given nr.
# of rows with 1 going diagonally and the rest being zeros. However, in our usage we add an array 
# of values i.e [y_train_all[:5]], and the value at the position of the index of 
# the value in [y_train_all[:5]] is replaced with 1

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [16]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [17]:
y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

In [18]:
y_train_all.shape

(60000, 10)

## Creating Validation Dataset from Training Dataset

In [19]:
x_val = x_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]
x_val.shape

(10000, 784)

In [20]:
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]
y_train.shape

(50000, 10)

# Setup Tensorflow Graph

In [43]:
tf.compat.v1.disable_eager_execution()
X = tf.compat.v1.placeholder(tf.float32, shape=[None, TOTAL_INPUTS], name='X')
Y = tf.compat.v1.placeholder(tf.float32, shape=[None, NR_CLASSES], name='Labels')

## Neural Network Architecture
### Hyperparameters

In [44]:
nr_epochs = 5
learning_rate = 1e-4

n_hidden1 = 512
n_hidden2 = 64

In [45]:
initial_w1 = tf.compat.v1.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], stddev=0.1, seed=42, name='w1')
w1 = tf.Variable(initial_value=initial_w1)

initial_b1 = tf.constant(value=0.0, shape=[n_hidden1], name='b1')
b1 = tf.Variable(initial_value=initial_b1)

layer1_in = tf.matmul(X, w1) + b1
layer1_out = tf.nn.relu(layer1_in)

In [46]:
initial_w2 = tf.compat.v1.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42, name='w2')
w2 = tf.Variable(initial_value=initial_w2)

initial_b2 = tf.constant(value=0.0, shape=[n_hidden2], name='b2')
b2 = tf.Variable(initial_value=initial_b2)

layer2_in = tf.matmul(layer1_out, w2) + b2
layer2_out = tf.nn.relu(layer2_in)

In [47]:
initial_w3 = tf.compat.v1.truncated_normal(shape=[n_hidden2, NR_CLASSES], stddev=0.1, seed=42, name='w3')
w3 = tf.Variable(initial_value=initial_w3)

initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES], name='b3')
b3 = tf.Variable(initial_value=initial_b3)

layer3_in = tf.matmul(layer2_out, w3) + b3
output = tf.nn.softmax(layer3_in)

# Setup Tensorboard

In [48]:
folder_name = f"Model on {strftime('%d')}-{strftime('%b')} at {strftime('%H')}_{strftime('%M')}"
directory = os.path.join(LOGGING_PATH, folder_name)
try:
    os.makedirs(directory)
except OSError as err:
    print(err.strerror)
else:
    print('Successfully created directory')

Successfully created directory


# Loss, Optimisation & Metrics

### Defining Loss Function

In [49]:
loss = tf.reduce_mean(tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

### Defining Optimizer

In [50]:
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(loss)

### Accuracy Metric

In [51]:
correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

In [52]:
tf.compat.v1.summary.scalar('cost', loss)
tf.compat.v1.summary.scalar('accuracy', accuracy)

<tf.Tensor 'accuracy:0' shape=() dtype=string>

# Run  Session

#### Initialize Global Variables

In [53]:
sess = tf.compat.v1.Session()

#### Setup Filewriter and merge  summaries

In [54]:
merged_summary = tf.compat.v1.summary.merge_all()

train_writer = tf.compat.v1.summary.FileWriter(directory + '/train')
train_writer.add_graph(sess.graph)

validation_writer = tf.compat.v1.summary.FileWriter(directory + '/validation')

In [55]:
# initialize global variables
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

In [56]:
w3.eval(sess)

array([[-0.02807751, -0.01377521, -0.06763297,  0.00245804, -0.04684547,
        -0.00024663, -0.09745911,  0.06638492,  0.04368011, -0.07038976],
       [ 0.06426843,  0.14513893,  0.1841282 , -0.01587993, -0.10607921,
         0.15984018, -0.01142471,  0.14045748, -0.00587858, -0.04244646],
       [-0.03702376, -0.05268839, -0.03103511, -0.05996868, -0.00144826,
         0.19438368, -0.05893153,  0.115643  ,  0.1053272 ,  0.05255   ],
       [ 0.08373224, -0.01663086, -0.01385766,  0.04728002,  0.08347194,
        -0.03172532,  0.12180098, -0.03985466,  0.06396734, -0.06321164],
       [-0.04811501, -0.0591412 ,  0.04668889,  0.1818193 , -0.10811663,
        -0.0360087 ,  0.16283922,  0.05718798,  0.01851236,  0.02780658],
       [ 0.0415091 , -0.1065625 ,  0.1417127 ,  0.0806262 , -0.06378367,
        -0.05958629,  0.07530966, -0.02142833, -0.0680046 ,  0.19922827],
       [ 0.02385167, -0.07702351, -0.09945031,  0.10000172, -0.01865594,
         0.0217609 ,  0.01489197,  0.00750957

## Batching the Data

In [57]:
size_of_batch = 1000

In [58]:
nr_examples = y_train.shape[0]
nr_iterations = int(nr_examples/size_of_batch)

index_in_epoch = 0

In [59]:
def get_next_batch(batch_size, data, labels):
    
    global nr_examples
    global index_in_epoch
    
    start = index_in_epoch
    index_in_epoch += batch_size
    
    if index_in_epoch > nr_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

## Training Loop

In [60]:
for epoch  in range(nr_epochs):
    #------------------------training-------------------------------
    for i in range(nr_iterations):
        batch_x, batch_y = get_next_batch(batch_size=size_of_batch, data=x_train, labels=y_train)
        feed_dictionary = {X:batch_x, Y:batch_y}
        sess.run(train_step, feed_dict=feed_dictionary)
    su, batch_accuracy = sess.run(fetches=[merged_summary, accuracy], feed_dict=feed_dictionary)
    train_writer.add_summary(su, epoch)
        
    print(f"Epoch {epoch} ------------------------------- Training Accuracy {batch_accuracy}")
    #--------------------------validation-------------------------------
    summary = sess.run(fetches=merged_summary, feed_dict={X:x_val, Y:y_val})
    validation_writer.add_summary(summary, epoch)
print('Training Finished!')
        

Epoch 0 ------------------------------- Training Accuracy 0.4050000011920929
Epoch 1 ------------------------------- Training Accuracy 0.7329999804496765
Epoch 2 ------------------------------- Training Accuracy 0.8190000057220459
Epoch 3 ------------------------------- Training Accuracy 0.843999981880188
Epoch 4 ------------------------------- Training Accuracy 0.8560000061988831
Training Finished!


# Reset for Next Iteration

In [61]:
train_writer.close()
validation_writer.close()
sess.close()
tf.compat.v1.reset_default_graph()