# Import Statements

In [223]:
from numpy.random import seed
seed(888)
import tensorflow as tf
tf.random.set_seed(404)

In [224]:
import os
import numpy as np
import tensorflow as tf

from time import strftime

# Constants

In [225]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'
LOGGING_PATH = 'tensorbaord_mnist_digits_logs/'

NR_CLASSES = 10
VALIDATION_SIZE = 10000

IMAGE_HEIGHT = 28
IMAGE_WIDTH = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_HEIGHT*IMAGE_WIDTH*CHANNELS

# Gather Data

In [226]:
%%time
y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)

CPU times: total: 78.1 ms
Wall time: 60 ms


In [227]:
y_train_all.shape

(60000,)

In [228]:
y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)

In [229]:
%%time
x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)


CPU times: total: 34.2 s
Wall time: 35 s


In [230]:
%%time
x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)

CPU times: total: 5.77 s
Wall time: 5.86 s


# Data Exploration

In [231]:
x_train_all.shape
# 784 => 28(width) * 28(height) * 1(nr. of channels)

(60000, 784)

In [232]:
x_train_all[0]
# 0 => very white, 255 => very black,in-between => shades of gray

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [233]:
x_test.shape

(10000, 784)

In [234]:
y_train_all.shape

(60000,)

In [235]:
y_train_all[:5]

array([5, 0, 4, 1, 9])

# Data Preprocessing

#### Re-Scaling

In [236]:
x_train_all, x_test = x_train_all / 255.0, x_test / 255.0

## Converting Target Values to One-Hot Encoding

In [237]:
np.eye(10)[y_train_all[:5]]
# the np.eye() function takes nr. of rows as a parameter and then creates an array with the given nr.
# of rows with 1 going diagonally and the rest being zeros. However, in our usage we add an array 
# of values i.e [y_train_all[:5]], and the value at the position of the index of 
# the value in [y_train_all[:5]] is replaced with 1

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [238]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [239]:
y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

In [240]:
y_train_all.shape

(60000, 10)

## Creating Validation Dataset from Training Dataset

In [241]:
x_val = x_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]
x_val.shape

(10000, 784)

In [242]:
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]
y_train.shape

(50000, 10)

# Setup Tensorflow Graph

In [363]:
tf.compat.v1.disable_eager_execution()
X = tf.compat.v1.placeholder(tf.float32, shape=[None, TOTAL_INPUTS], name='X')
Y = tf.compat.v1.placeholder(tf.float32, shape=[None, NR_CLASSES], name='Labels')

## Neural Network Architecture
### Hyperparameters

In [364]:
nr_epochs = 50
learning_rate = 1e-3

n_hidden1 = 512
n_hidden2 = 64

In [365]:
def setup_layer(input_, weight_dim, bias_dim, name):
    with tf.name_scope(name):
        initial_w = tf.compat.v1.truncated_normal(shape=weight_dim, stddev=0.1, seed=42, name='w')
        w = tf.Variable(initial_value=initial_w)

        initial_b = tf.constant(value=0.0, shape=[bias_dim], name='b')
        b = tf.Variable(initial_value=initial_b)

        layer_in = tf.matmul(input_, w) + b
        if name == 'out':
            layer_output = tf.nn.softmax(layer_in)
        else:
            layer_output = tf.nn.relu(layer_in)
        
        tf.compat.v1.summary.histogram('weights', w)
        tf.compat.v1.summary.histogram('biases', b)
        return layer_output

In [366]:
# layer_1 = setup_layer(input_=X, weight_dim=[TOTAL_INPUTS, n_hidden1],
#                       bias_dim=n_hidden1, name='layer_1')

# layer_dropout = tf.compat.v1.nn.dropout(layer_1, keep_prob=0.8)

# layer_2 = setup_layer(input_=layer_dropout, weight_dim=[n_hidden1, n_hidden2],
#                      bias_dim=n_hidden2, name='layer_2')

# output = setup_layer(input_=layer_2, weight_dim=[n_hidden2, NR_CLASSES],
#                     bias_dim=NR_CLASSES, name='out')

# model_name = f"{n_hidden1}-{n_hidden2}-DO-E{nr_epochs} LR{learning_rate}"

In [367]:
# With DropOut

layer_1 = setup_layer(input_=X, weight_dim=[TOTAL_INPUTS, n_hidden1],
                      bias_dim=n_hidden1, name='layer_1')

layer_dropout = tf.compat.v1.nn.dropout(layer_1, keep_prob=0.8)

layer_2 = setup_layer(input_=layer_dropout, weight_dim=[n_hidden1, n_hidden2],
                     bias_dim=n_hidden2, name='layer_2')

output = setup_layer(input_=layer_2, weight_dim=[n_hidden2, NR_CLASSES],
                    bias_dim=NR_CLASSES, name='out')

model_name = f"{n_hidden1}-{n_hidden2}-DO-E{nr_epochs} LR{learning_rate}"

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


# Setup Tensorboard

In [368]:
folder_name = f"{model_name} on {strftime('%d')}-{strftime('%b')} at {strftime('%H')}_{strftime('%M')}"
directory = os.path.join(LOGGING_PATH, folder_name)
try:
    os.makedirs(directory)
except OSError as err:
    print(err.strerror)
else:
    print('Successfully created directory')

Successfully created directory


# Loss, Optimisation & Metrics

### Defining Loss Function

In [369]:
with tf.name_scope('loss_calc'):
    loss = tf.reduce_mean(tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

### Defining Optimizer

In [370]:
with tf.name_scope('optimizer'):
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
    train_step = optimizer.minimize(loss)

### Accuracy Metric

In [371]:
with tf.name_scope('accuracy_calc'):
    correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

In [372]:
with tf.name_scope('performance'):
    tf.compat.v1.summary.scalar('cost', loss)
    tf.compat.v1.summary.scalar('accuracy', accuracy)

### Check Image Inputs with Tensorbaord

In [373]:
with tf.name_scope('show_image'):
    x_image = tf.reshape(X, [-1, 28, 28, 1])
    tf.compat.v1.summary.image('image_input', x_image, max_outputs=4)

# Run  Session

#### Initialize Global Variables

In [374]:
sess = tf.compat.v1.Session()

#### Setup Filewriter and merge  summaries

In [375]:
merged_summary = tf.compat.v1.summary.merge_all()

train_writer = tf.compat.v1.summary.FileWriter(directory + '/train')
train_writer.add_graph(sess.graph)

validation_writer = tf.compat.v1.summary.FileWriter(directory + '/validation')

In [376]:
# initialize global variables
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

## Batching the Data

In [377]:
size_of_batch = 1000

In [378]:
nr_examples = y_train.shape[0]
nr_iterations = int(nr_examples/size_of_batch)

index_in_epoch = 0

In [379]:
def get_next_batch(batch_size, data, labels):
    
    global nr_examples
    global index_in_epoch
    
    start = index_in_epoch
    index_in_epoch += batch_size
    
    if index_in_epoch > nr_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

## Training Loop

In [380]:
for epoch  in range(nr_epochs):
    #------------------------training-------------------------------
    for i in range(nr_iterations):
        batch_x, batch_y = get_next_batch(batch_size=size_of_batch, data=x_train, labels=y_train)
        feed_dictionary = {X:batch_x, Y:batch_y}
        sess.run(train_step, feed_dict=feed_dictionary)
    su, batch_accuracy = sess.run(fetches=[merged_summary, accuracy], feed_dict=feed_dictionary)
    train_writer.add_summary(su, epoch)
        
    print(f"Epoch {epoch} ------------------------------- Training Accuracy {batch_accuracy}")
    #--------------------------validation-------------------------------
    summary = sess.run(fetches=merged_summary, feed_dict={X:x_val, Y:y_val})
    validation_writer.add_summary(summary, epoch)
print('Training Finished!')
        

Epoch 0 ------------------------------- Training Accuracy 0.8460000157356262
Epoch 1 ------------------------------- Training Accuracy 0.859000027179718
Epoch 2 ------------------------------- Training Accuracy 0.8669999837875366
Epoch 3 ------------------------------- Training Accuracy 0.8690000176429749
Epoch 4 ------------------------------- Training Accuracy 0.875
Epoch 5 ------------------------------- Training Accuracy 0.8769999742507935
Epoch 6 ------------------------------- Training Accuracy 0.9700000286102295
Epoch 7 ------------------------------- Training Accuracy 0.9769999980926514
Epoch 8 ------------------------------- Training Accuracy 0.9829999804496765
Epoch 9 ------------------------------- Training Accuracy 0.9819999933242798
Epoch 10 ------------------------------- Training Accuracy 0.9850000143051147
Epoch 11 ------------------------------- Training Accuracy 0.9879999756813049
Epoch 12 ------------------------------- Training Accuracy 0.9879999756813049
Epoch 13 -

# Reset for Next Iteration

In [381]:
train_writer.close()
validation_writer.close()
sess.close()
tf.compat.v1.reset_default_graph()

# Old Code

In [382]:
# with tf.name_scope('first_hidden_layer'):
#     initial_w1 = tf.compat.v1.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], stddev=0.1, seed=42, name='w1')
#     w1 = tf.Variable(initial_value=initial_w1)

#     initial_b1 = tf.constant(value=0.0, shape=[n_hidden1], name='b1')
#     b1 = tf.Variable(initial_value=initial_b1)

#     layer1_in = tf.matmul(X, w1) + b1
#     layer1_out = tf.nn.relu(layer1_in)

In [383]:
# with tf.name_scope('second_hidden_layer'):

#     initial_w2 = tf.compat.v1.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42, name='w2')
#     w2 = tf.Variable(initial_value=initial_w2)

#     initial_b2 = tf.constant(value=0.0, shape=[n_hidden2], name='b2')
#     b2 = tf.Variable(initial_value=initial_b2)

#     layer2_in = tf.matmul(layer1_out, w2) + b2
#     layer2_out = tf.nn.relu(layer2_in)

In [384]:
# with tf.name_scope('output_layer'):
#     initial_w3 = tf.compat.v1.truncated_normal(shape=[n_hidden2, NR_CLASSES], stddev=0.1, seed=42, name='w3')
#     w3 = tf.Variable(initial_value=initial_w3)

#     initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES], name='b3')
#     b3 = tf.Variable(initial_value=initial_b3)

#     layer3_in = tf.matmul(layer2_out, w3) + b3
#     output = tf.nn.softmax(layer3_in)

In [385]:
# w3.eval(sess)