# Imports

In [50]:
from numpy.random import seed
seed(888)
import tensorflow
tensorflow.random.set_seed(404)

In [51]:
import os
import numpy as np
#import tensorflow as tf

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from time import strftime

# Constants

In [52]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'

LOGGING_PATH = 'tensorboard_mnist_digits_logs/'

NR_CLASSES = 10
VALIDATION_SIZE = 10000
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1  # Grayscale
TOTAL_INPUTS = IMAGE_WIDTH * IMAGE_HEIGHT * CHANNELS

# Get the Data

In [53]:
%%time

y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)

CPU times: user 168 ms, sys: 3.43 ms, total: 171 ms
Wall time: 170 ms


In [54]:
y_train_all.shape

(60000,)

In [55]:
y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)

In [56]:
%%time 

x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)

CPU times: user 31.1 s, sys: 816 ms, total: 31.9 s
Wall time: 31.9 s


In [57]:
%%time 

x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)

CPU times: user 5.11 s, sys: 125 ms, total: 5.24 s
Wall time: 5.23 s


# Explore

In [58]:
x_train_all.shape

(60000, 784)

In [59]:
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [60]:
y_train_all.shape

(60000,)

In [61]:
x_test.shape

(10000, 784)

In [62]:
# Each label corresponds to the categories or the classes for digits
y_train_all[:5]

array([5, 0, 4, 1, 9])

# Data Preprocessing 

In [63]:
# Re-scale
x_train_all, x_test = x_train_all / 255.0, x_test / 255.0

### Convert target values to one-hot encoding

In [64]:
# Eg to convert sparse matrix to full matrix
# Array element indexing in actual
values = y_train_all[:5]
np.eye(10)[values]

# Before -> array([5, 0, 4, 1, 9])

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [65]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [66]:
y_train_all.shape

(60000, 10)

In [67]:
y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

### Create validation dataset from training data

**Challenge:** Split the training dataset into a smaller training dataset and a validation dataset for the features and the labels. Create four arrays: `x_val`, `y_val`, `x_train` and `y_train` from `x_train_all` and `y_train_all`. Use the validation size of 10,000.

In [68]:
x_val = x_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]

In [69]:
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]

In [70]:
x_val.shape

(10000, 784)

In [71]:
x_train.shape

(50000, 784)

# Setup Tensorflow Graph

In [122]:
X = tf.placeholder(
    tf.float32, 
    shape=[None, TOTAL_INPUTS],  # [How many samples would be going to use, 784->Total features]
    name='X'
)

Y = tf.placeholder(tf.float32, shape=[None, NR_CLASSES], name='labels')

### Neural Network Architecture

#### Hyperparameters

In [123]:
nr_epochs = 5
learning_rate = 1e-4  # 0.0001

n_hidden1 = 512
n_hidden2 = 64

In [124]:
'''
# For grouping info
with tf.name_scope('hidden_1'):

    # Initial weights of first hidden layer
    initial_w1 = tf.truncated_normal(
        shape=[TOTAL_INPUTS, n_hidden1], 
        stddev=0.1,   # Far or close to each other
        seed=42)

    # Creating the weights
    w1 = tf.Variable(initial_value = initial_w1, name='w1')

    # Initialize the biases of the first hidden layer
    initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
    b1 = tf.Variable(initial_value = initial_b1, name='b1')

    # Feature going into the first hidden layer (Multiplication Matrix)
    layer1_in = tf.matmul(X, w1) + b1

    # Output of layer 1
    layer1_out = tf.nn.relu(layer1_in)
'''

"\n# For grouping info\nwith tf.name_scope('hidden_1'):\n\n    # Initial weights of first hidden layer\n    initial_w1 = tf.truncated_normal(\n        shape=[TOTAL_INPUTS, n_hidden1], \n        stddev=0.1,   # Far or close to each other\n        seed=42)\n\n    # Creating the weights\n    w1 = tf.Variable(initial_value = initial_w1, name='w1')\n\n    # Initialize the biases of the first hidden layer\n    initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])\n    b1 = tf.Variable(initial_value = initial_b1, name='b1')\n\n    # Feature going into the first hidden layer (Multiplication Matrix)\n    layer1_in = tf.matmul(X, w1) + b1\n\n    # Output of layer 1\n    layer1_out = tf.nn.relu(layer1_in)\n"

**Challenge:** Set up the second hidden layer. This layer has 64 neurons and needs to work off the output of the first hidden layer(see above). Then setup the output layer. Remember, the output layer will use the softmax activation function.

In [125]:
'''
# For grouping info
with tf.name_scope('hidden_2'):

    # Initial weights of second hidden layer
    initial_w2 = tf.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42)
    w2 = tf.Variable(initial_value = initial_w2, name='w2')

    # Initialize the biases of the second hidden layer
    initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
    b2 = tf.Variable(initial_value = initial_b2, name='b2')

    # Feature going into the second hidden layer (Multiplication Matrix)
    layer2_in = tf.matmul(layer1_out, w2) + b2

    # Output of layer 2
    layer2_out = tf.nn.relu(layer2_in)
'''

"\n# For grouping info\nwith tf.name_scope('hidden_2'):\n\n    # Initial weights of second hidden layer\n    initial_w2 = tf.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42)\n    w2 = tf.Variable(initial_value = initial_w2, name='w2')\n\n    # Initialize the biases of the second hidden layer\n    initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])\n    b2 = tf.Variable(initial_value = initial_b2, name='b2')\n\n    # Feature going into the second hidden layer (Multiplication Matrix)\n    layer2_in = tf.matmul(layer1_out, w2) + b2\n\n    # Output of layer 2\n    layer2_out = tf.nn.relu(layer2_in)\n"

In [126]:
'''
# For grouping info
with tf.name_scope('output_layer'):

    # Initial weights of output layer
    initial_w3 = tf.truncated_normal(shape=[n_hidden2, NR_CLASSES], stddev=0.1, seed=42)
    w3 = tf.Variable(initial_value = initial_w3, name='w3')

    # Initialize the biases of the output layer
    initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES])
    b3 = tf.Variable(initial_value = initial_b3, name='b3')

    # Feature going into the output layer (Multiplication Matrix)
    layer3_in = tf.matmul(layer2_out, w3) + b3

    # Output layer
    output = tf.nn.softmax(layer3_in)
'''

"\n# For grouping info\nwith tf.name_scope('output_layer'):\n\n    # Initial weights of output layer\n    initial_w3 = tf.truncated_normal(shape=[n_hidden2, NR_CLASSES], stddev=0.1, seed=42)\n    w3 = tf.Variable(initial_value = initial_w3, name='w3')\n\n    # Initialize the biases of the output layer\n    initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES])\n    b3 = tf.Variable(initial_value = initial_b3, name='b3')\n\n    # Feature going into the output layer (Multiplication Matrix)\n    layer3_in = tf.matmul(layer2_out, w3) + b3\n\n    # Output layer\n    output = tf.nn.softmax(layer3_in)\n"

In [127]:
def setup_layer(input, weight_dim, bias_dim, name):
    
    # For grouping info
    with tf.name_scope(name):
        
        # Initial weights of output layer
        initial_w = tf.truncated_normal(shape=weight_dim, stddev=0.1, seed=42)
        w = tf.Variable(initial_value = initial_w, name='W')

        # Initialize the biases of the output layer
        initial_b = tf.constant(value=0.0, shape=bias_dim)
        b = tf.Variable(initial_value = initial_b, name='B')

        # Feature going into the output layer (Multiplication Matrix)
        layer_in = tf.matmul(input, w) + b

        # Output layer
        if name == 'out':
            layer_out = tf.nn.softmax(layer_in)
        else:
            layer_out = tf.nn.relu(layer_in)
            
        return layer_out
        

In [128]:
layer_1 = setup_layer(X, weight_dim = [TOTAL_INPUTS, n_hidden1], 
                      bias_dim=[n_hidden1], name='layer_1')

layer_2 = setup_layer(layer_1, weight_dim = [n_hidden1, n_hidden2], 
                      bias_dim=[n_hidden2], name='layer_2')

output = setup_layer(layer_2, weight_dim = [n_hidden2, NR_CLASSES], 
                      bias_dim=[NR_CLASSES], name='out')

# Tensorboard Setup

In [129]:
# Folder for tensorboard

folder_name = f'Model 1 at {strftime("%H:%M")}'
directory = os.path.join(LOGGING_PATH, folder_name)

try:
    os.makedirs(directory)
except OSError as exception:
    print(exception.strerror)
else:
    print('Successfully created directories !!')

Successfully created directories !!


# Loss, Optimisation & Metrics 

#### Defining Loss Func

In [130]:
with tf.name_scope('loss_cal'):
    # Computes softmax cross entropy between logits and labels
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

#### Defining Optimizer

In [131]:
with tf.name_scope('optimizer'):
    # Initialize the optimizer and specify 'adam' to use and define learning rate
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

    # Specify operation what the optimizer will do to minimize the loss
    train_step = optimizer.minimize(loss)

#### Accuracy Metric

In [132]:
with tf.name_scope('accuracy_calc'):
    #argmax with extract the index of highest value(1) from each row
    correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [133]:
with tf.name_scope('performance'):
    # Creating Tensorboard Summaries
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('cost', loss)

#### Check Input Images in Tensorboard

In [134]:
with tf.name_scope('show_image'):
    # All data in placeholder
    x_image = tf.reshape(X, [-1,28,28,1])
    tf.summary.image('image_input', x_image, max_outputs=4)

# Run Session

In [135]:
sess = tf.Session()

#### Setup Filewriter and Merge Summaries

In [136]:
merged_summary = tf.summary.merge_all()

train_writer = tf.summary.FileWriter(directory + '/train')
train_writer.add_graph(sess.graph)

validation_writer = tf.summary.FileWriter(directory + '/validation')

#### Initialize all the variables

In [137]:
init = tf.global_variables_initializer()
sess.run(init)

In [138]:
# Old part code
# b3.eval(sess)

## Batching the Data

In [139]:
size_of_batch = 1000

In [140]:
num_examples = y_train.shape[0]
nr_iterations = int(num_examples / size_of_batch)

# To track where a batch ends and other starts
index_in_epoch = 0

In [141]:
def next_batch(batch_size, data, labels):
    
    # To acess global variables to get their current values
    global num_examples
    global index_in_epoch
    
    # Starting value and incrementing index
    start = index_in_epoch
    index_in_epoch += batch_size
    
    # To reset
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    # Returing all values(data) & features(labels) between starting and ending batch value
    return data[start:end], labels[start:end]

### Training Loop

In [142]:
for epoch in range(nr_epochs):
    
    # ===================== TRAINING DATASET =====================
    for i in range(nr_iterations):
        # 1- getting batch data and features
        batch_x, batch_y = next_batch(batch_size= size_of_batch, data=x_train, labels=y_train)
        
        # 2- To feed it to our session
        feed_dictionary = {X:batch_x, Y:batch_y}
        
        # To do calculations
        sess.run(train_step, feed_dict=feed_dictionary)
        
    # Fetching merged summary and accuracy from session
    s, batch_accuracy = sess.run(fetches=[merged_summary, accuracy], feed_dict=feed_dictionary)
    
    # Writing summary to file
    train_writer.add_summary(s, epoch)
        
    print(f'Epoch {epoch} \t| Training Accuracy = {batch_accuracy}')
    
    
    # ===================== VALIDATION =========================
    summary = sess.run(fetches=merged_summary, feed_dict={X: x_val, Y: y_val})
    validation_writer.add_summary(summary, epoch)
    
    
print('Done training !!')

Epoch 0 	| Training Accuracy = 0.4050000011920929
Epoch 1 	| Training Accuracy = 0.7329999804496765
Epoch 2 	| Training Accuracy = 0.8190000057220459
Epoch 3 	| Training Accuracy = 0.843999981880188
Epoch 4 	| Training Accuracy = 0.8560000061988831
Done training !!


# Reset for the Next Run

In [143]:
train_writer.close()
validation_writer.close()
sess.close()
tf.reset_default_graph()