# Imports

In [1]:
from numpy.random import seed
seed(888)
import tensorflow
tensorflow.random.set_seed(404)

In [2]:
import os
import numpy as np
#import tensorflow as tf

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


# Constants

In [3]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'

NR_CLASSES = 10
VALIDATION_SIZE = 10000
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1  # Grayscale
TOTAL_INPUTS = IMAGE_WIDTH * IMAGE_HEIGHT * CHANNELS

# Get the Data

In [4]:
%%time

y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)

CPU times: user 203 ms, sys: 3.87 ms, total: 207 ms
Wall time: 208 ms


In [5]:
y_train_all.shape

(60000,)

In [6]:
y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)

In [7]:
%%time 

x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)

CPU times: user 31.3 s, sys: 721 ms, total: 32 s
Wall time: 32 s


In [8]:
%%time 

x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)

CPU times: user 5.12 s, sys: 90.5 ms, total: 5.21 s
Wall time: 5.21 s


# Explore

In [9]:
x_train_all.shape

(60000, 784)

In [10]:
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [11]:
y_train_all.shape

(60000,)

In [12]:
x_test.shape

(10000, 784)

In [13]:
# Each label corresponds to the categories or the classes for digits
y_train_all[:5]

array([5, 0, 4, 1, 9])

# Data Preprocessing 

In [14]:
# Re-scale
x_train_all, x_test = x_train_all / 255.0, x_test / 255.0

### Convert target values to one-hot encoding

In [15]:
# Eg to convert sparse matrix to full matrix
# Array element indexing in actual
values = y_train_all[:5]
np.eye(10)[values]

# Before -> array([5, 0, 4, 1, 9])

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [16]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [17]:
y_train_all.shape

(60000, 10)

In [18]:
y_test = np.eye(NR_CLASSES)[y_test]
y_test.shape

(10000, 10)

### Create validation dataset from training data

**Challenge:** Split the training dataset into a smaller training dataset and a validation dataset for the features and the labels. Create four arrays: `x_val`, `y_val`, `x_train` and `y_train` from `x_train_all` and `y_train_all`. Use the validation size of 10,000.

In [19]:
x_val = x_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]

In [20]:
x_train = x_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]

In [21]:
x_val.shape

(10000, 784)

In [22]:
x_train.shape

(50000, 784)

# Setup Tensorflow Graph

In [23]:
X = tf.placeholder(
    tf.float32, 
    shape=[None, TOTAL_INPUTS]  # [How many samples would be going to use, 784->Total features]
)

Y = tf.placeholder(tf.float32, shape=[None, NR_CLASSES])

### Neural Network Architecture

#### Hyperparameters

In [24]:
nr_epochs = 5
learning_rate = 1e-4  # 0.0001

n_hidden1 = 512
n_hidden2 = 64

In [25]:
# Initial weights of first hidden layer
initial_w1 = tf.truncated_normal(
    shape=[TOTAL_INPUTS, n_hidden1], 
    stddev=0.1,   # Far or close to each other
    seed=42)

# Creating the weights
w1 = tf.Variable(initial_value = initial_w1)

In [26]:
# Initialize the biases of the first hidden layer
initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
b1 = tf.Variable(initial_value = initial_b1)

In [27]:
# Feature going into the first hidden layer (Multiplication Matrix)
layer1_in = tf.matmul(X, w1) + b1

In [28]:
# Output of layer 1
layer1_out = tf.nn.relu(layer1_in)

**Challenge:** Set up the second hidden layer. This layer has 64 neurons and needs to work off the output of the first hidden layer(see above). Then setup the output layer. Remember, the output layer will use the softmax activation function.

In [29]:
# Initial weights of second hidden layer
initial_w2 = tf.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42)
w2 = tf.Variable(initial_value = initial_w2)

# Initialize the biases of the second hidden layer
initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
b2 = tf.Variable(initial_value = initial_b2)

# Feature going into the second hidden layer (Multiplication Matrix)
layer2_in = tf.matmul(layer1_out, w2) + b2

# Output of layer 2
layer2_out = tf.nn.relu(layer2_in)

In [30]:
# Initial weights of output layer
initial_w3 = tf.truncated_normal(shape=[n_hidden2, NR_CLASSES], stddev=0.1, seed=42)
w3 = tf.Variable(initial_value = initial_w3)

# Initialize the biases of the output layer
initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES])
b3 = tf.Variable(initial_value = initial_b3)

# Feature going into the output layer (Multiplication Matrix)
layer3_in = tf.matmul(layer2_out, w3) + b3

# Output layer
output = tf.nn.softmax(layer3_in)

# Loss, Optimisation & Metrics 

#### Defining Loss Func

In [31]:
# Computes softmax cross entropy between logits and labels
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

#### Defining Optimizer

In [32]:
# Initialize the optimizer and specify 'adam' to use and define learning rate
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

# Specify operation what the optimizer will do to minimize the loss
train_step = optimizer.minimize(loss)

#### Accuracy Metric

In [35]:
#argmax with extract the index of highest value(1) from each row
correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Run Session

In [36]:
sess = tf.Session()

In [37]:
# Initialize all the variables
init = tf.global_variables_initializer()
sess.run(init)

In [40]:
b3.eval(sess)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

## Batching the Data

In [41]:
size_of_batch = 1000

In [42]:
num_examples = y_train.shape[0]
nr_iterations = int(num_examples / size_of_batch)

# To track where a batch ends and other starts
index_in_epoch = 0

In [46]:
def next_batch(batch_size, data, labels):
    
    # To acess global variables to get their current values
    global num_examples
    global index_in_epoch
    
    # Starting value and incrementing index
    start = index_in_epoch
    index_in_epoch += batch_size
    
    # To reset
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    # Returing all values(data) & features(labels) between starting and ending batch value
    return data[start:end], labels[start:end]

### Training Loop

In [47]:
for epoch in range(nr_epochs):
    
    for i in range(nr_iterations):
        # 1- getting batch data and features
        batch_x, batch_y = next_batch(batch_size= size_of_batch, data=x_train, labels=y_train)
        
        # 2- To feed it to our session
        feed_dictionary = {X:batch_x, Y:batch_y}
        
        # To do calculations
        sess.run(train_step, feed_dict=feed_dictionary)
        
        #
        batch_accuracy = sess.run(fetches=[accuracy], feed_dict=feed_dictionary)
        
    print(f'Epoch {epoch} \t| Training Accuracy = {batch_accuracy}')
    
print('Done training !!')

Epoch 0 	| Training Accuracy = [0.376]
Epoch 1 	| Training Accuracy = [0.713]
Epoch 2 	| Training Accuracy = [0.8]
Epoch 3 	| Training Accuracy = [0.822]
Epoch 4 	| Training Accuracy = [0.839]
Done training !!
