In [1]:
# Fix the NumPy and TensorFlow global random seeds up front so that runs are repeatable.
from numpy.random import seed
from tensorflow.random import set_seed

seed(888)
set_seed(404)

In [2]:
import tensorflow as tf
import numpy as np
import os

In [9]:
# ---- Configuration ----
# CSV files holding the pixel values (x) and the digit labels (y) of each split.
X_TRAIN_PATH = 'MNIST_Dataset/digit_xtrain.csv'
Y_TRAIN_PATH = 'MNIST_Dataset/digit_ytrain.csv'
X_TEST_PATH = 'MNIST_Dataset/digit_xtest.csv'
Y_TEST_PATH = 'MNIST_Dataset/digit_ytest.csv'

# Image geometry: 28 x 28 pixels with a single channel.
IMAGE_HEIGHT, IMAGE_WIDTH = 28, 28
NUM_CHANNELS = 1
# Number of values in one flattened image.
TOTAL_INPUTS = IMAGE_HEIGHT * IMAGE_WIDTH * NUM_CHANNELS

NUM_CLASSES = 10  # one class per digit, 0-9
VALIDATION_SIZE = 10000  # number of rows carved out of the training data for validation

## *Load Data*

In [4]:
%%time

# Read the label vectors of both splits as integers, then report their shapes.
y_train_data = np.loadtxt(Y_TRAIN_PATH, dtype=int, delimiter=',')
y_test = np.loadtxt(Y_TEST_PATH, dtype=int, delimiter=',')

print('Y Train shape: ', y_train_data.shape)
print('Y Test shape: ', y_test.shape)

Y Train shape:  (60000,)
Y Test shape:  (10000,)
CPU times: user 214 ms, sys: 19.7 ms, total: 234 ms
Wall time: 232 ms


In [5]:
%%time

# Read the pixel matrices of both splits as integers (one comma-separated row per
# image), then report their shapes.
x_train_data = np.loadtxt(X_TRAIN_PATH, dtype=int, delimiter=',')
x_test = np.loadtxt(X_TEST_PATH, dtype=int, delimiter=',')

print('X Train shape: ', x_train_data.shape)
print('X Test shape: ', x_test.shape)

X Train shape:  (60000, 784)
X Test shape:  (10000, 784)
CPU times: user 27.8 s, sys: 1.15 s, total: 28.9 s
Wall time: 28.9 s


---

## *Data preprocessing*

In [6]:
# Re-scale the pixel values of the training AND test data from 0-255 to 0-1.
# This cell overwrites its own inputs, so each step is guarded to keep the cell
# idempotent: the raw arrays were loaded with an integer dtype and become float
# after scaling, so an integer dtype means "not scaled yet". Without the guard a
# second run would silently divide by 255 again.
if np.issubdtype(x_train_data.dtype, np.integer):
    x_train_data = x_train_data / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test / 255.0

# Convert target values (labels) to one-hot encoding: row i of the identity matrix
# is the one-hot vector for class i, so indexing it with the label array encodes
# every label at once. The raw labels are 1-D; once encoded they are 2-D, and
# re-indexing with the (float) one-hot matrix would raise an IndexError, hence
# the rank check.
if y_train_data.ndim == 1:
    y_train_data = np.eye(NUM_CLASSES)[y_train_data]
print('Y Train shape: ', y_train_data.shape)

if y_test.ndim == 1:
    y_test = np.eye(NUM_CLASSES)[y_test]
print('Y Test shape: ', y_test.shape)

Y Train shape:  (60000, 10)
Y Test shape:  (10000, 10)


### *Create validation dataset from training data*

In [7]:
# Cut features and labels at row VALIDATION_SIZE: the leading rows form the
# validation set, the remaining rows stay in the training set.
x_val, x_train = np.split(x_train_data, [VALIDATION_SIZE])
y_val, y_train = np.split(y_train_data, [VALIDATION_SIZE])

print('Validation shape: ', x_val.shape, y_val.shape)
print('New training shape: ', x_train.shape, y_train.shape)


Validation shape:  (10000, 784) (10000, 10)
New training shape:  (50000, 784) (50000, 10)


---

## *Setup tensorflow graph*

In [11]:
# https://www.tensorflow.org/api_docs/python/tf/compat/v1/placeholder

# Placeholders require graph mode. From the docs: "... This API is strongly
# discouraged for use with eager execution and tf.function."
tf.compat.v1.disable_eager_execution()

# The set_seed(404) call at the top of the notebook ran while eager execution was
# still enabled, so it seeded the eager context rather than the default graph that
# the ops below are added to. Seed again now that we are in graph mode so the
# random weight initialisers are reproducible (same value as the first cell).
tf.random.set_seed(404)

# features placeholder: any number of rows, TOTAL_INPUTS values per row
X = tf.compat.v1.placeholder(tf.float32, shape=[None, TOTAL_INPUTS])

# labels placeholder: any number of rows, NUM_CLASSES values per row (one-hot)
Y = tf.compat.v1.placeholder(tf.float32, shape=[None, NUM_CLASSES])

### *Neural network definition*

In [13]:
# Training hyperparameters
num_epochs = 5
learning_rate = 0.0001

# Number of units in hidden layer 1 and hidden layer 2
n_hidden1, n_hidden2 = 512, 64

#### *Setup 1st layer*

In [14]:
# Initial weights of the 1st hidden layer: a [TOTAL_INPUTS, n_hidden1] tensor of
# random values drawn from a truncated normal distribution (stddev 0.1).
# NOTE: no per-op seed is passed, so the values depend on the graph-level seed.
initial_w1 = tf.random.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], stddev=0.1)
# Wrap the initial values in a tf.Variable that holds the layer's weights.
w1 = tf.Variable(initial_value=initial_w1)

In [15]:
# Biases of the 1st hidden layer: one value per hidden unit, all starting at 0.0,
# wrapped in a tf.Variable.
initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
b1 = tf.Variable(initial_value=initial_b1)

In [17]:
# https://www.tensorflow.org/api_docs/python/tf/linalg/matmul

# 1st layer pre-activation: the raw inputs X matrix-multiplied by the weights w1,
# plus the bias b1. The result has one column per hidden unit and is what feeds
# the activation function.
layer1_in = tf.matmul(X, w1) + b1

# 1st layer output: apply the rectified linear unit (ReLU) activation element-wise.
layer1_out = tf.nn.relu(layer1_in)

#### *Setup 2nd layer*

In [18]:
# 2nd hidden layer parameters: weights of shape [n_hidden1, n_hidden2] drawn from a
# truncated normal distribution (stddev 0.1), and one zero-initialised bias per unit.
initial_w2 = tf.random.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1)
w2 = tf.Variable(initial_value=initial_w2)
initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
b2 = tf.Variable(initial_value=initial_b2)

# 2nd layer pre-activation (1st layer output times w2, plus b2), followed by ReLU.
layer2_in = tf.matmul(layer1_out, w2) + b2
layer2_out = tf.nn.relu(layer2_in)

#### *Setup output layer*

In [19]:
# Output layer parameters: weights of shape [n_hidden2, NUM_CLASSES] drawn from a
# truncated normal distribution (stddev 0.1), and one zero-initialised bias per class.
initial_w3 = tf.random.truncated_normal(shape=[n_hidden2, NUM_CLASSES], stddev=0.1)
w3 = tf.Variable(initial_value=initial_w3)
initial_b3 = tf.constant(value=0.0, shape=[NUM_CLASSES])
b3 = tf.Variable(initial_value=initial_b3)

# Output layer pre-activation: 2nd layer output times w3, plus b3 (one value per class).
layer3_in = tf.matmul(layer2_out, w3) + b3
# Softmax is used as the activation function because this is the output layer.
# NOTE(review): if a later cell computes the loss with a *_with_logits function, it
# should presumably receive layer3_in rather than final_output - confirm.
final_output = tf.nn.softmax(layer3_in)