In [1]:
# set seed for reproducibility purposes.
from numpy.random import seed
seed(888)
from tensorflow.random import set_seed
set_seed(404)

In [2]:
import tensorflow as tf
import numpy as np
import os
from time import strftime

In [3]:
# CONSTANTS
X_TRAIN_PATH = 'MNIST_Dataset/digit_xtrain.csv'
X_TEST_PATH = 'MNIST_Dataset/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST_Dataset/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST_Dataset/digit_ytest.csv'

IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
NUM_CHANNELS = 1
TOTAL_INPUTS = IMAGE_WIDTH * IMAGE_HEIGHT * NUM_CHANNELS
NUM_CLASSES = 10 # digits from 0-9
VALIDATION_SIZE = 10000

LOGGING_PATH = 'tensorboard_mnist_logs/'

## *Load Data*

In [4]:
%%time

y_train_data = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)
print('Y Train shape: ', y_train_data.shape)

y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)
print('Y Test shape: ', y_test.shape)

Y Train shape:  (60000,)
Y Test shape:  (10000,)
CPU times: user 315 ms, sys: 3.21 ms, total: 318 ms
Wall time: 327 ms


In [5]:
%%time

x_train_data = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)
print('X Train shape: ', x_train_data.shape)

x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)
print('X Test shape: ', x_test.shape)

X Train shape:  (60000, 784)
X Test shape:  (10000, 784)
CPU times: user 30.4 s, sys: 1.57 s, total: 32 s
Wall time: 32 s


---

## *Data preprocessing*

In [6]:
# re-scale training data from 0-255 to 0-1
x_train_data, x_test = x_train_data/255.0, x_test/255.0

# convert target values (labels) to one-hot-encoding
y_train_data = np.eye(NUM_CLASSES)[y_train_data]
print('Y Train shape: ', y_train_data.shape)

y_test = np.eye(NUM_CLASSES)[y_test]
print('Y Test shape: ', y_test.shape)

Y Train shape:  (60000, 10)
Y Test shape:  (10000, 10)


### *Create validation dataset from training data*

In [7]:
x_val = x_train_data[:VALIDATION_SIZE]
y_val = y_train_data[:VALIDATION_SIZE]

x_train = x_train_data[VALIDATION_SIZE:]
y_train = y_train_data[VALIDATION_SIZE:]

print('Validation shape: ', x_val.shape, y_val.shape)
print('New training shape: ', x_train.shape, y_train.shape)


Validation shape:  (10000, 784) (10000, 10)
New training shape:  (50000, 784) (50000, 10)


---

## *Setup tensorflow graph*

In [8]:
# https://www.tensorflow.org/api_docs/python/tf/compat/v1/placeholder

# from docs: "... This API is strongly discouraged for use with eager execution and tf.function."
tf.compat.v1.disable_eager_execution()

# features placeholder
X = tf.compat.v1.placeholder(tf.float32, shape=[None, TOTAL_INPUTS])

#labels placeholder
Y = tf.compat.v1.placeholder(tf.float32, shape=[None, NUM_CLASSES])

### *Neural network definition*

In [9]:
# hyperparameters
num_epochs = 5
learning_rate = 1e-4

n_hidden1 = 512 # layer 1
n_hidden2 = 64 # layer 2

#### *Setup 1st layer*

In [10]:
# set initial random weights for 1st layer
initial_w1 = tf.random.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], stddev=0.1)
# create tf variable for weights
w1 = tf.Variable(initial_value=initial_w1)

In [11]:
# set initial bias for 1st layer
initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
b1 = tf.Variable(initial_value=initial_b1)

In [12]:
# https://www.tensorflow.org/api_docs/python/tf/linalg/matmul

# set 1st layer inputs (X which is the raw inputs multiplied by the weights (matrix mult) + bias)
# this result is what feeds the activation function
layer1_in = tf.matmul(X, w1) + b1

# set 1st layer outputs using rectified linear (activation function)
layer1_out = tf.nn.relu(layer1_in)

##### *Setup 2nd layer*

In [13]:
# setup weights and biases
initial_w2 = tf.random.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1)
w2 = tf.Variable(initial_value=initial_w2)
initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
b2 = tf.Variable(initial_value=initial_b2)

# setup inputs and outputs
layer2_in = tf.matmul(layer1_out, w2) + b2
layer2_out = tf.nn.relu(layer2_in)

#### *Setup output layer*

In [14]:
# setup weights and biases
initial_w3 = tf.random.truncated_normal(shape=[n_hidden2, NUM_CLASSES], stddev=0.1)
w3 = tf.Variable(initial_value=initial_w3)
initial_b3 = tf.constant(value=0.0, shape=[NUM_CLASSES])
b3 = tf.Variable(initial_value=initial_b3)

# setup inputs and outputs
layer3_in = tf.matmul(layer2_out, w3) + b3
final_output = tf.nn.softmax(layer3_in) # softmax is used as activation function because it's the output layer

### *Tensorboard Setup*

In [15]:
folder_name = f'Model_1_at_{strftime("%H:%M")}'
directory = os.path.join(LOGGING_PATH, folder_name)

try:
    os.makedirs(directory)
except OSError as exception:
    print(exception.strerror)
else:
    print('Directory created.')

Directory created.


### *Loss function*

In [16]:
# https://www.tensorflow.org/api_docs/python/tf/nn/softmax_cross_entropy_with_logits

# reduce_mean is used to get the average of the losses as the training
# will happen in batches instead of the whole dataset.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=final_output))

### Optimizer

In [17]:
# https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdamOptimizer
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

# https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdamOptimizer#minimize
train_step = optimizer.minimize(loss)

### Accuracy

In [18]:
# argmax of the final_output row has the highest probability from indexes 0-9 (softmax output)
# argmax of the Y row has 1 for the true label and 0 for the rest from indexes 0-9 (one-hot-encoding)
correct_prediction = tf.equal(tf.argmax(final_output, axis=1), tf.argmax(Y, axis=1))

# calculate accuracy
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [19]:
# (tensorboard)
# add summary for the accuracy
tf.compat.v1.summary.scalar('accuracy', acc)
# add summary for the loss
tf.compat.v1.summary.scalar('loss', loss)

<tf.Tensor 'loss:0' shape=() dtype=string>

---

## *Start and run session*

In [20]:
# "A Session object encapsulates the environment in which 
# Operation objects are executed, and Tensor objects are evaluated."
session = tf.compat.v1.Session()

2021-12-19 17:48:38.740040: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-12-19 17:48:38.740107: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (edlcorrea-X555LF): /proc/driver/nvidia/version does not exist
2021-12-19 17:48:38.743333: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [21]:
# setup FileWriter and merge summaries (tensorboard)
merged_summary = tf.compat.v1.summary.merge_all()
train_writer = tf.compat.v1.summary.FileWriter(directory + '/train')
train_writer.add_graph(session.graph)

In [22]:
# initialize the variables
init = tf.compat.v1.global_variables_initializer()
session.run(init)

### *Batching the data*

In [23]:
batch_size = 1000
num_examples = y_train.shape[0]
num_iterations = int(num_examples/batch_size)

index_in_epoch = 0

In [24]:
def next_batch(batch_size, data, labels):
    global num_examples
    global index_in_epoch
    
    start = index_in_epoch
    index_in_epoch += batch_size
    
    if index_in_epoch > num_examples:
        # reset
        start=0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

### Write the training loop

In [25]:
print('Training started.')
for epoch in range(num_epochs):
    for i in range(num_iterations):
        batch_x, batch_y = next_batch(batch_size=batch_size, data=x_train, labels=y_train)
        
        # create feed dictionary
        feed_dictionary = {X: batch_x, Y: batch_y}        
        # do the calculations for training
        session.run(train_step, feed_dict=feed_dictionary)
    
    # fetch the accuracy and summary from session
    summary, batch_accuracy = session.run(fetches=[merged_summary, acc], feed_dict=feed_dictionary)
    train_writer.add_summary(summary, epoch)
        
    print(f'Epoch {epoch} \t| Training accuracy = {batch_accuracy}')
print('Training complete.')

Training started.
Epoch 0 	| Training accuracy = 0.4819999933242798
Epoch 1 	| Training accuracy = 0.7429999709129333
Epoch 2 	| Training accuracy = 0.8330000042915344
Epoch 3 	| Training accuracy = 0.9369999766349792
Epoch 4 	| Training accuracy = 0.9440000057220459
Training complete.


---

## *Reset for next run*

In [26]:
train_writer.close()
session.close()
tf.compat.v1.reset_default_graph()