# Task 2 - Week 3

## Modify the MNIST classifier so that instead of training on the full data set, you train on only 50000 images and keep 10000 images as validation data. Add operations that will calculate the accuracy of the classifier on the validation data.

In [2]:
#Load the necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

np.set_printoptions(suppress=True) #determine the way floating point 
#numbers, arrays and other NumPy objects are displayed

## Download the MNIST dataset using Keras

In [3]:
(x_all, y_all),(x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [4]:
#help(tf.keras.datasets.mnist.load_data) # description of dataset

In [5]:
print ('x_all samples = ' + str(x_all.shape))
print ('y_all samples = ' + str(y_all.shape))

print ('x_test samples = ' + str(x_test.shape))
print ('y_test samples = ' + str(y_test.shape))

x_all samples = (60000, 28, 28)
y_all samples = (60000,)
x_test samples = (10000, 28, 28)
y_test samples = (10000,)


## Preprocess the data ready for tensorflow

In [7]:
y_input = tf.keras.utils.to_categorical(y_all)      #one hot encoder
x_input = (np.reshape(x_all, (x_all.shape[0], 784)) / 255.0).astype(np.float32)    #flattening

In [9]:
#you train on only 50000 images and keep 10000 images as validation data.
print ('x_input samples = ' + str(x_input.shape))
print ('y_input samples = ' + str(y_input.shape))

x_input samples = (60000, 784)
y_input samples = (60000, 10)


In [8]:
x_validate = x_input[0:10000,:] #the first 10000
y_validate = y_input[0:10000,:]
x_train = x_input[10000:,:]
y_train = y_input[10000:,:]

# we'll print out the shapes of the arrays to check it's what we expect
print ('x_train samples = ' + str(x_train.shape))
print ('y_train samples = ' + str(y_train.shape))
print ('x_validate samples = ' + str(x_validate.shape))
print ('y_validate samples = ' + str(y_validate.shape))

x_train samples = (50000, 784)
y_train samples = (50000, 10)
x_validate samples = (10000, 784)
y_validate samples = (10000, 10)


In [10]:
y_test = tf.keras.utils.to_categorical(y_test)
x_test = (np.reshape(x_test, (x_test.shape[0], 784)) / 255.0).astype(np.float32)

In [11]:
y_test = tf.keras.utils.to_categorical(y_test)
x_test = (np.reshape(x_test, (x_test.shape[0], 784)) / 255.0).astype(np.float32)
print ('x_test samples = ' + str(x_test.shape))
print ('y_test samples = ' + str(y_test.shape))

x_test samples = (10000, 784)
y_test samples = (10000, 10, 2)


## Create the tensorflow graph

In [12]:
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

2023-05-21 20:33:31.791705: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-21 20:33:32.487435: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7426 MB memory:  -> device: 0, name: Tesla M60, pci bus id: 0001:00:00.0, compute capability: 5.2


## Set-up the multinomial logistic regression

In [13]:
# predicted probability for each class
def y_pred(x):
    return tf.nn.softmax(tf.matmul(x,W) + b)

# cross entropy loss function
@tf.function
def loss(x,y):
    y_ = y_pred(x)
    return tf.reduce_mean(-tf.reduce_sum(y * tf.math.log(y_), axis=[1]))

In [14]:
# New code for calculating validation accuracy
def accuracy(x,y):
    # compare the prediction to the label, if they're the same
    # tf.equal will return 1, if they're different it will be 0
    y_ = y_pred(x)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    # calculate the accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    return accuracy

## Set-up the training routine

In [16]:
train_steps = 500

# learning rate
lr = 1e-2

# gradient descent optimizer
optimizer = tf.optimizers.SGD(lr)

## Create a session and run the training

In [17]:
# Set up logging.
logdir = '/home/jovyan/logs'
writer = tf.summary.create_file_writer(logdir)

with writer.as_default(): # use the contex manager
    for i in range(train_steps):
        with tf.GradientTape() as tape:
            current_loss = loss(x_input,y_input)
        gradients = tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(zip(gradients, [W ,b]))
        # calculate the validation accuracy
        val_acc = accuracy(x_validate,y_validate)
        # write the value to tensorboard
        tf.summary.scalar('val_acc', val_acc, step=i)
