## Load the necessary libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

np.set_printoptions(suppress=True) #determine the way floating point 
#numbers, arrays and other NumPy objects are displayed

## Download the MNIST dataset using Keras

In [2]:
(x_all, y_all),(x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [3]:
help(tf.keras.datasets.mnist.load_data) # description of dataset

Help on function load_data in module keras.datasets.mnist:

load_data(path='mnist.npz')
    Loads the MNIST dataset.
    
    This is a dataset of 60,000 28x28 grayscale images of the 10 digits,
    along with a test set of 10,000 images.
    More info can be found at the
    [MNIST homepage](http://yann.lecun.com/exdb/mnist/).
    
    Args:
      path: path where to cache the dataset locally
        (relative to `~/.keras/datasets`).
    
    Returns:
      Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`.
    
    **x_train**: uint8 NumPy array of grayscale image data with shapes
      `(60000, 28, 28)`, containing the training data. Pixel values range
      from 0 to 255.
    
    **y_train**: uint8 NumPy array of digit labels (integers in range 0-9)
      with shape `(60000,)` for the training data.
    
    **x_test**: uint8 NumPy array of grayscale image data with shapes
      (10000, 28, 28), containing the test data. Pixel values range
      from 0 to 255.
    
  

In [4]:
print ('x_all samples = ' + str(x_all.shape))
print ('y_all samples = ' + str(y_all.shape))

print ('x_test samples = ' + str(x_test.shape))
print ('y_test samples = ' + str(y_test.shape))

x_all samples = (60000, 28, 28)
y_all samples = (60000,)
x_test samples = (10000, 28, 28)
y_test samples = (10000,)


## Preprocess the data ready for tensorflow

In [5]:
y_input = tf.keras.utils.to_categorical(y_all)      #one hot encoder
x_input = (np.reshape(x_all, (x_all.shape[0], 784)) / 255.0).astype(np.float32)    #flattening

In [6]:
#you train on only 50000 images and keep 10000 images as validation data.

In [7]:
x_validate = x_input[0:10000,:]
y_validate = y_input[0:10000,:]
x_train = x_input[10000:,:]
y_train = y_input[10000:,:]

# we'll print out the shapes of the arrays to check it's what we expect
print ('x_train samples = ' + str(x_train.shape))
print ('y_train samples = ' + str(y_train.shape))
print ('x_validate samples = ' + str(x_validate.shape))
print ('y_validate samples = ' + str(y_validate.shape))

x_train samples = (50000, 784)
y_train samples = (50000, 10)
x_validate samples = (10000, 784)
y_validate samples = (10000, 10)


In [8]:
y_test = tf.keras.utils.to_categorical(y_test)
x_test = (np.reshape(x_test, (x_test.shape[0], 784)) / 255.0).astype(np.float32)

In [9]:
y_test = tf.keras.utils.to_categorical(y_test)
x_test = (np.reshape(x_test, (x_test.shape[0], 784)) / 255.0).astype(np.float32)
print ('x_test samples = ' + str(x_test.shape))
print ('y_test samples = ' + str(y_test.shape))

x_test samples = (10000, 784)
y_test samples = (10000, 10, 2)


## Create the tensorflow graph

In [10]:
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

2023-05-16 20:26:00.814176: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-16 20:26:01.330968: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14660 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0001:00:00.0, compute capability: 7.5


## Set-up the multinomial logistic regression

In [11]:
# predicted probability for each class
def y_pred(x):
    return tf.nn.softmax(tf.matmul(x,W) + b)

# cross entropy loss function
@tf.function
def loss(x,y):
    y_ = y_pred(x)
    return tf.reduce_mean(-tf.reduce_sum(y * tf.math.log(y_), axis=[1]))

In [12]:
# New code for calculating validation accuracy
def accuracy(x,y):
    # compare the prediction to the label, if they're the same
    # tf.equal will return 1, if they're different it will be 0
    y_ = y_pred(x)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    # calculate the accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    return accuracy

## Set-up the training routine

In [13]:
train_steps = 500

# learning rate
lr = 1e-2

# gradient descent optimizer
optimizer = tf.optimizers.SGD(lr)

## Create a session and run the training

In [14]:
# Set up logging.
logdir = '/home/jovyan/logs'
writer = tf.summary.create_file_writer(logdir)

with writer.as_default(): # use the contex manager
    for i in range(train_steps):
        with tf.GradientTape() as tape:
            current_loss = loss(x_input,y_input)
        gradients = tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(zip(gradients, [W ,b]))
        # calculate the validation accuracy
        val_acc = accuracy(x_validate,y_validate)
        # write the value to tensorboard
        tf.summary.scalar('val_acc', val_acc, step=i)
