# Deep Neural Network for MNIST Dataset Classificaiton

#### Info about the dataset:
##### Content: 70,000 images (28x28 pixels) of handwritten integers (0-9)
##### Analysis: There are 10 integers to account for, this is a classification problem with 10 classes/outputs.
##### Goal: Build a model which can sucessfully predict what integer a given MNIST image is.


# Import the relevant packages

In [1]:
#Standard ML libraries
import numpy as np
import tensorflow as tf
#Use the tf version of the MNIST dataset via the tfds module
import tensorflow_datasets as tfds

# Data
#### Staging and PreProcessing

In [42]:
#Stage the data w/two associated arguments: with_info and as_supervised 
#with_info = True, attaches a tuple to the table that contains metadata about this version of MNIST dataset
#as_supervised = True, loads data for supervised learning (inputs, targets) target stucture
#load the actual data to mnist_dataset, load th metadata to mnist_info
mnist_dataset, mnist_info = tfds.load(name = 'mnist', with_info = True, as_supervised = True)

#assign training and test data to mnist_train/mnist_test w/ built in ['train']/['test']
#just like pandas dataframes
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

#there is no reference to validation datasets in the MNIST tf dataset 
#we need to make one

#count 10% of the training data
num_validation_samples = 0.1*mnist_info.splits['train'].num_examples
#make sure that number is an integer
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

#count the test data
num_test_samples = mnist_info.splits['test'].num_examples
#make sure that count is a integer
num_test_samples = tf.cast(num_test_samples, tf.int64)

#now it is time to standardize our data
#write scale func, w/ param (image, label) --> this is for data.map(func) method
#.map() method -- takes a func that scales images/labels and returns images/labels
#.map() will then apply that transformation scaling embedded in a function to the whole dataset

#how to best scale: scale for numerical stability
#to ensure nice and easy calculations, we want to transform all data to be b/t 0<->1
def scale(image, label):
#make sure every number is a float for below calculations
    image = tf.cast(image, tf.float32)
#divide each image element by 255, to ensure all elements are transformed to be 0<->1; 
#255 b/c there are 0-255 shades of grey(rgb)
    image /= 255.
    return image, label

#apply .map() method w/scale func to mnist_train dataset == assign it to a new variable
scaled_train_and_validation_dataset = mnist_train.map(scale)
#apply .map() method w/scale func to mnist_test dataset == assign it to a new variable
test_data = mnist_test.map(scale)

#now that our data is scaled, we want to shuffle it
#use .shuffle() method to shuffle dataset
#assign buffer size so that we don't overload computer memory
BUFFER_SIZE = 10000 
#shuffle the train and validation data
shuffled_train_and_validation_dataset = scaled_train_and_validation_dataset.shuffle(BUFFER_SIZE)
#use .take() method to take shuffled train data and so that we can use it as validating data
validation_data = shuffled_train_and_validation_dataset.take(num_validation_samples)
#rename the renaming 90% of the shuffled train data to easier variable name to type
train_data = shuffled_train_and_validation_dataset.skip(num_validation_samples)

#determine a batch size such that we can slowly work our way through training data
BATCH_SIZE = 100

#assign batch size
train_data = train_data.batch(BATCH_SIZE)
#assign the batch size to the the total amount of validation samples
#we dont batch validation data because, we only forward propogate validation data in  neural net
#we only need to batch train data becauswe we are backwards/forwards propogating in neural net
validation_data = validation_data.batch(num_validation_samples)
#same logic applies for test_data
test_data = test_data.batch(num_test_samples)

#use python func next(iter()) to iterate through the 'same' validation data
#assign two different variables, to deal with input tuple length
validation_inputs, validation_targets = next(iter(validation_data))

## Model

In [55]:
inputs_size = 784
output_size = 10
#use same hidden layer size for both width/depth
hidden_layer_size = 250

#build model 
model = tf.keras.Sequential([
#deifine input layer: given input tensor of rank 3, 28x28x1
#b/c this is not a CNN, we need to flatten images
#use.layers.Flatten() method that takes our tensor and flattens it to a vector, (28,28,1) = (784,)
#what we are creating is a 'feed forward' neural net
        tf.keras.layers.Flatten(input_shape = (28,28,1)),
#tf.keras.layers.Dense() is essentially = output = activation_func(dot(input,weight) + bias)
        tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
        tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
#do the same dot product using .layers.Dense(), except set the number of outputs
#use 'softmax' activation func so that we get the properly size output tensor
        tf.keras.layers.Dense(output_size, activation = 'softmax')  
        ])



## Optimizer/Optimization Algo

In [56]:
#model.compile() method makes this step simple
#optimizer = 'adam' best optimization algorithim for this kind of classification
#loss func = we want one-hot encoding, in tf the correct func is sparse_categorical_crossentropy
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics =['accuracy'])

# Train the model

In [52]:
NUM_EPOCHS = 5

#use .fit() method to fit the model with inputs, epoch#, and validationo data
model.fit(train_data, 
          epochs = NUM_EPOCHS, 
          validation_data = (validation_inputs, validation_targets), 
          verbose = 2)


Epoch 1/5
540/540 - 3s - loss: 0.2534 - accuracy: 0.9259 - val_loss: 0.1147 - val_accuracy: 0.9642
Epoch 2/5
540/540 - 3s - loss: 0.0963 - accuracy: 0.9705 - val_loss: 0.0742 - val_accuracy: 0.9777
Epoch 3/5
540/540 - 3s - loss: 0.0662 - accuracy: 0.9794 - val_loss: 0.0495 - val_accuracy: 0.9848
Epoch 4/5
540/540 - 3s - loss: 0.0449 - accuracy: 0.9864 - val_loss: 0.0380 - val_accuracy: 0.9890
Epoch 5/5
540/540 - 3s - loss: 0.0362 - accuracy: 0.9889 - val_loss: 0.0306 - val_accuracy: 0.9913


<tensorflow.python.keras.callbacks.History at 0x22fef771220>

## Test the model

In [53]:
test_loss, test_accuracy = model.evaluate(test_data)



In [54]:
print('Test loss:{0:.2f}   Test Accuracy:{1:.2f}%'.format(test_loss, test_accuracy*100))

Test loss:0.08   Test Accuracy:97.74%
