# Deep Neural Network MNIST Classification

## Import Libraries

In [1]:
# pip install tensorflow_datasets

In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

## Load Dataset and Data Preprocessing

In [3]:
# Load MNIST dataset including MNIST info
data, info = tfds.load(name='mnist', with_info=True, as_supervised=True)

# Extract train and test data
mnist_train, mnist_test = data['train'], data['test']

# Extract 10% data from train dataset to crease validation dataset
# Find the number of validation samples by taking 10% of all train dataset
num_val_samples = 0.1 * info.splits['train'].num_examples

# Convert num_val_samples into integer
num_val_samples = tf.cast(num_val_samples, tf.int64)

# Find the number of test samples and convert it into integer
num_test_samples = info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

# Scale data to make results more numerically stable like having inputs between 0 and 1
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.
    return image, label

# Use map() function to apply custom transformation to train and test datasets
scaled_train_data = mnist_train.map(scale)
scaled_test_data = mnist_test.map(scale)

# Shuffle the train dataset before creating validataion dataset
BUFFER_SIZE = 10000
shuffled_train_data = scaled_train_data.shuffle(BUFFER_SIZE)

# Extract validation dataset
validation_data = shuffled_train_data.take(num_val_samples)
train_data = shuffled_train_data.skip(num_val_samples)

In [4]:
# Use Mini-Batch Gradient Descent to train the model
# Set batch size hyperparameter
BATCH_SIZE = 100

# Use batch() method that combines the consecutive elements of a dataset into batches
train_data = train_data.batch(BATCH_SIZE)

# Since we will not back-propagating on the validation data but only forward-propagating we don't need to batch
# However model expect the validation data in the batch form too but now we use only single batch
validation_data = validation_data.batch(BATCH_SIZE)
test_data = scaled_test_data.batch(BATCH_SIZE)

# The MNIST data is iterable and in 2-tuple format since as_supervised=True
# Therefore, validation inputs and targets have to exported and converted
validataion_inputs, validataion_targets = next(iter(validation_data)) # next() load data and iter() makes it iterable

## Model

### Outline the Model

In [8]:
# Make a model with 784 inputs, 2 hidden layers with 50 nodes and 10 outputs
input_size = 784
output_size = 10
hidden_layer_size = 100

# Define a model; flatten the images into column vector
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28,1)),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax')
    ])

### Choose the Optimizer and the Loss Function

In [9]:
# sparce_categorical_crossentropy loss function applies one-hot encoding
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Train the Model

In [10]:
NUM_EPOCHS = 5
model.fit(train_data, epochs=NUM_EPOCHS, validation_data=(validataion_inputs, validataion_targets), verbose=2)

Epoch 1/5
540/540 - 3s - loss: 0.3254 - accuracy: 0.9075 - val_loss: 0.1436 - val_accuracy: 0.9600 - 3s/epoch - 6ms/step
Epoch 2/5
540/540 - 2s - loss: 0.1372 - accuracy: 0.9596 - val_loss: 0.0980 - val_accuracy: 0.9700 - 2s/epoch - 4ms/step
Epoch 3/5
540/540 - 2s - loss: 0.0978 - accuracy: 0.9703 - val_loss: 0.1466 - val_accuracy: 0.9700 - 2s/epoch - 3ms/step
Epoch 4/5
540/540 - 2s - loss: 0.0749 - accuracy: 0.9768 - val_loss: 0.0898 - val_accuracy: 0.9700 - 2s/epoch - 3ms/step
Epoch 5/5
540/540 - 2s - loss: 0.0613 - accuracy: 0.9814 - val_loss: 0.0852 - val_accuracy: 0.9700 - 2s/epoch - 3ms/step


<keras.callbacks.History at 0x1b5927684c0>

### Test the Model

In [11]:
model.evaluate(test_data)



[0.08265190571546555, 0.9746999740600586]

Since the accuracy is very close to the validation accuracy the model did not overfit the data.