# Deep Neural Network for MNIST Classification

MNIST provides 70,000 images (28x28 pixels) of handwritten digits (1 digit per image).

The goal is to write a classification algorithm that detects which digit is written for the 10 digits (0, 1, 2, 3, 4, 5, 6, 7, 8, 9).



## Import the packages

In [1]:
!pip install tensorflow-datasets -q

In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds # TensorFLow includes a data provider for MNIST

## Load and Preprocess Data


In [36]:
# tfds.load loads a dataset
# mnist_dataset = tfds.load(name='mnist', as_supervised=True)
# with_info=True provides a tuple containing information about the version, features, and number of samples
# as_supervised=True will load the dataset in a 2-tuple structure (input, target)

mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

In [38]:
#Extract the training and testing dataset with the built references
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

In [39]:
# Define the number of validation samples as a % of the train samples
num_validation_samples = tf.cast(0.1 * mnist_info.splits['train'].num_examples, tf.int64)

In [40]:
# Store the number of test samples in a dedicated variable
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

In [41]:
# Normalize the inputs between 0 and 1
def scale(image, label):
  """
  function that scales an image and its label between 0 and 1
  takes an image and a label as inputs
  returns the image and its label with all its values between 0 and 1
  """
  # typecast to float
  image = tf.cast(image, tf.float32)

  # Divide each element by 255 to get all elements between 0 and 1
  image /= 255.

  return image, label

In [42]:
# the method .map() allows for custom transformation to a given dataset
scaled_train_and_validation_data = mnist_train.map(scale)

In [43]:
# scale and batch the test data
test_data = mnist_test.map(scale)

In [44]:
# Shuffle the data
# cannot shuffle the whole dataset in one go because it will not all fit in memory

BUFFER_SIZE = 10000
# if BUFFER_SIZE=1 => no shuffling will actually happen
# if BUFFER_SIZE >= num samples => shuffling is uniform

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)


In [45]:
# create a batch with a batch size equal to the total number of validation samples
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

In [46]:
# the train_data is everything else
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

In [47]:
# determine the batch size
BATCH_SIZE = 32

In [48]:
# batch the train data
train_data = train_data.batch(BATCH_SIZE)

In [49]:
# batch the validation data
validation_data = validation_data.batch(num_validation_samples)

In [50]:
# batch the test data
test_data = test_data.batch(num_test_samples)

In [51]:
# takes next batch
# as_supervized=True for a 2-tuple structure
validation_inputs, validation_targets = next(iter(validation_data))

## Build the Model

In [52]:
# choose parameters
input_size = 784
output_size = 10
hidden_layer_size = 50

In [53]:
# define the model
model = tf.keras.Sequential([

    # the first layer (the input layer)
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)), # input layer

    # tf.keras.layers.Dense is implementing: output = activation(dot(input, weight) + bias)
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 2nd hidden layer

    # the final layer is activated with softmax
    tf.keras.layers.Dense(output_size, activation='softmax') # output layer
])

### Choose the optimizer and the loss function

In [54]:
# compile the model with the adam optimizer, sparse categorical cross entropy loss function, and accuracy metric
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Training


In [55]:
# determine the maximum number of epochs
NUM_EPOCHS = 10

In [56]:
# train the model
model.fit(train_data, validation_data=(validation_inputs, validation_targets), epochs=NUM_EPOCHS, verbose = 2)

Epoch 1/10
1688/1688 - 10s - 6ms/step - accuracy: 0.9130 - loss: 0.3031 - val_accuracy: 0.9488 - val_loss: 0.1666
Epoch 2/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9586 - loss: 0.1383 - val_accuracy: 0.9652 - val_loss: 0.1184
Epoch 3/10
1688/1688 - 4s - 3ms/step - accuracy: 0.9690 - loss: 0.1030 - val_accuracy: 0.9713 - val_loss: 0.0920
Epoch 4/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9741 - loss: 0.0849 - val_accuracy: 0.9743 - val_loss: 0.0902
Epoch 5/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9776 - loss: 0.0720 - val_accuracy: 0.9778 - val_loss: 0.0780
Epoch 6/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9807 - loss: 0.0612 - val_accuracy: 0.9792 - val_loss: 0.0702
Epoch 7/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9823 - loss: 0.0551 - val_accuracy: 0.9827 - val_loss: 0.0629
Epoch 8/10
1688/1688 - 5s - 3ms/step - accuracy: 0.9855 - loss: 0.0473 - val_accuracy: 0.9825 - val_loss: 0.0566
Epoch 9/10
1688/1688 - 4s - 2ms/step - accuracy: 0.9870 - loss: 0.0427 - val_accuracy: 0.9855 -

<keras.src.callbacks.history.History at 0x7931560a16c0>

## Test the model


In [57]:
test_loss, test_accuracy = model.evaluate(test_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 933ms/step - accuracy: 0.9701 - loss: 0.1095


In [58]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.11. Test accuracy: 97.01%
