***import relevant packages***

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

***load data***

In [2]:
# Fetch MNIST through TensorFlow Datasets.
dataset, info = tfds.load(
    name='mnist',
    as_supervised=True,  # yield each example as a 2-tuple (input, target)
    with_info=True,      # also return dataset info: version, features, number of samples
)
info

[1mDownloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /root/tensorflow_datasets/mnist/3.0.1...[0m


Dl Completed...:   0%|          | 0/5 [00:00<?, ? file/s]

[1mDataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.[0m


tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_dir='/root/tensorflow_datasets/mnist/incomplete.7B777S_3.0.1/',
    file_format=tfrecord,
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
    }""

In [3]:
# Pull the two predefined splits out of the dataset dictionary.
train = dataset['train']
test = dataset['test']

# Reserve 10% of the training examples for validation. Both counts are stored
# as int64 tensors.
n_train_total = info.splits['train'].num_examples
n_validation_samp = tf.cast(0.1 * n_train_total, tf.int64)
n_test_samp = tf.cast(info.splits['test'].num_examples, tf.int64)
n_test_samp

<tf.Tensor: shape=(), dtype=int64, numpy=10000>

**Scale Data**

In [4]:
def scale(image, label):
    """Cast the image to float32 and divide every pixel value by 255.

    Scaling the inputs makes training more numerically stable. The label is
    returned unchanged.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.0
    return scaled_image, label


# Apply the same scaling to the training and the test split.
scl_train = train.map(scale)
scl_test = test.map(scale)

In [5]:
# Shuffle once before carving the validation set out of the training data.
# reshuffle_each_iteration=False is essential here: with the default (True) the
# dataset is reshuffled every time it is iterated, so take()/skip() would pick
# different examples on each pass and validation examples would leak into the
# training stream on later epochs.
BUFFER_SIZE = 10000  # shuffle buffer size; smaller than the dataset to limit memory use
suffled_data = scl_train.shuffle(BUFFER_SIZE, reshuffle_each_iteration=False)
validation_data = suffled_data.take(n_validation_samp)
# The train/validation split is now fixed, so reshuffle only the training
# portion on every epoch to keep the mini-batches varied.
train_data = suffled_data.skip(n_validation_samp).shuffle(BUFFER_SIZE)


***Mini-Batch Gradient Descent Setup***

In [6]:
# Mini-batch size used when iterating over the training data.
BATCH_SIZE = 128
train_data = train_data.batch(batch_size=BATCH_SIZE)

# Validation and test data are each served as one single batch containing all
# of their samples, so they share the batched format of train_data.
test_data = scl_test.batch(batch_size=n_test_samp)
validation_data = validation_data.batch(batch_size=n_validation_samp)

In [7]:
# Take the first batch from the validation dataset and unpack it into the
# inputs and targets used for evaluation during training.
validation_iterator = iter(validation_data)
validation_inputs, validation_targets = next(validation_iterator)

### Model
***Outline the model***

In [8]:
# Network dimensions.
input_size = 28 * 28  # number of pixels in one 28x28 image
output_size = 10      # units in the final softmax layer, one per class
width1 = 128          # units in the first hidden Dense layer
width2 = 64           # units in the second hidden Dense layer

In [9]:
# Build the feed-forward classifier one layer at a time.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(28, 28, 1)))
# Flatten each image into a vector before the fully connected layers.
model.add(tf.keras.layers.Flatten())
# Two ReLU hidden layers of decreasing width.
for units in (width1, width2):
    model.add(tf.keras.layers.Dense(units, activation='relu'))
# Softmax output layer with output_size units.
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

**choose the optimizer and the loss function**

In [10]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported as the metric.
compile_options = {
    'optimizer': 'Adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

### Training

**Monitoring setup to avoid overfitting: training stops automatically when the validation loss no longer improves**

In [11]:
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ReduceLROnPlateau,
    TensorBoard,
    ModelCheckpoint
)

# Early stopping on val_loss: patience of 5 epochs, minimum improvement of
# 0.001, and the best weights are restored when training stops.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Checkpoint to 'best_model.keras', keeping only the best model by val_loss.
best_checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# TensorBoard logging under ./logs.
tensorboard_logger = TensorBoard(log_dir='./logs')

callbacks = [early_stopping, best_checkpoint, tensorboard_logger]

In [12]:
# Maximum number of training epochs.
NUM_EPOCHS = 24
# Keep the returned History object so the per-epoch metrics stay available
# (e.g. history.history['val_loss']) instead of only echoing its repr.
history = model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    # `callbacks` is already a list of callback instances; pass it as-is
    # rather than wrapping it in a second list.
    callbacks=callbacks,
)

Epoch 1/24
[1m414/422[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.8249 - loss: 0.6073
Epoch 1: val_loss improved from inf to 0.18121, saving model to best_model.keras
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.8266 - loss: 0.6016 - val_accuracy: 0.9498 - val_loss: 0.1812
Epoch 2/24
[1m418/422[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.9556 - loss: 0.1485
Epoch 2: val_loss improved from 0.18121 to 0.12025, saving model to best_model.keras
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9556 - loss: 0.1484 - val_accuracy: 0.9662 - val_loss: 0.1202
Epoch 3/24
[1m417/422[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.9696 - loss: 0.1010
Epoch 3: val_loss improved from 0.12025 to 0.09261, saving model to best_model.keras
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 

<keras.src.callbacks.history.History at 0x7dbeb5f710c0>

In [13]:
# Evaluate the trained model on the test data and report loss and accuracy
# (accuracy shown as a percentage).
evaluation = model.evaluate(test_data)
test_loss, test_accuracy = evaluation
report_template = 'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(report_template.format(test_loss, test_accuracy*100.))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 590ms/step - accuracy: 0.9781 - loss: 0.1044
Test loss: 0.10. Test accuracy: 97.81%
