# Importing important modules

In [9]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

# Loading the MNIST dataset

Loading the train dataset

In [2]:
# Load the MNIST dataset from google for handwritten digits recognisition
mnist = tfds.load("mnist", split="train", as_supervised=True)
mnist = mnist.map(lambda x, y: (tf.image.convert_image_dtype(x, tf.float32), tf.one_hot(y, depth=10)))
mnist = mnist.shuffle(buffer_size=60000).batch(64).prefetch(buffer_size=tf.data.AUTOTUNE)

Downloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /root/tensorflow_datasets/mnist/3.0.1...


Dl Completed...:   0%|          | 0/5 [00:00<?, ? file/s]

Dataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.


Loading the test dataset



In [3]:
# Evaluating the model
mnist_test = tfds.load('mnist', split="test", as_supervised=True)
mnist_test = mnist_test.map(lambda x, y: (tf.image.convert_image_dtype(x, tf.float32), tf.one_hot(y, depth=10)))
mnist_test = mnist_test.batch(64).prefetch(buffer_size=tf.data.AUTOTUNE)

Defining the CNN model

In [4]:
# Define the model using relu activation function and softmax activation
# Softmax activation is commonly used for the output layer in multi-class classification tasks.
# Data set is digit recognisition
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Dynamic focal loss function

In [5]:
def dynamic_focal_loss(y_true, y_pred, alpha=0.25):
    # Compute the binary cross-entropy loss
    binary_cross_entropy = tf.losses.binary_crossentropy(y_true, y_pred, from_logits=False)

    p_t = tf.math.exp(-binary_cross_entropy)

    alpha = 0.25

    # Calculate the difficulty as the difference between the predicted probability of the true class
    # and the maximum predicted probability across all classes
    max_pred_prob = tf.reduce_max(y_pred, axis=1)
    true_class_prob = tf.reduce_sum(y_true * y_pred, axis=1)
    difficulty = tf.abs(true_class_prob - max_pred_prob)

    # Calculate the adaptive gamma value as the difficulty
    gamma = 1/((difficulty**2)+1)

    # Calculate the focal loss
    focal_loss = alpha * (1 - p_t) ** gamma * binary_cross_entropy

    return focal_loss

Training the model

In [11]:
# Compiling the model with the Focal Loss
model.compile(optimizer='adam', loss=dynamic_focal_loss, metrics=['accuracy'])

# Training the model
history = model.fit(mnist, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


Evaluating the model

In [12]:
loss, accuracy = model.evaluate(mnist_test)

print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")


Test Loss: nan, Test Accuracy: 0.09799999743700027
