# Import necessary modules

In [113]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Prepare dataset

In [114]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train/255.0, x_test/255.0
x_train = x_train.reshape(-1, 784)
x_test = x_test.reshape(-1, 784)
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [115]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 784), (60000, 10), (10000, 784), (10000, 10))

# Build the model

In [116]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.d1 = tf.keras.layers.Dense(8, activation='relu')
        self.d2 = tf.keras.layers.Dense(4, activation='relu')
        self.d3 = tf.keras.layers.Dense(4, activation='relu')
        self.out = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x):
        x = self.d1(x)
        x = self.d2(x)
        x = self.d3(x)
        return self.out(x)
    
gradient_tape_model = MLP()

# Loss function and optimizer

In [117]:
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Training loop (using gradient tape)

In [118]:
EPOCHS = 5
BATCH_SIZE = 128

train_test_split = int(0.8 * len(x_train))
trainX = x_train[0:train_test_split]
trainY = y_train[0:train_test_split]
valX = x_train[train_test_split:]
valY = y_train[train_test_split:]
# Create training dataset
train_ds = tf.data.Dataset.from_tensor_slices((trainX, trainY))
train_ds = train_ds.shuffle(buffer_size=10000).batch(BATCH_SIZE)

# Create validation dataset
val_ds = tf.data.Dataset.from_tensor_slices((valX, valY))
val_ds = val_ds.batch(BATCH_SIZE)

for epoch in range(EPOCHS):
    # Training loop
    for x_batch, y_batch in train_ds:
        with tf.GradientTape() as tape:
            probs = gradient_tape_model(x_batch)
            loss = loss_fn(y_batch, probs)
        grads = tape.gradient(loss, gradient_tape_model.trainable_variables)
        optimizer.apply_gradients(zip(grads, gradient_tape_model.trainable_variables))

    # Validation loop
    val_losses = []
    val_accuracy = tf.keras.metrics.CategoricalAccuracy()

    for x_val_batch, y_val_batch in val_ds:
        val_probs = gradient_tape_model(x_val_batch)
        val_loss = loss_fn(y_val_batch, val_probs)
        val_losses.append(val_loss.numpy())
        val_accuracy.update_state(y_val_batch, val_probs)

    val_loss_avg = sum(val_losses) / len(val_losses)

    print(f"Epoch {epoch+1}: Training Loss = {loss.numpy():.4f}, "
          f"Validation Loss = {val_loss_avg:.4f}, "
          f"Validation Accuracy = {val_accuracy.result().numpy():.4f}")

Epoch 1: Training Loss = 0.5580, Validation Loss = 0.4668, Validation Accuracy = 0.8665
Epoch 2: Training Loss = 0.6198, Validation Loss = 0.4542, Validation Accuracy = 0.8717
Epoch 3: Training Loss = 0.3552, Validation Loss = 0.4032, Validation Accuracy = 0.8862
Epoch 4: Training Loss = 0.3718, Validation Loss = 0.4021, Validation Accuracy = 0.8849
Epoch 5: Training Loss = 0.5964, Validation Loss = 0.4028, Validation Accuracy = 0.8848


# Training using model.fit()

In [119]:
# Instantiate and compile the model
keras_fit_model = MLP()
keras_fit_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

# Train using fit
keras_fit_model.fit(x_train, y_train, batch_size=128, epochs=5, validation_split=0.2, shuffle=False)


Epoch 1/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.5525 - loss: 1.3005 - val_accuracy: 0.8176 - val_loss: 0.6090
Epoch 2/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.8248 - loss: 0.5948 - val_accuracy: 0.8478 - val_loss: 0.5207
Epoch 3/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.8519 - loss: 0.5118 - val_accuracy: 0.8611 - val_loss: 0.4828
Epoch 4/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.8628 - loss: 0.4773 - val_accuracy: 0.8668 - val_loss: 0.4674
Epoch 5/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.8699 - loss: 0.4589 - val_accuracy: 0.8712 - val_loss: 0.4567


<keras.src.callbacks.history.History at 0x77ef8d735060>

# Model evaluation function

In [120]:
def evaluate(model, x_test):
    y_pred_prob = model(x_test)
    y_pred = tf.argmax(y_pred_prob, axis=1)
    y_true = tf.argmax(y_test, axis=1)

    acc = tf.reduce_mean(tf.cast(tf.equal(y_pred, y_true), tf.float32))
    return acc

In [137]:
print(f"Gradient Tape model accuracy: {evaluate(gradient_tape_model, x_test) * 100:.2f}%")
print(f"Keras fit model accuracy: {evaluate(keras_fit_model, x_test) * 100:.2f}%")

Gradient Tape model accuracy: 87.81%
Keras fit model accuracy: 87.57%
