In [14]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.datasets.mnist import load_data
from tensorflow.keras.utils import to_categorical
import numpy as np

Load & Preprocess dataset

In [15]:
# Load the MNIST train/test splits through Keras' load_data().
(x_train, y_train), (x_test, y_test) = load_data()

# Scale pixel values by 1/255 (presumably 8-bit images, giving values in [0, 1]).
x_train = x_train / 255.0
x_test = x_test / 255.0

# One-hot encode the labels into 10-class vectors.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [16]:
def create_model():
    """Build the fully connected classifier used throughout the notebook.

    Architecture: a (28, 28) input is flattened, passed through three ReLU
    Dense layers of 128, 64 and 32 units, and finished with a 10-unit
    softmax Dense layer.

    Returns:
        An uncompiled Keras functional `Model`.
    """
    pixels = Input(shape=(28, 28))
    hidden = Flatten()(pixels)
    # Stack the hidden layers from widest to narrowest.
    for units in (128, 64, 32):
        hidden = Dense(units, activation='relu')(hidden)
    class_probs = Dense(10, activation='softmax')(hidden)
    return Model(inputs=pixels, outputs=class_probs)

In [17]:
# Network trained by the custom GradientTape loop.
model = create_model()

# create_model() ends in a softmax layer, so the model emits probabilities,
# not logits. The loss must therefore use from_logits=False; the previous
# from_logits=True setting made Keras raise a `_get_logits` warning during
# training and disagreed with the 'categorical_crossentropy' loss used by
# the model.fit() run.
loss_fn = CategoricalCrossentropy(from_logits=False)

# Adam optimizer with its default hyperparameters.
optimizer = tf.keras.optimizers.Adam()

In [18]:
@tf.function
def train_step(model, images, labels):
    """Run one optimisation step on a single batch.

    Uses the module-level `loss_fn` and `optimizer`.

    Args:
        model: Keras model whose trainable variables are updated.
        images: Batch of inputs fed to the model.
        labels: Targets for the batch, compared against the model output.

    Returns:
        The loss computed by `loss_fn` for this batch.
    """
    weights = model.trainable_variables
    with tf.GradientTape() as tape:
        # Forward pass in training mode, recorded by the tape.
        batch_loss = loss_fn(labels, model(images, training=True))
    # Backward pass followed by the optimizer update.
    grads = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return batch_loss

Train model with GradientTape

In [19]:
# Input pipeline: slice the arrays, shuffle with a 60000-element buffer,
# then group into batches of 32.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(60000)
    .batch(32)
)

for epoch in range(5):
    # A fresh running mean per epoch so the reported loss covers this epoch only.
    epoch_loss = tf.keras.metrics.Mean()
    for images, labels in train_dataset:
        epoch_loss.update_state(train_step(model, images, labels))
    print(f'Epoch {epoch + 1}, Loss: {epoch_loss.result().numpy():.4f}')

  output, from_logits = _get_logits(
2025-06-22 20:39:05.588776: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 1, Loss: 0.2470
Epoch 2, Loss: 0.1046
Epoch 3, Loss: 0.0752
Epoch 4, Loss: 0.0582
Epoch 5, Loss: 0.0457


Evaluate model


In [20]:
# Evaluate the GradientTape-trained model on the test split in batches of 32.
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
test_accuracy = tf.keras.metrics.CategoricalAccuracy()

for batch_images, batch_labels in test_dataset:
    # Inference mode: training=False.
    batch_probs = model(batch_images, training=False)
    test_accuracy.update_state(batch_labels, batch_probs)

print(f'\nGradientTape Final Test Accuracy: {test_accuracy.result().numpy():.4f}')


GradientTape Final Test Accuracy: 0.9769


Training using model.fit()

In [21]:
# Second, freshly initialised copy of the same architecture, trained with the
# built-in Keras loop for comparison.
model2 = create_model()
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split is passed as validation data, so `val_accuracy` in the
# returned history is measured on the test set after every epoch.
history = model2.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_data=(x_test, y_test),
)

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8569 - loss: 0.4740 - val_accuracy: 0.9587 - val_loss: 0.1333
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9671 - loss: 0.1147 - val_accuracy: 0.9684 - val_loss: 0.1053
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9769 - loss: 0.0772 - val_accuracy: 0.9716 - val_loss: 0.0853
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9812 - loss: 0.0605 - val_accuracy: 0.9759 - val_loss: 0.0790
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9854 - loss: 0.0455 - val_accuracy: 0.9766 - val_loss: 0.0754


Comparison

In [22]:
# Validation accuracy from the last model.fit() epoch; validation used the
# test split, so this is comparable to the GradientTape test accuracy.
fit_test_accuracy = history.history['val_accuracy'][-1]

print("\n--- Final Performance Comparison ---")
print(f"GradientTape Final Test Accuracy: {test_accuracy.result().numpy():.4f}")
print(f"model.fit() Final Test Accuracy:    {fit_test_accuracy:.4f}")


--- Final Performance Comparison ---
GradientTape Final Test Accuracy: 0.9769
model.fit() Final Test Accuracy:    0.9766
