### Compare tf.GradientTape() vs TensorFlow's model.fit()

In [1]:
import tensorflow as tf
import numpy as np

In [4]:
# Fetch the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image into a 28 * 28 = 784 element row, cast to float32 and
# divide by 255.0. The 784-wide rows match the input size create_model() declares.
x_train, x_test = (
    images.reshape(-1, 28 * 28).astype('float32') / 255.0
    for images in (x_train, x_test)
)

# One-hot encode the labels into 10 classes. The original label arrays stay
# available as y_train / y_test.
y_train_cat, y_test_cat = (
    tf.keras.utils.to_categorical(labels, 10)
    for labels in (y_train, y_test)
)

In [5]:
def create_model():
    """Build a fresh, uncompiled MLP classifier for flattened 784-feature inputs.

    Architecture: Input(784) -> Dense(128, relu) -> Dense(64, relu) -> Dense(10).

    Returns:
        tf.keras.Sequential: a newly constructed model on every call. The last
        layer has no activation, so the model emits raw logits; pair it with a
        loss created with ``from_logits=True``.
    """
    return tf.keras.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # `input_shape` to the first Dense layer; recent Keras releases emit a
        # UserWarning for the latter.
        tf.keras.Input(shape=(784,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10),  # raw logits, no softmax layer
    ])

In [6]:
# Custom training with tf.GradientTape
model_tape = create_model()
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()
batch_size = 64
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train_cat)).shuffle(1024).batch(batch_size)


# Compiling the step with tf.function avoids dispatching every op eagerly on
# each batch. The function is (re)defined in this cell on purpose, so that
# re-running the cell traces it against the freshly created model and optimizer.
@tf.function
def train_step(x_batch, y_batch):
    """Run one optimisation step on a single batch.

    Args:
        x_batch: batch of flattened input images.
        y_batch: matching batch of one-hot labels.

    Returns:
        Scalar tensor holding the loss of this batch, computed before the
        weight update is applied.
    """
    with tf.GradientTape() as tape:
        # training=True makes the forward pass explicit about its mode.
        logits = model_tape(x_batch, training=True)
        loss = loss_fn(y_batch, logits)
    grads = tape.gradient(loss, model_tape.trainable_variables)
    optimizer.apply_gradients(zip(grads, model_tape.trainable_variables))
    return loss


# Training loop
epochs = 5
for epoch in range(epochs):
    epoch_loss = []
    for x_batch, y_batch in train_ds:
        loss = train_step(x_batch, y_batch)
        epoch_loss.append(loss.numpy())

    # Reported value is the unweighted mean of the per-batch losses.
    print(f"[GradientTape] Epoch {epoch+1}, Loss: {np.mean(epoch_loss):.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-06-21 22:28:57.656362: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[GradientTape] Epoch 1, Loss: 0.2749


2025-06-21 22:29:11.718133: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[GradientTape] Epoch 2, Loss: 0.1173
[GradientTape] Epoch 3, Loss: 0.0820


2025-06-21 22:29:39.881206: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[GradientTape] Epoch 4, Loss: 0.0590
[GradientTape] Epoch 5, Loss: 0.0457


In [7]:
# Evaluate the GradientTape-trained model on the test split
logits = model_tape(x_test)  # single forward pass over all test inputs
# Element-wise match between the one-hot targets and the predicted class;
# the mean of these values is the reported accuracy.
accuracy = tf.keras.metrics.categorical_accuracy(y_true=y_test_cat, y_pred=logits)
print(f"[GradientTape] Test Accuracy: {accuracy.numpy().mean():.4f}")

[GradientTape] Test Accuracy: 0.9757


In [8]:
# Using model.fit
model_fit = create_model()
# Adam optimizer and categorical cross-entropy on logits, mirroring the
# optimizer and loss used by the GradientTape run.
model_fit.compile(optimizer='adam',
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

# Reuse `epochs` and `batch_size` from the GradientTape cell instead of
# repeating the literals, so both training runs always share the same
# hyperparameters and the comparison cannot silently drift.
model_fit.fit(x_train, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
# NOTE: this rebinds `loss` (previously the last batch loss of the
# GradientTape loop) to the test-set loss returned by evaluate().
loss, acc = model_fit.evaluate(x_test, y_test_cat, verbose=0)
print(f"[model.fit] Test Accuracy: {acc:.4f}")


Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8502 - loss: 0.5130
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9642 - loss: 0.1200
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9764 - loss: 0.0776
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9821 - loss: 0.0564
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9873 - loss: 0.0408
[model.fit] Test Accuracy: 0.9787
