<a href="https://colab.research.google.com/github/omor-niloy/DNN-CSE4261/blob/main/Assignment-4/DNN_Assignment_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
import numpy as np

In [10]:
# Load and preprocess MNIST
(x_train_full, y_train_full), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train_full = x_train_full.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train_full = np.expand_dims(x_train_full, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# One-hot encode labels
y_train_full = to_categorical(y_train_full, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# 0.1 validation split
val_size = int(0.1 * len(x_train_full))  # 6000
x_val, y_val = x_train_full[:val_size], y_train_full[:val_size]
x_train, y_train = x_train_full[val_size:], y_train_full[val_size:]

# tf.data Datasets
batch_size = 64
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(batch_size)
val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

In [11]:
def create_model():
    return models.Sequential([
        layers.Flatten(input_shape=(28, 28, 1)),  # Flatten image
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(10)  # logits (no softmax)
    ])

In [12]:
model = create_model()
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

  super().__init__(**kwargs)


In [13]:
def evaluate(model, dataset):
    correct = 0
    total = 0
    for x, y in dataset:
        logits = model(x, training=False)
        preds = tf.argmax(logits, axis=1)
        labels = tf.argmax(y, axis=1)
        correct += tf.reduce_sum(tf.cast(preds == labels, tf.int32))
        total += y.shape[0]
    return correct / total

In [14]:
epochs = 5
print("=== Manual Training with GradientTape ===")
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")

    train_losses = []
    train_correct = 0
    train_total = 0

    for x_batch, y_batch in train_ds:
        with tf.GradientTape() as tape:
            logits = model(x_batch, training=True)
            loss = loss_fn(y_batch, logits)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_losses.append(loss.numpy())

        # Accuracy calculation
        preds = tf.argmax(logits, axis=1)
        true_labels = tf.argmax(y_batch, axis=1)
        train_correct += tf.reduce_sum(tf.cast(preds == true_labels, tf.int32))
        train_total += y_batch.shape[0]

    train_loss = np.mean(train_losses)
    train_accuracy = train_correct / train_total

    # Validation loop
    val_losses = []
    val_correct = 0
    val_total = 0

    for x_val_batch, y_val_batch in val_ds:
        val_logits = model(x_val_batch, training=False)
        val_loss = loss_fn(y_val_batch, val_logits)
        val_losses.append(val_loss.numpy())

        preds = tf.argmax(val_logits, axis=1)
        true_labels = tf.argmax(y_val_batch, axis=1)
        val_correct += tf.reduce_sum(tf.cast(preds == true_labels, tf.int32))
        val_total += y_val_batch.shape[0]

    val_loss = np.mean(val_losses)
    val_accuracy = val_correct / val_total

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")


=== Manual Training with CategoricalCrossentropy ===

Epoch 1/5
Train Loss: 0.2547, Train Acc: 0.9261, Val Loss: 0.1148, Val Acc: 0.9652

Epoch 2/5
Train Loss: 0.0990, Train Acc: 0.9701, Val Loss: 0.0981, Val Acc: 0.9682

Epoch 3/5
Train Loss: 0.0656, Train Acc: 0.9789, Val Loss: 0.0867, Val Acc: 0.9743

Epoch 4/5
Train Loss: 0.0495, Train Acc: 0.9843, Val Loss: 0.0783, Val Acc: 0.9770

Epoch 5/5
Train Loss: 0.0378, Train Acc: 0.9876, Val Loss: 0.0813, Val Acc: 0.9790


In [15]:
acc = evaluate(model, test_ds)
print(f"\nManual Test Accuracy: {acc.numpy() * 100:.2f}%")



Manual Test Accuracy: 97.69%


In [16]:
# === Training with model.fit() ===
model = create_model()
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [17]:
print("\n=== Training with model.fit() ===")
model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))

print("\n=== Evaluation with model.evaluate() ===")
test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=64)
print(f"Test Accuracy: {test_acc * 100:.2f}%")


=== Training with model.fit() ===
Epoch 1/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.8589 - loss: 0.4672 - val_accuracy: 0.9608 - val_loss: 0.1248
Epoch 2/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9675 - loss: 0.1041 - val_accuracy: 0.9710 - val_loss: 0.0956
Epoch 3/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9794 - loss: 0.0655 - val_accuracy: 0.9730 - val_loss: 0.0888
Epoch 4/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9845 - loss: 0.0483 - val_accuracy: 0.9663 - val_loss: 0.1174
Epoch 5/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9887 - loss: 0.0337 - val_accuracy: 0.9742 - val_loss: 0.0972

=== Evaluation with model.evaluate() ===
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9723 - loss: 0.0957
Test Accu