In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# --- Load and preprocess MNIST ---
# mnist.load_data() yields uint8 grayscale images and integer class labels,
# already split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast to float32 *before* scaling. Dividing the uint8 arrays directly by the
# Python float 255.0 promotes them to float64, which doubles the memory
# footprint while Keras computes in float32 by default anyway.
# Then append a trailing channel axis (grayscale -> 1 channel) and scale the
# pixel values from [0, 255] to [0, 1].
x_train = x_train.astype("float32")[..., tf.newaxis] / 255.0
x_test = x_test.astype("float32")[..., tf.newaxis] / 255.0

# Fail early if the data does not match the model's Input(shape=(28, 28, 1)).
assert x_train.shape[1:] == (28, 28, 1), x_train.shape
assert x_test.shape[1:] == (28, 28, 1), x_test.shape

# Convert integer labels to one-hot vectors over the 10 digit classes
# (required by the 'categorical_crossentropy' loss used at compile time).
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# --- Define the ResNet-style CNN for MNIST ---
# Seed Python, NumPy and TensorFlow before any layer is created so that weight
# initialisation is repeatable under "Restart & Run All" (GPU kernels may still
# introduce small nondeterminism).
tf.keras.utils.set_random_seed(42)


def residual_block(x, filters, strides=1):
    """Apply one two-convolution residual block to the tensor `x`.

    Residual path: Conv(3x3, `strides`) -> BN -> ReLU -> Conv(3x3) -> BN.
    Shortcut path: the input itself when its shape already matches the
    residual path; otherwise a 1x1 convolution with the same `strides`
    followed by BatchNormalization so the two paths can be added.

    Args:
        x: Input Keras tensor with channels on the last axis.
        filters: Number of output channels of the block.
        strides: Stride of the first convolution (and of the projection
            shortcut); a value other than 1 downsamples spatially.

    Returns:
        The Keras tensor produced by ReLU(shortcut + residual).
    """
    # The residual path is built first so layers are created in the same
    # order as the hand-written version of this network.
    res = layers.Conv2D(filters, 3, strides=strides, padding='same')(x)
    res = layers.BatchNormalization()(res)
    res = layers.ReLU()(res)
    res = layers.Conv2D(filters, 3, padding='same')(res)
    res = layers.BatchNormalization()(res)

    shortcut = x
    if strides != 1 or x.shape[-1] != filters:
        # Spatial size or channel count changes: project the input so that
        # the element-wise addition below is shape-compatible.
        shortcut = layers.Conv2D(filters, 1, strides=strides, padding='same')(x)
        shortcut = layers.BatchNormalization()(shortcut)

    merged = layers.Add()([shortcut, res])
    return layers.ReLU()(merged)


inputs = tf.keras.Input(shape=(28, 28, 1))

# Initial Convolution
x = layers.Conv2D(32, 3, padding='same')(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)

# === Residual Block 1 === (32 filters, identity shortcut)
x = residual_block(x, 32)

# === Residual Block 2 (Downsample) === (64 filters, stride 2, projection shortcut)
x = residual_block(x, 64, strides=2)

# === Residual Block 3 (Same size) === (64 filters, identity shortcut)
x = residual_block(x, 64)

# === Global Average Pooling & Dense ===
# Average each feature map to a single value, then classify into 10 digits.
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation='softmax')(x)

# Wrap the functional graph (inputs -> outputs) in a Model and configure
# training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model = models.Model(inputs=inputs, outputs=outputs)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Display the layer-by-layer architecture
model.summary()

# --- Train and evaluate ---
# Train for 5 epochs in mini-batches of 64, holding out 10% of the training
# data for per-epoch validation.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.1,
)

# Score the trained model on the untouched test set; evaluate() returns the
# loss followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"\n Test Accuracy: {test_acc * 100:.2f}%")


Epoch 1/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 271ms/step - accuracy: 0.8745 - loss: 0.5152 - val_accuracy: 0.9362 - val_loss: 0.2045
Epoch 2/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 269ms/step - accuracy: 0.9860 - loss: 0.0546 - val_accuracy: 0.9773 - val_loss: 0.0780
Epoch 3/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 274ms/step - accuracy: 0.9889 - loss: 0.0397 - val_accuracy: 0.9478 - val_loss: 0.1706
Epoch 4/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 271ms/step - accuracy: 0.9910 - loss: 0.0309 - val_accuracy: 0.9655 - val_loss: 0.1180
Epoch 5/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 263ms/step - accuracy: 0.9918 - loss: 0.0269 - val_accuracy: 0.9878 - val_loss: 0.0456
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 41ms/step - accuracy: 0.9838 - loss: 0.0498

🧠 Test Accuracy: 98.69%
