In [1]:
# STEP 1: Import necessary libraries
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
import numpy as np

In [None]:
# STEP 2: Load MNIST and convert it into the input format ResNet50 expects
#
# NOTE(review): every image is materialised at 224x224x3 in one tensor.
# Assuming 60,000 float32 training images, that is roughly 36 GB -- confirm it
# fits in memory, or stream the resize through a tf.data pipeline instead.


def _to_resnet_input(gray_images):
    """Convert a batch of single-channel images into ResNet50-ready inputs.

    The grey channel is copied three times along a new last axis, the result
    is resized to 224x224, and the resized batch is passed through
    ``preprocess_input``.
    """
    three_channel = np.stack([gray_images, gray_images, gray_images], axis=-1)
    resized = tf.image.resize(three_channel, [224, 224])
    return preprocess_input(resized)


(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = _to_resnet_input(x_train)
x_test = _to_resnet_input(x_test)

# Integer digit labels -> one-hot vectors over the 10 classes
y_train_cat = to_categorical(y_train, 10)
y_test_cat = to_categorical(y_test, 10)

ResNet50 expects:

* Input shape: (224, 224, 3)
* Pixel values: preprocessed in a specific way, not just raw 0–255 or 0–1 images

What does preprocess_input() do for ResNet50? When using tensorflow.keras.applications.resnet50.preprocess_input():

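| Step | Effect |
| --- | --- |
| Subtracts the ImageNet per-channel mean from every pixel (applied after the BGR conversion below) | The means are (103.939, 116.779, 123.68) for the B, G and R channels respectively |
| Converts the image from RGB to BGR channel order | The original ResNet50 weights were trained on BGR images |
| Does not scale pixels to [0, 1], as is usually done for MNIST | ResNet50 uses raw 0–255 pixel values minus the mean |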

In [None]:
# STEP 3: Instantiate the ImageNet-pretrained ResNet50 backbone without its
# classification head, sized for 224x224 RGB inputs
base_model = ResNet50(weights='imagenet', input_shape=(224, 224, 3), include_top=False)

# Keep every backbone layer trainable so the whole network is fine-tuned
base_model.trainable = True

In [None]:
# STEP 4: Assemble the classifier -- backbone followed by a small dense head
model = models.Sequential()
model.add(base_model)
# Collapse the backbone's spatial feature map into one vector per image
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
# One softmax output per MNIST digit class
model.add(layers.Dense(10, activation='softmax'))

In [None]:
# STEP 5: Configure training -- categorical cross-entropy on the one-hot
# labels, accuracy as the reported metric, Adam with a 1e-4 learning rate
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

In [None]:
# STEP 6: Fit for 5 epochs in batches of 64, holding out 10% of the training
# data for validation; the returned History object is kept in `history`
history = model.fit(x=x_train, y=y_train_cat, batch_size=64, epochs=5, validation_split=0.1)

In [None]:
# STEP 7: Score the model on the test split and report accuracy as a percentage
test_loss, test_acc = model.evaluate(x=x_test, y=y_test_cat, verbose=2)
print(f"\nTest Accuracy: {test_acc * 100:.2f}%")

**Coding Skip Connection**

In [5]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Add, Activation, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical


# -----------------------------------------------
# Load MNIST, add a channel axis, scale to [0, 1]
# and one-hot encode the labels
# -----------------------------------------------
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Images: reshape to (N, 28, 28, 1), cast to float32, divide by 255
x_train, x_test = (
    images.reshape(-1, 28, 28, 1).astype("float32") / 255.0
    for images in (x_train, x_test)
)

# Labels: integer digits -> one-hot vectors over 10 classes
y_train, y_test = (to_categorical(labels, 10) for labels in (y_train, y_test))

In [9]:
# -----------------------------------------------
# Residual block definition
# -----------------------------------------------
def residual_block(inputs, filters, block_name):
    """Apply a two-convolution residual block with a skip connection.

    Main path: Conv(3x3) -> BatchNorm -> ReLU -> Conv(3x3) -> BatchNorm.
    The shortcut is added to the main path and the sum goes through a final
    ReLU.

    If the last dimension of ``inputs`` already equals ``filters`` the
    shortcut is ``inputs`` itself and the layers created are exactly
    ``{block_name}_conv1/_bn1/_act1/_conv2/_bn2/_add/_out``. Otherwise the
    shortcut is first projected with a 1x1 convolution followed by batch
    normalisation (``{block_name}_proj_conv`` / ``{block_name}_proj_bn``) so
    that both operands of the addition have the same number of channels.

    Args:
        inputs: Input tensor. Its channel count is read from
            ``inputs.shape[-1]`` -- assumes channels-last layout (TODO confirm
            if a channels-first backend configuration is ever used).
        filters: Number of filters for both 3x3 convolutions, and therefore
            the channel count of the returned tensor.
        block_name: Prefix used for the name of every layer in the block.

    Returns:
        The output tensor of the final ReLU activation.
    """
    shortcut = inputs
    # The element-wise addition below needs both branches to have the same
    # shape; project the shortcut when the channel counts differ.
    if inputs.shape[-1] != filters:
        shortcut = Conv2D(filters, (1, 1), padding='same', name=f"{block_name}_proj_conv")(inputs)
        shortcut = BatchNormalization(name=f"{block_name}_proj_bn")(shortcut)

    x = Conv2D(filters, (3, 3), padding='same', name=f"{block_name}_conv1")(inputs)
    x = BatchNormalization(name=f"{block_name}_bn1")(x)
    x = Activation('relu', name=f"{block_name}_act1")(x)

    x = Conv2D(filters, (3, 3), padding='same', name=f"{block_name}_conv2")(x)
    x = BatchNormalization(name=f"{block_name}_bn2")(x)

    # Skip connection: sum the main path and the (possibly projected) shortcut,
    # then apply the non-linearity.
    x = Add(name=f"{block_name}_add")([x, shortcut])
    x = Activation('relu', name=f"{block_name}_out")(x)

    return x

In [13]:
# -----------------------------------------------
# Build the model
# -----------------------------------------------
inputs = Input(shape=(28, 28, 1), name='input')

# One row per stage:
# (filters, stem conv, stem batch-norm, stem ReLU, residual block prefix, pooling)
stage_specs = (
    (32, 'block1_conv_init', 'block1_bn_init', 'block1_act_init', 'block1_res', 'block1_pool'),
    (64, 'block2_conv_init', 'block2_bn_init', 'block2_act_init', 'block2_res', 'block2_pool'),
)

x = inputs
for width, conv_name, bn_name, act_name, res_name, pool_name in stage_specs:
    # Stem: bring the tensor to `width` channels before the residual block
    x = Conv2D(width, (3, 3), padding='same', name=conv_name)(x)
    x = BatchNormalization(name=bn_name)(x)
    x = Activation('relu', name=act_name)(x)

    # Residual block at the same width, then 2x2 max pooling
    x = residual_block(x, filters=width, block_name=res_name)
    x = MaxPooling2D(pool_size=(2, 2), name=pool_name)(x)

# Classification head: flatten the feature map, then a 10-way softmax
x = Flatten(name='flatten')(x)
outputs = Dense(10, activation='softmax', name='output_dense')(x)

# Wrap the layer graph in a Model and print its layer table
model = Model(inputs=inputs, outputs=outputs)

model.summary(line_length=110)

In [11]:
# Configure training: Adam optimizer, categorical cross-entropy on the
# one-hot labels, accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# -----------------------------------------------
# Train for 5 epochs (batch size 128, 10% of the
# training data held out for validation), then
# score the model on the test split
# -----------------------------------------------

model.fit(x=x_train, y=y_train, batch_size=128, epochs=5, validation_split=0.1)

test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 26s 38ms/step - accuracy: 0.8756 - loss: 0.4930 - val_accuracy: 0.9312 - val_loss: 0.2334
Epoch 2/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 6s 15ms/step - accuracy: 0.9842 - loss: 0.0485 - val_accuracy: 0.9882 - val_loss: 0.0452
Epoch 3/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 6s 15ms/step - accuracy: 0.9867 - loss: 0.0404 - val_accuracy: 0.9860 - val_loss: 0.0520
Epoch 4/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 6s 15ms/step - accuracy: 0.9896 - loss: 0.0307 - val_accuracy: 0.9783 - val_loss: 0.0841
Epoch 5/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 10s 15ms/step - accuracy: 0.9907 - loss: 0.0303 - val_accuracy: 0.9812 - val_loss: 0.0792
313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.9736 - loss: 0.0991
Test accuracy: 0.9784
