##### ARTI 560 - Computer Vision  
## Image Classification using Transfer Learning - Exercise 

### Objective

In this exercise, you will:

1. Select another pretrained model (e.g., VGG16, MobileNetV2, or EfficientNet) and fine-tune it for CIFAR-10 classification.  
You'll find the pretrained models in [Tensorflow Keras Applications Module](https://www.tensorflow.org/api_docs/python/tf/keras/applications).

2. Before training, inspect the architecture using model.summary() and observe:
- Network depth
- Number of parameters
- Trainable vs Frozen layers

3. Then compare its performance with ResNet and the custom CNN.

### Questions:

- Which model achieved the highest accuracy?
- Which model trained faster?
- How might the architecture explain the differences?


In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

print("TF:", tf.__version__)
AUTOTUNE = tf.data.AUTOTUNE

# CIFAR-10
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
y_train = y_train.squeeze()
y_test = y_test.squeeze()

NUM_CLASSES = 10
BATCH_SIZE = 64
IMG_SIZE = 224  # for ImageNet pretrained models (224x224 is standard)

# tf.data pipelines
def preprocess_basic(img, label):
    img = tf.cast(img, tf.float32)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    return img, label

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(50000).map(preprocess_basic, num_parallel_calls=AUTOTUNE)
test_ds  = tf.data.Dataset.from_tensor_slices((x_test, y_test)).map(preprocess_basic, num_parallel_calls=AUTOTUNE)

train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
test_ds  = test_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

TF: 2.20.0


  d = cPickle.load(f, encoding="bytes")


In [2]:
def print_model_stats(model, name="model"):
    total_params = model.count_params()
    trainable_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    non_trainable_params = np.sum([np.prod(v.shape) for v in model.non_trainable_weights])
    depth = len(model.layers)
    print(f"\n=== {name} Stats ===")
    print("Depth (num layers):", depth)
    print("Total params:", total_params)
    print("Trainable params:", int(trainable_params))
    print("Non-trainable params:", int(non_trainable_params))

In [3]:
import tensorflow as tf
keras = tf.keras
layers = tf.keras.layers

IMG_SIZE = 224
NUM_CLASSES = 10

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = tf.keras.applications.mobilenet_v2.preprocess_input(inputs)

base = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet"
)
base.trainable = False

x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = keras.Model(inputs, outputs)
model.summary()

In [5]:
import tensorflow as tf
keras = tf.keras
layers = tf.keras.layers

IMG_SIZE = 224
NUM_CLASSES = 10

# Build model
inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

x = tf.keras.applications.mobilenet_v2.preprocess_input(inputs)

base = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet"
)
base.trainable = False

x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

mobilenet_model = keras.Model(inputs, outputs)

mobilenet_model.summary()

In [8]:
# Define callbacks
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(patience=2, factor=0.2)
]

# Compile
mobilenet_model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# Train
hist2 = mobilenet_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=10,
    callbacks=callbacks
)

Epoch 1/10


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m533s[0m 669ms/step - accuracy: 0.6567 - loss: 1.0399 - val_accuracy: 0.7823 - val_loss: 0.6361 - learning_rate: 1.0000e-05
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m657s[0m 840ms/step - accuracy: 0.8401 - loss: 0.4754 - val_accuracy: 0.8403 - val_loss: 0.4633 - learning_rate: 1.0000e-05
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m663s[0m 847ms/step - accuracy: 0.8726 - loss: 0.3746 - val_accuracy: 0.8662 - val_loss: 0.3859 - learning_rate: 1.0000e-05
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m597s[0m 763ms/step - accuracy: 0.8917 - loss: 0.3148 - val_accuracy: 0.8812 - val_loss: 0.3407 - learning_rate: 1.0000e-05
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m584s[0m 747ms/step - accuracy: 0.9075 - loss: 0.2718 - val_accuracy: 0.8903 - val_loss: 0.3190 - learning_rate: 1.0000e-05
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━

In [11]:
import tensorflow as tf

# Make sure we use tf.keras consistently
keras = tf.keras
layers = tf.keras.layers

# Define augmentation (fixes: NameError data_augmentation)
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.05),
        layers.RandomZoom(0.1),
    ],
    name="aug",
)

# Define callbacks (fixes: NameError callbacks)
callbacks = [
    keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(patience=2, factor=0.2),
]

# ---- Build ResNet50 transfer model ----
res_base = keras.applications.ResNet50(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet",
)
res_base.trainable = False  # Phase 1: freeze backbone

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = data_augmentation(inputs)
x = keras.applications.resnet.preprocess_input(x)  # IMPORTANT for ResNet
x = res_base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

resnet_model = keras.Model(inputs, outputs, name="cifar10_resnet50")

resnet_model.summary()
print_model_stats(resnet_model, "ResNet50 (frozen base)")

# ---- Train Phase 1 ----
resnet_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

hist_r1 = resnet_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=8,
    callbacks=callbacks,
)

# ---- Fine-tune Phase 2 ----
res_base.trainable = True
for layer in res_base.layers[:-30]:  # fine-tune last ~30 layers
    layer.trainable = False

resnet_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

print_model_stats(resnet_model, "ResNet50 (fine-tuning)")

hist_r2 = resnet_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=8,
    callbacks=callbacks,
)

# ---- Evaluate ----
res_loss, res_acc = resnet_model.evaluate(test_ds, verbose=0)
print("ResNet50 Test Acc:", res_acc)


=== ResNet50 (frozen base) Stats ===
Depth (num layers): 6
Total params: 23608202
Trainable params: 20490
Non-trainable params: 23587712
Epoch 1/8
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1828s[0m 2s/step - accuracy: 0.8089 - loss: 0.5564 - val_accuracy: 0.8868 - val_loss: 0.3324 - learning_rate: 0.0010
Epoch 2/8
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1516s[0m 2s/step - accuracy: 0.8542 - loss: 0.4187 - val_accuracy: 0.8899 - val_loss: 0.3218 - learning_rate: 0.0010
Epoch 3/8
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1519s[0m 2s/step - accuracy: 0.8662 - loss: 0.3924 - val_accuracy: 0.8961 - val_loss: 0.3101 - learning_rate: 0.0010
Epoch 4/8
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1516s[0m 2s/step - accuracy: 0.8667 - loss: 0.3894 - val_accuracy: 0.9015 - val_loss: 0.2965 - learning_rate: 0.0010
Epoch 5/8
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1497s[0m 2s/step - accuracy: 0.8691 - loss: 

In [10]:
def make_custom_cnn():
    inputs = keras.Input(shape=(32, 32, 3))
    x = layers.Rescaling(1./255)(inputs)

    # simple but decent CNN
    for filters in [32, 64, 128]:
        x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
        x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
        x = layers.MaxPooling2D()(x)
        x = layers.Dropout(0.25)(x)

    x = layers.Flatten()(x)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)
    return keras.Model(inputs, outputs, name="custom_cnn")

cnn_model = make_custom_cnn()
cnn_model.summary()
print_model_stats(cnn_model, "Custom CNN")

cnn_model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# For custom CNN, use original 32x32 images (no resize needed)
train_ds_cnn = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(50000).batch(BATCH_SIZE).prefetch(AUTOTUNE)
test_ds_cnn  = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE).prefetch(AUTOTUNE)

hist_cnn = cnn_model.fit(
    train_ds_cnn,
    validation_data=test_ds_cnn,
    epochs=20,
    callbacks=callbacks
)

cnn_loss, cnn_acc = cnn_model.evaluate(test_ds_cnn, verbose=0)
print("Custom CNN Test Acc:", cnn_acc)


=== Custom CNN Stats ===
Depth (num layers): 18
Total params: 814122
Trainable params: 814122
Non-trainable params: 0
Epoch 1/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 36ms/step - accuracy: 0.3567 - loss: 1.7287 - val_accuracy: 0.5162 - val_loss: 1.3307 - learning_rate: 0.0010
Epoch 2/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 73ms/step - accuracy: 0.5358 - loss: 1.2916 - val_accuracy: 0.6257 - val_loss: 1.0437 - learning_rate: 0.0010
Epoch 3/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 68ms/step - accuracy: 0.6241 - loss: 1.0520 - val_accuracy: 0.6611 - val_loss: 0.9512 - learning_rate: 2.0000e-04
Custom CNN Test Acc: 0.5162000060081482


In [13]:
def show_result(name, var_name):
    if var_name in globals() and globals()[var_name] is not None:
        print(f"{name}: {globals()[var_name]:.4f}")
    else:
        print(f"{name}: NOT AVAILABLE (run training/evaluate cell to create `{var_name}`)")

print("\n=== FINAL RESULTS (Test Accuracy) ===")
show_result("MobileNetV2 (after fine-tune)", "test_acc2")
show_result("ResNet50    (after fine-tune)", "res_acc")
show_result("Custom CNN", "cnn_acc")


=== FINAL RESULTS (Test Accuracy) ===
MobileNetV2 (after fine-tune): NOT AVAILABLE (run training/evaluate cell to create `test_acc2`)
ResNet50    (after fine-tune): 0.9437
Custom CNN: 0.5162
