##### ARTI 560 - Computer Vision  
## Image Classification using Transfer Learning - Exercise 

### Objective

In this exercise, you will:

1. Select another pretrained model (e.g., VGG16, MobileNetV2, or EfficientNet) and fine-tune it for CIFAR-10 classification.  
You can find the pretrained models in the [TensorFlow Keras Applications module](https://www.tensorflow.org/api_docs/python/tf/keras/applications).

2. Before training, inspect the architecture using model.summary() and observe:
- Network depth
- Number of parameters
- Trainable vs Frozen layers

3. Then compare its performance with ResNet and the custom CNN.

### Questions:

- Which model achieved the highest accuracy?
- Which model trained faster?
- How might the architecture explain the differences?

In [None]:
import time
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# =============================
# 1) Load CIFAR-10
# =============================
# Downloads the dataset on first use and returns the train/test splits.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Human-readable class names (presumably in the dataset's label-index order).
class_names = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

# Pixels are only cast to float32 here (no rescaling); each model applies its
# own preprocessing later.
x_train, x_test = x_train.astype("float32"), x_test.astype("float32")

# Labels: drop singleton dimensions and store as int64 class ids, as expected
# by the sparse categorical loss used for training.
y_train, y_test = (
    labels.squeeze().astype("int64") for labels in (y_train, y_test)
)

# =============================
# 2) Data augmentation
# =============================
# Random flip / rotation / zoom, packaged as one reusable layer stack that is
# placed at the front of every model below.
data_augmentation = keras.Sequential(name="augmentation")
data_augmentation.add(layers.RandomFlip("horizontal"))
data_augmentation.add(layers.RandomRotation(0.05))
data_augmentation.add(layers.RandomZoom(0.1))

# =============================
# Helper functions for reporting
# =============================
def summarize_model(model, name="model"):
    """Print and return key architecture statistics for a Keras model.

    Prints a banner, the full ``model.summary()`` table, and then the layer
    count and parameter totals.

    Args:
        model: Object exposing ``summary()``, ``count_params()``, ``layers``,
            ``trainable_weights`` and ``non_trainable_weights`` (e.g. a
            ``keras.Model``).
        name: Label shown in the banner.

    Returns:
        dict with integer entries ``depth``, ``total_params``,
        ``trainable_params`` and ``non_trainable_params``.
    """
    print("\n" + "="*70)
    print(f"SUMMARY: {name}")
    print("="*70)
    model.summary()

    def _count(weights):
        # Element count derived from static shapes; a scalar (shape ()) counts as 1.
        return sum(int(np.prod(tuple(w.shape), dtype=np.int64)) for w in weights)

    # Counts top-level layers only: a nested backbone contributes a single layer.
    depth = len(model.layers)
    total_params = model.count_params()
    # Use the *_weights lists rather than *_variables: the variable lists can
    # also hold non-weight state (e.g. random-seed state of augmentation and
    # dropout layers), which made trainable + non-trainable exceed the total.
    trainable_params = _count(model.trainable_weights)
    non_trainable_params = _count(model.non_trainable_weights)

    print("\n--- Architecture Stats ---")
    print("Depth (layers count):", depth)
    print("Total params        :", total_params)
    print("Trainable params    :", trainable_params)
    print("Non-trainable params:", non_trainable_params)
    return {
        "depth": depth,
        "total_params": total_params,
        "trainable_params": trainable_params,
        "non_trainable_params": non_trainable_params
    }

def train_and_evaluate(model, x_train, y_train, x_test, y_test,
                       epochs=3, batch_size=64, lr=1e-3, callbacks=None,
                       tag=""):
    """Compile, train, time the training, then evaluate on the test set.

    The model is (re)compiled on every call with a fresh Adam optimizer, so
    changes to layers' ``trainable`` flags made since the previous call take
    effect. The loss is built with ``from_logits=True``, so the model's final
    layer must output raw logits and the labels must be integer class ids.

    Args:
        model: Keras model to train (modified in place).
        x_train, y_train: Training inputs and labels; 10% is held out by
            ``fit`` as the validation split.
        x_test, y_test: Test inputs and labels used for the final evaluation.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size passed to ``fit``.
        lr: Adam learning rate.
        callbacks: Optional list of Keras callbacks passed to ``fit``.
        tag: Label used in the printed result line.

    Returns:
        Tuple ``(history, test_loss, test_acc, elapsed)`` where ``elapsed`` is
        the duration of ``model.fit`` in seconds (per-epoch validation
        included, the final test evaluation excluded).
    """
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"]
    )

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments during a long training run.
    started = time.perf_counter()
    history = model.fit(
        x_train, y_train,
        validation_split=0.1,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks,
        verbose=1
    )
    elapsed = time.perf_counter() - started

    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

    print(f"\n[{tag}] Test accuracy: {test_acc:.4f} | Test loss: {test_loss:.4f} | Train time(s): {elapsed:.1f}")
    return history, test_loss, test_acc, elapsed

# Common callbacks, passed to several of the fit() runs below:
#   - EarlyStopping: stops when val_accuracy has not improved for 3 epochs and
#     restores the best weights seen.
#   - ReduceLROnPlateau: halves the learning rate when val_loss has not
#     improved for 1 epoch.
# NOTE(review): these objects are stateful and the same instances are reused
# across models. Confirm that their monitor state (e.g. the plateau callback's
# best val_loss) is reset between fit() calls on the installed Keras version;
# otherwise build a fresh list per model.
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=1),
]

# ==========================================================
# A) ResNet50V2 (your baseline) - Frozen + Fine-tune
# ==========================================================
# Two-phase transfer learning:
#   1. train only the new classification head on top of a frozen backbone;
#   2. unfreeze the last 30 backbone layers and continue at a much lower
#      learning rate.
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet_preprocess

# ImageNet-pretrained backbone without its classification head; frozen for phase 1.
resnet_base = ResNet50V2(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
resnet_base.trainable = False

# Pipeline: augment -> upsample 32x32 to 224x224 -> ResNetV2 preprocessing ->
# backbone -> global average pooling -> 10-way linear classifier.
resnet_model = keras.Sequential([
    layers.Input(shape=(32, 32, 3)),
    data_augmentation,
    layers.Resizing(224, 224, interpolation="bilinear"),
    layers.Lambda(resnet_preprocess),
    resnet_base,
    layers.GlobalAveragePooling2D(),
    layers.Dense(10)  # logits (no softmax; the loss is built with from_logits=True)
], name="cifar10_resnet50v2")

resnet_stats = summarize_model(resnet_model, "ResNet50V2 (initial)")

# Phase 1: frozen backbone, only the Dense head is trained.
hist_r_frozen, r_loss_frozen, r_acc_frozen, r_time_frozen = train_and_evaluate(
    resnet_model, x_train, y_train, x_test, y_test,
    epochs=3, batch_size=64, lr=1e-3, callbacks=callbacks,
    tag="ResNet Frozen"
)

# Phase 2: mark the whole backbone trainable, then re-freeze everything except
# its last 30 layers.
# NOTE(review): any BatchNormalization layers among those last 30 become
# trainable as well; Keras' transfer-learning guidance is to keep them in
# inference mode during fine-tuning - confirm this is intended.
resnet_base.trainable = True
for layer in resnet_base.layers[:-30]:
    layer.trainable = False

print("\nResNet backbone trainable layers:",
      sum(l.trainable for l in resnet_base.layers), "/", len(resnet_base.layers))

# Re-summarize after unfreezing so the trainable/non-trainable split is visible.
_ = summarize_model(resnet_model, "ResNet50V2 (fine-tuning setup)")

# train_and_evaluate recompiles the model, so the new trainable flags take
# effect. Fine-tuning runs at lr=1e-5 and without callbacks.
hist_r_ft, r_loss_ft, r_acc_ft, r_time_ft = train_and_evaluate(
    resnet_model, x_train, y_train, x_test, y_test,
    epochs=3, batch_size=64, lr=1e-5, callbacks=None,
    tag="ResNet Fine-tuned"
)

# ==========================================================
# B) Another pretrained model: MobileNetV2 - Frozen + Fine-tune
# ==========================================================
# Same two-phase recipe as the ResNet baseline: train the head on a frozen
# backbone, then unfreeze the last 30 backbone layers at a lower learning rate.
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preprocess

# ---- If you want EfficientNetB0 instead, uncomment below and comment out the
# ---- MobileNetV2 import and the `base2 = MobileNetV2(...)` line:
# from tensorflow.keras.applications import EfficientNetB0
# from tensorflow.keras.applications.efficientnet import preprocess_input as mobilenet_preprocess
# base2 = EfficientNetB0(include_top=False, weights="imagenet", input_shape=(224,224,3))

# ImageNet-pretrained backbone without its classification head; frozen for phase 1.
base2 = MobileNetV2(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
base2.trainable = False

# Pipeline: augment -> upsample 32x32 to 224x224 -> MobileNetV2 preprocessing ->
# backbone -> global average pooling -> dropout -> 10-way linear classifier.
model2 = keras.Sequential([
    layers.Input(shape=(32, 32, 3)),
    data_augmentation,
    layers.Resizing(224, 224),
    layers.Lambda(mobilenet_preprocess),
    base2,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.2),
    layers.Dense(10)  # logits (no softmax; the loss is built with from_logits=True)
], name="cifar10_mobilenetv2")

m2_stats = summarize_model(model2, "MobileNetV2 (initial)")

# Use fresh callback instances for this model. The shared module-level
# `callbacks` list has already been through an earlier fit() run, and the
# recorded training log shows the learning rate being halved right after this
# model's first epoch - i.e. the plateau callback compared against the best
# val_loss of the previous model. Same configuration, clean state.
mobilenet_callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=1),
]

# Phase 1: frozen backbone, only the Dense head is trained.
hist_m_frozen, m_loss_frozen, m_acc_frozen, m_time_frozen = train_and_evaluate(
    model2, x_train, y_train, x_test, y_test,
    epochs=3, batch_size=64, lr=1e-3, callbacks=mobilenet_callbacks,
    tag="MobileNetV2 Frozen"
)

# Phase 2: mark the whole backbone trainable, then re-freeze everything except
# its last 30 layers.
base2.trainable = True
for layer in base2.layers[:-30]:
    layer.trainable = False

print("\nMobileNetV2 backbone trainable layers:",
      sum(l.trainable for l in base2.layers), "/", len(base2.layers))

# Re-summarize so the new trainable/non-trainable split is visible.
_ = summarize_model(model2, "MobileNetV2 (fine-tuning setup)")

# train_and_evaluate recompiles the model, so the new trainable flags take
# effect. Fine-tuning runs at lr=1e-5 and without callbacks.
hist_m_ft, m_loss_ft, m_acc_ft, m_time_ft = train_and_evaluate(
    model2, x_train, y_train, x_test, y_test,
    epochs=3, batch_size=64, lr=1e-5, callbacks=None,
    tag="MobileNetV2 Fine-tuned"
)

# ==========================================================
# C) Custom CNN (from scratch) - compare baseline
# ==========================================================
# Small convolutional network trained from random initialisation:
# three Conv(3x3, ReLU) + MaxPool stages with 32/64/128 filters, followed by a
# 128-unit dense layer, dropout and a 10-way linear classifier.
custom_cnn = keras.Sequential([
    layers.Input(shape=(32, 32, 3)),
    data_augmentation,

    layers.Conv2D(32, 3, padding="same", activation="relu"),
    layers.MaxPooling2D(),

    layers.Conv2D(64, 3, padding="same", activation="relu"),
    layers.MaxPooling2D(),

    layers.Conv2D(128, 3, padding="same", activation="relu"),
    layers.MaxPooling2D(),

    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(10)  # logits (no softmax; the loss is built with from_logits=True)
], name="custom_cnn_cifar10")

cnn_stats = summarize_model(custom_cnn, "Custom CNN (from scratch)")

# Use fresh callback instances for this model. The shared module-level
# `callbacks` list has already been through earlier fit() runs, and the
# recorded training log shows the learning rate being halved right after this
# model's first epoch - i.e. the plateau callback compared against the best
# val_loss of a previous model. Same configuration, clean state.
cnn_callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=1),
]

# This model has no preprocessing layer, so pixels are scaled by 1/255 here,
# for both the training and the test images.
hist_cnn, c_loss, c_acc, c_time = train_and_evaluate(
    custom_cnn, x_train/255.0, y_train, x_test/255.0, y_test,  # normalize manually for scratch CNN
    epochs=3, batch_size=64, lr=1e-3, callbacks=cnn_callbacks,
    tag="Custom CNN"
)

# ==========================================================
# 4) Final comparison (Answers-ready)
# ==========================================================
# One row per experiment: (label, test accuracy, training time in seconds).
# The "Fine-tuned" times cover only the fine-tuning phase, not the preceding
# frozen phase of the same model.
results = [
    ("Custom CNN", c_acc, c_time),  # label typo fixed (was "Cusom CNN")
    ("ResNet50V2 Frozen", r_acc_frozen, r_time_frozen),
    ("ResNet50V2 Fine-tuned", r_acc_ft, r_time_ft),
    ("MobileNetV2 Frozen", m_acc_frozen, m_time_frozen),
    ("MobileNetV2 Fine-tuned", m_acc_ft, m_time_ft),
]

print("\n" + "="*70)
print("FINAL RESULTS (Accuracy & Training Time)")
print("="*70)
for name, acc, tsec in results:
    print(f"{name:22s} | test_acc={acc:.4f} | train_time(s)={tsec:.1f}")

# Pick the winning rows: highest test accuracy and shortest training time.
best_acc = max(results, key=lambda row: row[1])
fastest  = min(results, key=lambda row: row[2])

print("\nHighest accuracy:", best_acc[0], "->", f"{best_acc[1]:.4f}")
print("Fastest training :", fastest[0],  "->", f"{fastest[2]:.1f}s")

print("\n(Use model.summary outputs above to discuss depth/params/trainable layers in your report.)")


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 0us/step


  d = cPickle.load(f, encoding="bytes")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 0us/step


SUMMARY: ResNet50V2 (initial)



--- Architecture Stats ---
Depth (layers count): 6
Total params        : 23585290
Trainable params    : 20490
Non-trainable params: 23564806
Epoch 1/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m875s[0m 1s/step - accuracy: 0.7510 - loss: 0.7140 - val_accuracy: 0.8668 - val_loss: 0.3828 - learning_rate: 0.0010
Epoch 2/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2407s[0m 3s/step - accuracy: 0.8032 - loss: 0.5580 - val_accuracy: 0.8816 - val_loss: 0.3407 - learning_rate: 0.0010
Epoch 3/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m956s[0m 1s/step - accuracy: 0.8185 - loss: 0.5240 - val_accuracy: 0.8858 - val_loss: 0.3312 - learning_rate: 0.0010

[ResNet Frozen] Test accuracy: 0.8793 | Test loss: 0.3476 | Train time(s): 4238.1

ResNet backbone trainable layers: 30 / 190

SUMMARY: ResNet50V2 (fine-tuning setup)



--- Architecture Stats ---
Depth (layers count): 6
Total params        : 23585290
Trainable params    : 14464010
Non-trainable params: 9121286
Epoch 1/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1423s[0m 2s/step - accuracy: 0.8231 - loss: 0.5119 - val_accuracy: 0.9046 - val_loss: 0.2744
Epoch 2/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1364s[0m 2s/step - accuracy: 0.8688 - loss: 0.3796 - val_accuracy: 0.9184 - val_loss: 0.2388
Epoch 3/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1321s[0m 2s/step - accuracy: 0.8924 - loss: 0.3135 - val_accuracy: 0.9236 - val_loss: 0.2200

[ResNet Fine-tuned] Test accuracy: 0.9192 | Test loss: 0.2329 | Train time(s): 4110.5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step

SUMMARY: MobileNetV2 (initial)



--- Architecture Stats ---
Depth (layers count): 7
Total params        : 2270794
Trainable params    : 12810
Non-trainable params: 2257992
Epoch 1/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m478s[0m 672ms/step - accuracy: 0.6470 - loss: 1.0115 - val_accuracy: 0.8038 - val_loss: 0.5723 - learning_rate: 0.0010
Epoch 2/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m473s[0m 671ms/step - accuracy: 0.7223 - loss: 0.7937 - val_accuracy: 0.8190 - val_loss: 0.5147 - learning_rate: 5.0000e-04
Epoch 3/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m469s[0m 666ms/step - accuracy: 0.7313 - loss: 0.7635 - val_accuracy: 0.8266 - val_loss: 0.4981 - learning_rate: 2.5000e-04

[MobileNetV2 Frozen] Test accuracy: 0.7972 | Test loss: 0.5902 | Train time(s): 1422.9

MobileNetV2 backbone trainable layers: 30 / 154

SUMMARY: MobileNetV2 (fine-tuning setup)



--- Architecture Stats ---
Depth (layers count): 7
Total params        : 2270794
Trainable params    : 1539210
Non-trainable params: 731592
Epoch 1/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m570s[0m 794ms/step - accuracy: 0.6920 - loss: 0.8951 - val_accuracy: 0.8252 - val_loss: 0.5164
Epoch 2/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m580s[0m 824ms/step - accuracy: 0.7594 - loss: 0.6977 - val_accuracy: 0.8376 - val_loss: 0.4524
Epoch 3/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m574s[0m 815ms/step - accuracy: 0.7840 - loss: 0.6218 - val_accuracy: 0.8520 - val_loss: 0.4155

[MobileNetV2 Fine-tuned] Test accuracy: 0.8489 | Test loss: 0.4348 | Train time(s): 1725.7

SUMMARY: Custom CNN (from scratch)



--- Architecture Stats ---
Depth (layers count): 11
Total params        : 356810
Trainable params    : 356810
Non-trainable params: 8
Epoch 1/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.3942 - loss: 1.6585 - val_accuracy: 0.5244 - val_loss: 1.3259 - learning_rate: 0.0010
Epoch 2/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 18ms/step - accuracy: 0.5298 - loss: 1.3101 - val_accuracy: 0.6040 - val_loss: 1.1029 - learning_rate: 5.0000e-04
Epoch 3/3
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 19ms/step - accuracy: 0.5742 - loss: 1.1925 - val_accuracy: 0.6052 - val_loss: 1.1145 - learning_rate: 2.5000e-04

[Custom CNN] Test accuracy: 0.5230 | Test loss: 1.3357 | Train time(s): 41.4

FINAL RESULTS (Accuracy & Training Time)
Custom CNN             | test_acc=0.5230 | train_time(s)=41.4
ResNet50V2 Frozen      | test_acc=0.8793 | train_time(s)=4238.1
ResNet50V2 Fine-tuned  | test_acc=0.9192 | train_t