# Cats vs Dogs — Vision AI in 5 Days 🐱🐶

**Author:** Suraj Jaiswal  
**Bootcamp:** Build an AI That Sees  
**Dataset:** `cats_vs_dogs` via TensorFlow Datasets (auto-download in Colab)  

This notebook walks through:
1. Setup & Data Loading
2. EDA & Preprocessing (resize, normalize, augment)
3. Baseline CNN (from scratch)
4. Evaluation (accuracy, precision, recall, F1, confusion matrix, ROC)
5. Transfer Learning (MobileNetV2)
6. Export artifacts (models, plots, sample predictions)

> Tip: In Colab, go to **Runtime → Change runtime type → GPU** before training.

In [ ]:
# 0) Environment check (Colab + GPU)
import platform
import sys

# Report interpreter and OS details so a run can be reproduced later.
print('Python:', sys.version)
print('Platform:', platform.platform())

# TensorFlow is probed defensively: this cell should never abort the notebook,
# but it must not claim "not installed" when the real failure is something else.
try:
    import tensorflow as tf
    print('TensorFlow:', tf.__version__)
    print('GPU Available:', tf.config.list_physical_devices('GPU'))
except ImportError:
    print('TensorFlow not installed in this environment. It will be available in Colab.')
except Exception as e:
    # TensorFlow imported but the version/device query failed: show the cause.
    print('TensorFlow check failed:', repr(e))

In [ ]:
# 1) Imports
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, precision_recall_fscore_support
import itertools, os, pathlib, time
# Print the TensorFlow version imported above.
print('TF:', tf.__version__)

In [ ]:
# 2) Load dataset: cats_vs_dogs (auto downloads)
DATA_DIR = './data_tfds'

# The dataset is requested as three slices of its 'train' split: the first
# 80% for training, the next 10% for validation and the last 10% for testing.
SPLITS = ('train[:80%]', 'train[80%:90%]', 'train[90%:]')

ds, info = tfds.load('cats_vs_dogs', split=list(SPLITS),
                     with_info=True, as_supervised=True, data_dir=DATA_DIR)
train_ds, val_ds, test_ds = ds
num_classes = info.features['label'].num_classes
print('Classes:', info.features['label'].names)

# Ask TFDS for the size of each sub-split instead of multiplying the full
# split size by a fraction, which printed non-integer "approximate" counts.
split_sizes = [info.splits[spec].num_examples for spec in SPLITS]
print('Train/Val/Test sizes:', *split_sizes)

In [ ]:
# 3) Preprocessing & augmentation
IMG_SIZE = (150, 150)
BATCH_SIZE = 64


def preprocess(image, label):
    """Resize ``image`` to ``IMG_SIZE`` and divide its pixel values by 255.

    The label is returned unchanged so the function can be used directly
    with ``Dataset.map`` on ``(image, label)`` pairs.
    """
    resized = tf.image.resize(image, IMG_SIZE)
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

# Random augmentation stack: horizontal flip, rotation (factor 0.1) and
# zoom (factor 0.1), applied in that order.
aug = tf.keras.Sequential()
aug.add(tf.keras.layers.RandomFlip('horizontal'))
aug.add(tf.keras.layers.RandomRotation(0.1))
aug.add(tf.keras.layers.RandomZoom(0.1))

def _make_pipeline(dataset, shuffle=False):
    """Map ``preprocess`` over ``dataset``, optionally shuffle, then batch and prefetch."""
    pipeline = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        pipeline = pipeline.shuffle(2048)
    return pipeline.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


# Only the training split is shuffled; validation and test keep their order.
train_ds_p = _make_pipeline(train_ds, shuffle=True)
val_ds_p = _make_pipeline(val_ds)
test_ds_p = _make_pipeline(test_ds)

# Show the first nine images of one training batch in a 3x3 grid,
# titled with the class name that corresponds to each label (0/1).
class_names = ('Cat', 'Dog')
for images, labels in train_ds_p.take(1):
    plt.figure(figsize=(6, 6))
    for slot in range(9):
        picture = images[slot].numpy()
        label_id = int(labels[slot].numpy())
        ax = plt.subplot(3, 3, slot + 1)
        plt.imshow(picture)
        plt.title(class_names[label_id])
        plt.axis('off')
    plt.show()

In [ ]:
# 4) Baseline CNN model
def build_baseline(input_shape=(150,150,3)):
    """Build and compile the from-scratch CNN used as the baseline.

    The network is: input -> the module-level ``aug`` augmentation stack ->
    three Conv2D(3x3, relu) + MaxPooling2D stages with 32, 64 and 128 filters
    -> Flatten -> Dropout(0.4) -> Dense(128, relu) -> Dense(1, sigmoid).

    Args:
        input_shape: shape of one input image passed to the Input layer.

    Returns:
        A ``tf.keras.Sequential`` model compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    layers = tf.keras.layers

    stack = [layers.Input(shape=input_shape), aug]
    for filters in (32, 64, 128):
        stack.append(layers.Conv2D(filters, 3, activation='relu'))
        stack.append(layers.MaxPooling2D())
    stack.extend([
        layers.Flatten(),
        layers.Dropout(0.4),
        layers.Dense(128, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])

    m = tf.keras.Sequential(stack)
    m.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return m


# Instantiate the baseline with its default input shape and print its layers.
baseline = build_baseline()
baseline.summary()

In [ ]:
# 5) Train baseline CNN
EPOCHS = 10

# Stop after 3 epochs without improvement (restoring the best weights) and
# keep only the best model on disk.
early_stop = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
checkpoint = tf.keras.callbacks.ModelCheckpoint('baseline_cnn.h5', save_best_only=True)
cb = [early_stop, checkpoint]
hist = baseline.fit(train_ds_p, validation_data=val_ds_p, epochs=EPOCHS, callbacks=cb)

# One figure per metric (separate plots, default colors):
# (history key, train legend, validation legend, title, y-axis label)
curves = (
    ('loss', 'train_loss', 'val_loss', 'Baseline CNN Loss', 'Loss'),
    ('accuracy', 'train_acc', 'val_acc', 'Baseline CNN Accuracy', 'Accuracy'),
)
for metric, train_label, val_label, title, ylabel in curves:
    plt.figure()
    plt.plot(hist.history[metric], label=train_label)
    plt.plot(hist.history['val_' + metric], label=val_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

In [ ]:
# 6) Evaluate baseline CNN
# Pull the whole test split into memory in a single pass so every prediction
# lines up with its label (test_ds_p is batched but never shuffled).
test_images, test_labels = [], []
for batch_images, batch_labels in test_ds_p:
    test_images.append(batch_images.numpy())
    test_labels.append(batch_labels.numpy())
test_images = np.concatenate(test_images, axis=0)
test_labels = np.concatenate(test_labels, axis=0)

# Reuse the pipeline's BATCH_SIZE instead of a second hard-coded 64, and
# flatten the (N, 1) sigmoid outputs once for thresholding and the ROC curve.
pred_probs = baseline.predict(test_images, batch_size=BATCH_SIZE)
pred_scores = pred_probs.ravel()
preds = (pred_scores >= 0.5).astype(int)

# Pin the class order (0 = Cat, 1 = Dog) so the report rows and the confusion
# matrix stay 2x2 and aligned with the tick labels even if one class is absent.
print(classification_report(test_labels, preds, labels=[0, 1], target_names=['Cat','Dog']))
cm = confusion_matrix(test_labels, preds, labels=[0, 1])

# Confusion matrix plot
plt.figure()
plt.imshow(cm, interpolation='nearest')
plt.title('Confusion Matrix - Baseline CNN')
plt.colorbar()
tick_marks = np.arange(2)
plt.xticks(tick_marks, ['Cat','Dog'], rotation=45)
plt.yticks(tick_marks, ['Cat','Dog'])
# Cells above half of the largest count get white text, the rest black.
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, format(cm[i, j], 'd'),
             horizontalalignment='center',
             color='white' if cm[i, j] > thresh else 'black')
plt.ylabel('True label'); plt.xlabel('Predicted label'); plt.tight_layout(); plt.show()

# ROC curve
fpr, tpr, _ = roc_curve(test_labels, pred_scores)
roc_auc = auc(fpr, tpr)
plt.figure()
plt.plot(fpr, tpr, label=f'AUC = {roc_auc:.3f}')
plt.plot([0,1],[0,1], linestyle='--')  # chance-level diagonal for reference
plt.title('ROC Curve - Baseline CNN')
plt.xlabel('False Positive Rate'); plt.ylabel('True Positive Rate'); plt.legend(); plt.show()

In [ ]:
# 7) Transfer Learning: MobileNetV2
IMG_SIZE_TL = (160, 160)


def preprocess_tl(image, label):
    """Resize ``image`` to ``IMG_SIZE_TL`` and apply MobileNetV2's ``preprocess_input``.

    The label is returned unchanged.
    """
    resized = tf.image.resize(image, IMG_SIZE_TL)
    as_float = tf.cast(resized, tf.float32)
    prepared = tf.keras.applications.mobilenet_v2.preprocess_input(as_float)
    return prepared, label

def _make_tl_pipeline(dataset, shuffle=False):
    """Map ``preprocess_tl`` over ``dataset``, optionally shuffle, then batch and prefetch."""
    pipeline = dataset.map(preprocess_tl, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        pipeline = pipeline.shuffle(2048)
    return pipeline.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


# Only the training split is shuffled; validation and test keep their order.
train_tl = _make_tl_pipeline(train_ds, shuffle=True)
val_tl = _make_tl_pipeline(val_ds)
test_tl = _make_tl_pipeline(test_ds)

# ImageNet-pretrained MobileNetV2 without its classification head; frozen
# for the first training phase.
tl_input_shape = IMG_SIZE_TL + (3,)
base_model = tf.keras.applications.MobileNetV2(
    input_shape=tl_input_shape,
    include_top=False,
    weights='imagenet',
)
base_model.trainable = False

# Functional model: augmentation -> frozen backbone -> pooled binary head.
inputs = tf.keras.Input(shape=tl_input_shape)
flipped = tf.keras.layers.RandomFlip('horizontal')(inputs)
rotated = tf.keras.layers.RandomRotation(0.1)(flipped)
# training=False is passed explicitly so the backbone is always called in
# inference mode, whatever its trainable flag is.
features = base_model(rotated, training=False)
pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
x = tf.keras.layers.Dropout(0.2)(pooled)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

tl_model = tf.keras.Model(inputs, outputs)
tl_model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
tl_model.summary()

In [ ]:
# 8) Train transfer model (frozen base), then fine-tune
def _plot_history(history, loss_title, acc_title):
    """Show the loss and accuracy curves of one Keras History in two separate figures."""
    panels = (
        ('loss', 'train_loss', 'val_loss', loss_title, 'Loss'),
        ('accuracy', 'train_acc', 'val_acc', acc_title, 'Accuracy'),
    )
    for metric, train_label, val_label, title, ylabel in panels:
        plt.figure()
        plt.plot(history.history[metric], label=train_label)
        plt.plot(history.history['val_' + metric], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.legend()
        plt.show()


# Phase 1: train only the new head on top of the frozen backbone.
# The same callback objects are reused for the fine-tuning phase below.
cb_tl = [
    tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint('transfer_mobilenetv2.h5', save_best_only=True),
]
hist_tl = tl_model.fit(train_tl, validation_data=val_tl, epochs=8, callbacks=cb_tl)
_plot_history(hist_tl,
              'Transfer Learning Loss (Frozen)',
              'Transfer Learning Accuracy (Frozen)')

# Phase 2: fine-tune top layers. Unfreeze the backbone, then re-freeze every
# layer except the last 30, and recompile with a 10x smaller learning rate
# so the new trainable flags take effect.
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False
tl_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                 loss='binary_crossentropy',
                 metrics=['accuracy'])
hist_ft = tl_model.fit(train_tl, validation_data=val_tl, epochs=5, callbacks=cb_tl)
_plot_history(hist_ft, 'Fine-tuning Loss', 'Fine-tuning Accuracy')

In [ ]:
# 9) Evaluate transfer model
# Collect labels and predicted probabilities in one pass over the test
# pipeline so the two stay aligned batch by batch.
y_true, y_prob = [], []
for batch_images, batch_labels in test_tl:
    # Call the model directly: Model.predict() is meant for whole datasets,
    # and the Keras docs recommend __call__ when feeding single batches in a
    # Python loop. training=False keeps dropout/augmentation inactive.
    batch_probs = tl_model(batch_images, training=False)
    y_prob.append(batch_probs.numpy().ravel())
    y_true.append(batch_labels.numpy())
y_prob = np.concatenate(y_prob)
y_true = np.concatenate(y_true)
y_pred = (y_prob >= 0.5).astype(int)

# Pin the class order (0 = Cat, 1 = Dog) so the report rows and the confusion
# matrix stay 2x2 and aligned with the tick labels even if one class is absent.
print(classification_report(y_true, y_pred, labels=[0, 1], target_names=['Cat','Dog']))
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

# Confusion matrix plot
plt.figure()
plt.imshow(cm, interpolation='nearest')
plt.title('Confusion Matrix - Transfer Learning')
plt.colorbar()
tick_marks = np.arange(2)
plt.xticks(tick_marks, ['Cat','Dog'], rotation=45)
plt.yticks(tick_marks, ['Cat','Dog'])
# Cells above half of the largest count get white text, the rest black.
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, format(cm[i, j], 'd'),
             horizontalalignment='center',
             color='white' if cm[i, j] > thresh else 'black')
plt.ylabel('True label'); plt.xlabel('Predicted label'); plt.tight_layout(); plt.show()

# ROC curve
fpr, tpr, _ = roc_curve(y_true, y_prob)
roc_auc = auc(fpr, tpr)
plt.figure()
plt.plot(fpr, tpr, label=f'AUC = {roc_auc:.3f}')
plt.plot([0,1],[0,1], linestyle='--')  # chance-level diagonal for reference
plt.title('ROC Curve - Transfer Learning')
plt.xlabel('False Positive Rate'); plt.ylabel('True Positive Rate'); plt.legend(); plt.show()

# Save model
tl_model.save('transfer_mobilenetv2_final.h5')

In [ ]:
# 10) Sample predictions & export helper
SAMPLES_DIR = 'sample_predictions'
os.makedirs(SAMPLES_DIR, exist_ok=True)


def save_sample_preds(model, dataset, n=12):
    """Save up to ``n`` annotated prediction images under ``SAMPLES_DIR``.

    Each image is resized to ``IMG_SIZE_TL`` and passed through MobileNetV2's
    ``preprocess_input`` before being scored, so ``dataset`` should yield
    images that have not been preprocessed yet (presumably raw 0-255 pixels,
    since the displayed image is cast to uint8 -- confirm against the caller).

    Args:
        model: Keras model that returns one sigmoid probability per image;
            a probability >= 0.5 is reported as 'Dog', otherwise 'Cat'.
        dataset: ``tf.data.Dataset`` of ``(image, label)`` pairs, either
            unbatched (scalar labels) or batched.
        n: maximum number of samples to save.
    """
    # Dataset.unbatch() raises ValueError when a component is a scalar, which
    # is the case for an unbatched (image, label) dataset. Only strip a batch
    # axis when the label component actually has one.
    label_spec = dataset.element_spec[1]
    if label_spec.shape.rank != 0:
        dataset = dataset.unbatch()

    saved = 0
    for saved, (image, label) in enumerate(dataset.take(n), start=1):
        resized = tf.image.resize(image, IMG_SIZE_TL)
        model_input = tf.keras.applications.mobilenet_v2.preprocess_input(
            tf.cast(resized, tf.float32))
        # A direct call is the recommended way to score a single small batch;
        # training=False keeps dropout and random augmentation switched off.
        scores = model(tf.expand_dims(model_input, 0), training=False)
        prob = float(scores.numpy().ravel()[0])
        pred = 'Dog' if prob >= 0.5 else 'Cat'
        true = 'Dog' if int(label.numpy()) == 1 else 'Cat'

        plt.figure()
        plt.imshow(tf.cast(image, tf.uint8).numpy())
        plt.title(f'Pred: {pred} ({prob:.2f}) | True: {true}')
        plt.axis('off')
        # Files are numbered from 00, hence saved - 1.
        plt.savefig(os.path.join(SAMPLES_DIR, f'sample_{saved - 1:02d}.png'))
        plt.close()
    print(f'Saved {saved} sample prediction images to {SAMPLES_DIR}/')


# Use the raw test split: the function does its own resizing/preprocessing.
save_sample_preds(tl_model, test_ds)

## Done!
- Models saved: `baseline_cnn.h5`, `transfer_mobilenetv2.h5` (best checkpoint), and `transfer_mobilenetv2_final.h5` (final model)
- Plots displayed above
- Sample predictions saved under `sample_predictions/`

**Next:** Push artifacts and notebook to GitHub and add results to slides.