# Chest X-Ray Pneumonia — Transfer Learning Workflow (Colab Ready)

This notebook follows the course tutorials and project criteria:
- Uses transfer learning (EfficientNetB0) with proper preprocessing
- Adds data augmentation and two-stage training (head training + fine-tuning)
- Includes evaluation metrics beyond accuracy (precision, recall, ROC–AUC, confusion matrix)
- Demonstrates reproducibility and clear inference steps

If KaggleHub auth fails, a Kaggle CLI fallback is provided. Keep credentials out of source control.

In [None]:
# Setup: install packages (Colab) and configure KaggleHub
!pip -q install kagglehub tensorflow pillow numpy scikit-learn matplotlib --upgrade

import os
from getpass import getpass

# Securely paste your Kaggle API token for KaggleHub
os.environ["KAGGLE_API_TOKEN"] = getpass("Paste Kaggle API token: ")
print("KAGGLE_API_TOKEN set:", "***" if os.environ.get("KAGGLE_API_TOKEN") else "MISSING")

# Reproducibility
import numpy as np
import tensorflow as tf
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Download dataset via KaggleHub
import kagglehub

path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")
print("Path to dataset files:", path)

# Resolve chest_xray root with train/val/test
from pathlib import Path
base = Path(path)
SPLIT_NAMES = ("train", "val", "test")

# rglob order is filesystem-dependent, so sort (shallowest first, then by path)
# to make the choice deterministic.
candidates = sorted(
    (p for p in base.rglob("chest_xray") if p.is_dir()),
    key=lambda p: (len(p.parts), str(p)),
)

# Prefer a folder that really contains all three split directories; a folder
# that is merely *named* chest_xray is not enough for the cells below.
complete_roots = [
    p for p in [*candidates, base]
    if all((p / split).is_dir() for split in SPLIT_NAMES)
]
if complete_roots:
    chest_xray_root = complete_roots[0]
else:
    # Same fallback as before, but say so instead of failing later with an unclear error.
    chest_xray_root = candidates[0] if candidates else base
    print("WARNING: no folder with train/val/test sub-directories found under", base)

print("Using chest_xray_root:", chest_xray_root)
train_dir = chest_xray_root / "train"
val_dir = chest_xray_root / "val"
test_dir = chest_xray_root / "test"
print("train:", train_dir)
print("val:", val_dir)
print("test:", test_dir)

In [None]:
# Fallback: Kaggle CLI (uncomment if KaggleHub fails)
# The steps below write a kaggle.json credentials file, restrict its permissions
# to the owner (0o600), install the Kaggle CLI, then download and unzip the
# dataset into ./data.
# "<username>" and "<key>" are placeholders: fill them in locally and do not
# commit the filled-in values.
# NOTE(review): /root/.kaggle presumably matches the Colab home directory; adjust when running elsewhere.
# import json, os, pathlib
# pathlib.Path("/root/.kaggle").mkdir(parents=True, exist_ok=True)
# with open("/root/.kaggle/kaggle.json", "w") as f:
#     json.dump({"username": "<username>", "key": "<key>"}, f)
# os.chmod("/root/.kaggle/kaggle.json", 0o600)
# !pip -q install kaggle
# !kaggle datasets download -d paultimothymooney/chest-xray-pneumonia -p data
# !unzip -q data/chest-xray-pneumonia.zip -d data
# %ls -la data

# Build Datasets with Preprocessing and Augmentation

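We create `tf.data` pipelines for train/val/test. For EfficientNetB0, we use its `preprocess_input` function. We also define a light data-augmentation block here; it is not mapped over the datasets but inserted as the first layer of the model in the next section, where it helps generalization during training.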

In [None]:
# Create tf.data pipelines with EfficientNet preprocessing
import tensorflow as tf
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras import layers

IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Every split is loaded with the same image size, batch size and label mode.
_loader_options = dict(image_size=IMG_SIZE, batch_size=BATCH_SIZE, label_mode='binary')

train_ds = tf.keras.preprocessing.image_dataset_from_directory(str(train_dir), **_loader_options)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(str(val_dir), **_loader_options)
test_ds = tf.keras.preprocessing.image_dataset_from_directory(str(test_dir), **_loader_options)

# Augmentation layer: horizontal flip, small rotation, small zoom
_augmentation_steps = [
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
]
data_augmentation = tf.keras.Sequential(_augmentation_steps, name="augmentation")

# Apply preprocess_input

def prep(x, y):
    """Cast images to float32 and run EfficientNet's `preprocess_input` on them.

    Args:
        x: image tensor from the dataset.
        y: label tensor; returned untouched.

    Returns:
        Tuple `(preprocessed_images, y)`.
    """
    images = preprocess_input(tf.cast(x, tf.float32))  # EfficientNet-specific preprocessing
    return images, y

# Run `prep` over every split and prefetch so input loading overlaps with compute.
train_ds, val_ds, test_ds = [
    split.map(prep).prefetch(tf.data.AUTOTUNE)
    for split in (train_ds, val_ds, test_ds)
]

# Transfer Learning: Build EfficientNetB0 Model

We use EfficientNetB0 pre-trained on ImageNet. First we freeze the base to train only the custom head. Augmentation is included as a layer before the base model.

In [None]:
# Build model with augmentation + EfficientNetB0 base
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models

# ImageNet-pretrained backbone without its classification top; frozen for stage 1.
base_model = EfficientNetB0(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
base_model.trainable = False

# Pipeline: augmentation -> backbone (called with training=False) -> global
# average pooling -> dropout -> single sigmoid unit.
inputs = layers.Input(shape=(224, 224, 3))
augmented = data_augmentation(inputs)
feature_maps = base_model(augmented, training=False)
pooled = layers.GlobalAveragePooling2D()(feature_maps)
regularized = layers.Dropout(0.2)(pooled)
outputs = layers.Dense(1, activation="sigmoid")(regularized)
model = models.Model(inputs=inputs, outputs=outputs)

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

# Stage 1 Training (Head Only)

Train the classification head while keeping the EfficientNetB0 base frozen. Monitor training and validation curves to ensure convergence.

In [None]:
# Train head
EPOCHS_HEAD = 5
history_head = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS_HEAD)

import matplotlib.pyplot as plt

# One panel per metric; each panel overlays the training and validation curve.
head_log = history_head.history
head_panels = [
    ('Accuracy (Head)', [('accuracy', 'Train Acc'), ('val_accuracy', 'Val Acc')]),
    ('Loss (Head)', [('loss', 'Train Loss'), ('val_loss', 'Val Loss')]),
]
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, (panel_title, curves) in zip(axes, head_panels):
    for history_key, curve_label in curves:
        ax.plot(head_log[history_key], label=curve_label)
    ax.legend()
    ax.set_title(panel_title)
plt.show()

# Stage 2 Fine-Tuning (Unfreeze Top Layers)

Unfreeze the top layers of EfficientNetB0 and continue training with a lower learning rate to improve performance without overfitting.

In [None]:
# Unfreeze top layers and fine-tune
# Unfreeze last N layers of the base model.
# The backbone was frozen as a whole (`base_model.trainable = False`). Flipping
# only the sub-layers' flags is not reliable: depending on the Keras version, a
# container whose own flag is False contributes no trainable weights, so the
# fine-tuning stage would silently train the head only. Re-enable the container
# first, then freeze everything except the last N layers.
N = 50
base_model.trainable = True
for layer in base_model.layers[:-N]:
    layer.trainable = False
for layer in base_model.layers[-N:]:
    # Keep BatchNormalization layers frozen, as recommended for fine-tuning
    # pretrained backbones.
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

from tensorflow.keras.optimizers import Adam
# Re-compile so the changed trainable flags take effect; low LR for fine-tuning.
model.compile(optimizer=Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy'])
print("Trainable weight tensors after unfreezing:", len(model.trainable_weights))

EPOCHS_FT = 5
history_ft = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS_FT)

import matplotlib.pyplot as plt
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history_ft.history['accuracy'], label='Train Acc (FT)')
plt.plot(history_ft.history['val_accuracy'], label='Val Acc (FT)')
plt.legend(); plt.title('Accuracy (Fine-tune)')

plt.subplot(1, 2, 2)
plt.plot(history_ft.history['loss'], label='Train Loss (FT)')
plt.plot(history_ft.history['val_loss'], label='Val Loss (FT)')
plt.legend(); plt.title('Loss (Fine-tune)')
plt.show()

# Evaluation: Accuracy, Precision, Recall, ROC–AUC, Confusion Matrix

We evaluate the fine-tuned model on the test set and compute additional metrics commonly used in medical imaging tasks.

In [None]:
# Evaluate and compute metrics
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Standard evaluation
test_loss, test_acc = model.evaluate(test_ds)
print({"test_loss": float(test_loss), "test_acc": float(test_acc)})

# Collect y_true and y_pred_proba.
# Labels and predictions are taken from the same batch, so they stay aligned
# regardless of the order in which the dataset yields its batches.
# `predict_on_batch` is used because `Model.predict` is not meant to be called
# inside a loop over small batches.
y_true = []
y_pred_proba = []
for x_batch, y_batch in test_ds:
    probs = np.asarray(model.predict_on_batch(x_batch)).ravel()
    y_pred_proba.extend(probs.tolist())
    y_true.extend(y_batch.numpy().ravel().tolist())

y_true = np.array(y_true).astype(int)
y_pred_proba = np.array(y_pred_proba)
y_pred = (y_pred_proba >= 0.5).astype(int)  # 0.5 decision threshold

prec = precision_score(y_true, y_pred)
rec = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
auc = roc_auc_score(y_true, y_pred_proba)
# Fix the label order so the matrix is always 2x2 (rows = true 0/1, columns = predicted 0/1),
# even if one class never appears among the predictions.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

print({
    "precision": float(prec),
    "recall": float(rec),
    "f1": float(f1),
    "roc_auc": float(auc)
})
print("Confusion Matrix:\n", cm)

# Inference

We demonstrate inference on a single image. The same preprocessing pipeline is applied before prediction.

In [None]:
# Single-image inference example
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input
import numpy as np
from pathlib import Path

# Use one image from test_dir for demo.
# rglob yields files in filesystem order, so sort the matches to make the demo
# pick the same sample on every run and machine.
sample_candidates = sorted(
    p
    for pattern in ("*.jpeg", "*.jpg", "*.png")
    for p in Path(test_dir).rglob(pattern)
)
assert len(sample_candidates) > 0, "No sample images found in test_dir"
sample_image_path = str(sample_candidates[0])
print("Sample:", sample_image_path)

# Same steps as the training pipeline: resize to 224x224, float array,
# EfficientNet preprocessing, then add a batch dimension of 1.
img = image.load_img(sample_image_path, target_size=(224, 224))
arr = image.img_to_array(img)
arr = preprocess_input(arr)  # EfficientNet preprocessing
arr = np.expand_dims(arr, axis=0)

proba = float(model.predict(arr)[0][0])
# NOTE(review): assumes label 1 corresponds to the pneumonia class — confirm against the dataset's class order.
pred_label = "Pneumonia" if proba >= 0.5 else "Normal"
print({"label": pred_label, "score": proba})

# Save Model Artifact

We save the trained Keras model as an `.h5` file that can be loaded by the FastAPI service.

In [None]:
# Save model and (in Colab) download
MODEL_PATH = "pneumonia_model.h5"
model.save(MODEL_PATH)
print("Saved:", MODEL_PATH)

# The browser download is best-effort: it is skipped when google.colab cannot be
# imported, and a failing download is reported instead of being silently ignored.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; skipping browser download.")
else:
    try:
        files.download(MODEL_PATH)
    except Exception as exc:
        print(f"Browser download failed ({exc!r}); the file is still available at {MODEL_PATH}.")

# Exploratory Data Analysis (EDA)

We inspect class distribution and visualize sample images to understand dataset characteristics and potential imbalance.

In [None]:
# Class distribution and sample visualization
import os
from pathlib import Path
import matplotlib.pyplot as plt

# One entry per class sub-folder of the training split (sorted by name), with
# the number of files in that folder whose name contains a dot.
_train_root = Path(train_dir)
classes = sorted(entry.name for entry in _train_root.iterdir() if entry.is_dir())
counts = [len(list((_train_root / class_name).glob("*.*"))) for class_name in classes]

print({"classes": classes, "train_counts": counts})

# Show grid of sample images
def show_samples(root, cls, n=8):
    """Plot up to `n` images of class `cls` in a 4-column grid.

    Args:
        root: directory that contains one sub-folder per class.
        cls: name of the class sub-folder to preview.
        n: maximum number of images to show.

    Returns:
        None. Prints a message and returns without plotting when the class
        folder yields no files (a zero-row figure would otherwise be invalid).
    """
    # Sorted so the same images are shown on every run.
    sample_paths = sorted((Path(root) / cls).glob("*.*"))[:n]
    if not sample_paths:
        print(f"No files found for class '{cls}' under {root}")
        return

    cols = 4
    rows = (len(sample_paths) + cols - 1) // cols  # ceiling division
    plt.figure(figsize=(12, 3 * rows))
    for i, fp in enumerate(sample_paths):
        img = tf.keras.utils.load_img(fp)
        plt.subplot(rows, cols, i + 1)
        plt.imshow(img, cmap='gray')
        plt.axis('off')
        plt.title(cls)
    plt.tight_layout()
    plt.show()

# Preview at most the first two classes (in sorted order), eight images each.
for preview_class in classes[:2]:
    show_samples(train_dir, preview_class, n=8)

In [None]:
# Check the training set for image files that cannot be opened (unreadable or corrupted)
import pandas as pd

def check_missing_values(directory):
    """Try to load every file in each class sub-folder and collect the failures.

    Args:
        directory: folder containing one sub-folder per class.

    Returns:
        Dict mapping the file name (without its folder) of each file that
        raised while loading to the error message. Empty when all files load.
    """
    root = Path(directory)
    failures = {}
    class_names = sorted(entry.name for entry in root.iterdir() if entry.is_dir())
    for class_name in class_names:
        for candidate in (root / class_name).glob("*.*"):
            try:
                # Only the success or failure of loading matters; the image is discarded.
                tf.keras.utils.load_img(candidate)
            except Exception as err:
                failures[candidate.name] = str(err)
    return failures

# Scan the training split and report every file that failed to load.
missing_values = check_missing_values(train_dir)
if not missing_values:
    print("No missing or corrupted files detected.")
else:
    print("Missing or corrupted files detected:")
    for file, error in missing_values.items():
        print(f"{file}: {error}")

# Dataset Variants for Different Models

We build three dataset variants to match model preprocessing:
- Baseline CNN: normalize to [0,1]
- EfficientNetB0: `efficientnet.preprocess_input`
- ResNet50V2: `resnet_v2.preprocess_input`

In [None]:
# Build raw datasets and mapped variants
import tensorflow as tf
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_prep
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet_prep

IMG_SIZE = (224, 224)
BATCH_SIZE = 32


def _load_split(split_dir):
    """Load one split directory as a batched image dataset with binary labels."""
    return tf.keras.preprocessing.image_dataset_from_directory(
        str(split_dir), image_size=IMG_SIZE, batch_size=BATCH_SIZE, label_mode='binary')


def _with_image_fn(dataset, image_fn):
    """Cast images to float32, apply `image_fn` to them, and prefetch."""
    return dataset.map(lambda x, y: (image_fn(tf.cast(x, tf.float32)), y)).prefetch(tf.data.AUTOTUNE)


raw_train = _load_split(train_dir)
raw_val = _load_split(val_dir)
raw_test = _load_split(test_dir)


# Baseline normalization
def baseline_norm(x, y):
    """Cast images to float32 and divide by 255; labels pass through."""
    return tf.cast(x, tf.float32)/255.0, y


train_baseline = raw_train.map(baseline_norm).prefetch(tf.data.AUTOTUNE)
val_baseline = raw_val.map(baseline_norm).prefetch(tf.data.AUTOTUNE)
test_baseline = raw_test.map(baseline_norm).prefetch(tf.data.AUTOTUNE)

# EfficientNet preprocessing
train_eff = _with_image_fn(raw_train, eff_prep)
val_eff = _with_image_fn(raw_val, eff_prep)
test_eff = _with_image_fn(raw_test, eff_prep)

# ResNet50V2 preprocessing
train_resnet = _with_image_fn(raw_train, resnet_prep)
val_resnet = _with_image_fn(raw_val, resnet_prep)
test_resnet = _with_image_fn(raw_test, resnet_prep)


# Callbacks and Checkpointing

We define reusable callbacks for early stopping, learning rate reduction, and model checkpointing. Best models are stored under the `artifacts/` directory.

In [None]:
# Define callbacks helper
import os
# Checkpoints built by `build_callbacks` are written under ./artifacts, so create
# the folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs('artifacts', exist_ok=True)

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

def build_callbacks(name: str, monitor: str = 'val_accuracy', mode: "str | None" = None):
    """Build the early-stopping / LR-reduction / checkpoint callbacks for one run.

    Args:
        name: run identifier; the checkpoint is written to
            ``artifacts/best_{name}.h5``.
        monitor: name of the logged quantity all three callbacks watch.
        mode: ``'min'`` or ``'max'``. When None (default) it is derived from
            `monitor`: names ending in ``'loss'`` are minimised, everything
            else (accuracy, AUC, ...) is maximised.

    Returns:
        Tuple ``(callbacks, ckpt_path)``: the list of callbacks and the
        checkpoint file path.
    """
    # Keras' own 'auto' mode infers the direction from the metric name and, depending
    # on the version, may not recognise names such as 'val_auc' as higher-is-better.
    # That would make ModelCheckpoint keep the worst epoch and ReduceLROnPlateau react
    # to improvements. Passing the direction explicitly avoids the guesswork.
    if mode is None:
        mode = 'min' if monitor.endswith('loss') else 'max'

    ckpt_path = f'artifacts/best_{name}.h5'
    callbacks = [
        EarlyStopping(monitor=monitor, mode=mode, patience=3, restore_best_weights=True),
        ReduceLROnPlateau(monitor=monitor, mode=mode, factor=0.5, patience=2, min_lr=1e-6),
        ModelCheckpoint(ckpt_path, monitor=monitor, mode=mode,
                        save_best_only=True, save_weights_only=False),
    ]
    return callbacks, ckpt_path

# EfficientNetB0 Training with Callbacks

We compile the model with accuracy and AUC, then train with callbacks to capture the best checkpoint.

In [None]:
# Compile with AUC and train with callbacks
from tensorflow.keras.metrics import AUC

# Head stage: `model` is the same object that the earlier fine-tuning cell may
# have partially unfrozen. Freeze the whole backbone again so that this stage
# trains the head only at Adam's default learning rate.
base_model.trainable = False
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', AUC(name='auc')])

callbacks, ckpt_eff_head = build_callbacks('efficientnet_head', monitor='val_auc')
EPOCHS_HEAD = 5
history_head = model.fit(train_eff, validation_data=val_eff, epochs=EPOCHS_HEAD, callbacks=callbacks)

# Fine-tune
from tensorflow.keras.optimizers import Adam
# Re-enable the backbone container first: flipping sub-layer flags while the
# container's own flag is False may leave the backbone frozen (version dependent).
# Then freeze everything except the last 50 layers, and keep BatchNormalization
# layers frozen as recommended for fine-tuning pretrained backbones.
base_model.trainable = True
for layer in base_model.layers[:-50]:
    layer.trainable = False
for layer in base_model.layers[-50:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False
model.compile(optimizer=Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy', AUC(name='auc')])

callbacks_ft, ckpt_eff_ft = build_callbacks('efficientnet_ft', monitor='val_auc')
EPOCHS_FT = 5
history_ft = model.fit(train_eff, validation_data=val_eff, epochs=EPOCHS_FT, callbacks=callbacks_ft)

print('EfficientNet checkpoints:', ckpt_eff_head, ckpt_eff_ft)

# Baseline CNN Model

We train a simple CNN as a baseline, using [0,1] normalization. Callbacks help capture the best performing checkpoint.

In [None]:
# Build, train, evaluate baseline CNN
from tensorflow.keras import layers, models
from tensorflow.keras.metrics import AUC

# Three Conv(3x3, relu) + MaxPool stages with 32/64/128 filters, then a dense head.
baseline_layers = [layers.Input(shape=(224, 224, 3))]
for n_filters in (32, 64, 128):
    baseline_layers.append(layers.Conv2D(n_filters, 3, activation='relu'))
    baseline_layers.append(layers.MaxPooling2D())
baseline_layers.extend([
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])
baseline = models.Sequential(baseline_layers)
baseline.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', AUC(name='auc')],
)

# Train on the divide-by-255 datasets, checkpointing on validation AUC,
# then score the model on the held-out test split.
callbacks_bl, ckpt_bl = build_callbacks('baseline_cnn', monitor='val_auc')
hist_bl = baseline.fit(
    train_baseline,
    validation_data=val_baseline,
    epochs=5,
    callbacks=callbacks_bl,
)
loss_bl, acc_bl, auc_bl = baseline.evaluate(test_baseline)
baseline_summary = {'loss': float(loss_bl), 'acc': float(acc_bl), 'auc': float(auc_bl)}
print({'baseline': baseline_summary, 'ckpt': ckpt_bl})

# ResNet50V2 Transfer Learning

We build a ResNet50V2 transfer model with a custom head, trained on the ResNet-preprocessed datasets. We include callbacks and a short fine-tuning phase.

In [None]:
# Build, train, evaluate ResNet50V2
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras import layers, models
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam

# ImageNet-pretrained ResNet50V2 without its top; frozen for the head stage.
resnet_base = ResNet50V2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
resnet_base.trainable = False

# augmentation -> backbone (called with training=False) -> pooling -> dropout -> sigmoid
inputs = layers.Input(shape=(224, 224, 3))
x = data_augmentation(inputs)
x = resnet_base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
resnet_model = models.Model(inputs, outputs)

resnet_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', AUC(name='auc')])
cb_resnet_head, ckpt_resnet_head = build_callbacks('resnet_head', monitor='val_auc')
hist_resnet_head = resnet_model.fit(train_resnet, validation_data=val_resnet, epochs=5, callbacks=cb_resnet_head)

# Fine-tune top layers
# Re-enable the backbone container first: flipping sub-layer flags while the
# container's own flag is False may leave the backbone frozen (version dependent).
# Then freeze everything except the last 50 layers, and keep BatchNormalization
# layers frozen as recommended for fine-tuning pretrained backbones.
resnet_base.trainable = True
for layer in resnet_base.layers[:-50]:
    layer.trainable = False
for layer in resnet_base.layers[-50:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False
# Re-compile so the changed trainable flags take effect; low LR for fine-tuning.
resnet_model.compile(optimizer=Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy', AUC(name='auc')])
cb_resnet_ft, ckpt_resnet_ft = build_callbacks('resnet_ft', monitor='val_auc')
hist_resnet_ft = resnet_model.fit(train_resnet, validation_data=val_resnet, epochs=5, callbacks=cb_resnet_ft)

loss_r, acc_r, auc_r = resnet_model.evaluate(test_resnet)
print({'resnet': {'loss': float(loss_r), 'acc': float(acc_r), 'auc': float(auc_r)}, 'ckpts': [ckpt_resnet_head, ckpt_resnet_ft]})

# Save and Reload Best Models & Histories

We save training histories to JSON and demonstrate loading the best checkpoints for inference or further evaluation.

In [None]:
# Save histories to JSON and reload checkpoints
import json
from tensorflow.keras.models import load_model

# Save histories
def _history_to_jsonable(history_dict):
    """Return a copy of a `History.history` dict whose values are plain floats.

    Some logged entries (for example the learning rate recorded by
    ReduceLROnPlateau in some Keras versions) can be NumPy scalars, which
    `json.dump` refuses to serialise.
    """
    return {key: [float(value) for value in values] for key, values in history_dict.items()}


def _load_if_present(ckpt_path):
    """Load the model at `ckpt_path`, or return None when the file does not exist."""
    return load_model(ckpt_path) if os.path.exists(ckpt_path) else None


# One JSON file per training run: artifacts/history_<tag>.json
_histories_by_tag = {
    'efficientnet_head': history_head,
    'efficientnet_ft': history_ft,
    'baseline': hist_bl,
    'resnet_head': hist_resnet_head,
    'resnet_ft': hist_resnet_ft,
}
for _tag, _history in _histories_by_tag.items():
    with open(f'artifacts/history_{_tag}.json', 'w') as f:
        json.dump(_history_to_jsonable(_history.history), f)

# Reload best models (None when a checkpoint was never written)
eff_best = _load_if_present('artifacts/best_efficientnet_ft.h5')
bl_best = _load_if_present('artifacts/best_baseline_cnn.h5')
res_best = _load_if_present('artifacts/best_resnet_ft.h5')

print({'loaded': {'efficientnet_ft': eff_best is not None, 'baseline': bl_best is not None, 'resnet_ft': res_best is not None}})

# Results Comparison Table

We aggregate the key metrics across baseline CNN, EfficientNetB0 (fine-tuned), and ResNet50V2 (fine-tuned) for comparison.

In [None]:
# Build comparison table
import pandas as pd

# EfficientNet metrics: evaluate the current `model` on the EfficientNet-preprocessed
# test split (returns loss, accuracy and AUC, matching its compiled metrics).
loss_eff, acc_eff, auc_eff = model.evaluate(test_eff)

# One row per model; the baseline and ResNet numbers come from their own cells.
comparison_rows = [
    ("Baseline CNN", acc_bl, auc_bl, loss_bl),
    ("EfficientNetB0 (FT)", acc_eff, auc_eff, loss_eff),
    ("ResNet50V2 (FT)", acc_r, auc_r, loss_r),
]
results = pd.DataFrame(comparison_rows, columns=["model", "acc", "auc", "loss"])

# Best model first: rank by AUC, with accuracy as the tie-breaker.
results = results.sort_values(by=["auc", "acc"], ascending=False)
print(results)
results.to_csv('artifacts/results_summary.csv', index=False)