# Retinopathy: Download + Train CNN + Grad-CAM 

This notebook will:
- Configure Kaggle API from the local file `kaggle (1).json`
- Download the Indian Diabetic Retinopathy dataset from Kaggle
- Train a simple TensorFlow/Keras CNN and export metrics
- Generate Grad-CAM overlays and save them under `outputs_retino/gradcam/`

Run the cells from top to bottom. If you have an Apple Silicon Mac, the setup cell installs `tensorflow-macos` + `tensorflow-metal` automatically for acceleration.

In [9]:
# 1) Dependencies: install and import
import sys, subprocess, platform, importlib.util

def ensure_pkg(pkg_name, pip_name=None):
    if pip_name is None:
        pip_name = pkg_name
    if importlib.util.find_spec(pkg_name) is None:
        print(f"Installing {pip_name} ...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pip_name])
    else:
        print(f"{pkg_name} already installed.")

# TensorFlow special handling for Apple Silicon
if importlib.util.find_spec("tensorflow") is None:
    if platform.system().lower() == "darwin" and platform.machine().lower() == "arm64":
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "tensorflow-macos", "tensorflow-metal"]) 
    else:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "tensorflow"]) 

# Core deps
ensure_pkg("kaggle")
ensure_pkg("PIL", "Pillow")
ensure_pkg("cv2", "opencv-python")
ensure_pkg("matplotlib")
ensure_pkg("numpy")
ensure_pkg("pandas")
ensure_pkg("sklearn", "scikit-learn")

print("Dependency setup complete.")

kaggle already installed.
PIL already installed.
cv2 already installed.
matplotlib already installed.
numpy already installed.
pandas already installed.
sklearn already installed.
Dependency setup complete.


In [10]:
# 2) Configure Kaggle from local file and verify
import os
from pathlib import Path
import json, stat, subprocess

PROJECT_ROOT = Path.cwd()
LOCAL_KAGGLE_JSON = PROJECT_ROOT / "kaggle (1).json"
KAGGLE_DIR = Path.home() / ".kaggle"
KAGGLE_CRED = KAGGLE_DIR / "kaggle.json"

print("Project root:", PROJECT_ROOT)
print("Local kaggle json exists:", LOCAL_KAGGLE_JSON.exists())

assert LOCAL_KAGGLE_JSON.exists(), (
    f"Expected Kaggle json at {LOCAL_KAGGLE_JSON}. Place your API file there (username/key)."
)

KAGGLE_DIR.mkdir(parents=True, exist_ok=True)
KAGGLE_CRED.write_text(LOCAL_KAGGLE_JSON.read_text())
os.chmod(KAGGLE_CRED, stat.S_IRUSR | stat.S_IWUSR)  # 0o600

# Verify Kaggle CLI works
try:
    out = subprocess.check_output(["kaggle", "--version"]).decode("utf-8", errors="ignore").strip()
    print("Kaggle CLI:", out)
except Exception as e:
    raise RuntimeError("Kaggle CLI not available. Install with `pip install kaggle`.")

Project root: /Applications/CODES/AiHC
Local kaggle json exists: True
Kaggle CLI: Kaggle API 1.7.4.5


In [11]:
# 3) Download and extract dataset
import zipfile

DATASET_SLUG = 'aaryapatel98/indian-diabetic-retinopathy-image-dataset'
DATA_ROOT = PROJECT_ROOT / 'data'
ZIP_PATH = DATA_ROOT / (DATASET_SLUG.split('/')[-1] + '.zip')
EXTRACT_DIR = DATA_ROOT / 'retinopathy'

DATA_ROOT.mkdir(parents=True, exist_ok=True)

if not ZIP_PATH.exists():
    print(f"Downloading {DATASET_SLUG} to {ZIP_PATH} ...")
    subprocess.check_call([
        'kaggle', 'datasets', 'download', '-d', DATASET_SLUG, '-p', str(DATA_ROOT), '-o'
    ])
else:
    print("Zip already present:", ZIP_PATH)

print(f"Extracting to {EXTRACT_DIR} ...")
EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
with zipfile.ZipFile(ZIP_PATH, 'r') as zf:
    zf.extractall(EXTRACT_DIR)

# Flatten single nested directory if needed
entries = list(EXTRACT_DIR.iterdir())
if len(entries) == 1 and entries[0].is_dir():
    nested = entries[0]
    for p in nested.iterdir():
        p.rename(EXTRACT_DIR / p.name)
    nested.rmdir()

print("Dataset ready at:", EXTRACT_DIR)
print("Some items:", [p.name for p in list(EXTRACT_DIR.iterdir())[:10]])

Zip already present: /Applications/CODES/AiHC/data/indian-diabetic-retinopathy-image-dataset.zip
Extracting to /Applications/CODES/AiHC/data/retinopathy ...
Dataset ready at: /Applications/CODES/AiHC/data/retinopathy
Some items: ['A.%20Segmentation', 'C.%20Localization', 'B.%20Disease%20Grading']


In [12]:
# 4) Parameters
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 5
VAL_SPLIT = 0.2
LIMIT_PER_CLASS = 200  # reduce for quick smoke test; set None for full
SAMPLE_COUNT_GRADCAM = 5

OUT_DIR = PROJECT_ROOT / 'outputs_retino'
(OUT_DIR / 'gradcam').mkdir(parents=True, exist_ok=True)
print('Outputs will be saved to:', OUT_DIR)

Outputs will be saved to: /Applications/CODES/AiHC/outputs_retino


In [13]:
# 5) Data pipeline utilities (detect layout, build datasets)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

from typing import Optional, Tuple, Dict

DATA_DIR = EXTRACT_DIR


def detect_layout(data_dir: Path) -> Dict[str, Optional[Path]]:
    result = {"mode": "flat", "train_dir": None, "val_dir": None, "test_dir": None}
    if not data_dir.exists():
        return result
    candidates = {"train": None, "val": None, "validation": None, "test": None}
    for child in data_dir.iterdir():
        if child.is_dir():
            name = child.name.lower()
            if name in candidates:
                candidates[name] = child
    train_dir = candidates.get("train")
    val_dir = candidates.get("val") or candidates.get("validation")
    test_dir = candidates.get("test")
    if train_dir is not None and (val_dir is not None or test_dir is not None):
        result.update({"mode": "split", "train_dir": train_dir, "val_dir": val_dir, "test_dir": test_dir})
        return result
    result.update({"mode": "flat", "train_dir": data_dir, "val_dir": None, "test_dir": None})
    return result


def build_datasets(
    data_dir: Path,
    img_size: int,
    batch_size: int,
    val_split: float = 0.2,
    seed: int = 42,
    limit_per_class: Optional[int] = None,
) -> Tuple[tf.data.Dataset, tf.data.Dataset, Optional[tf.data.Dataset], int, list]:
    layout = detect_layout(data_dir)
    image_size = (img_size, img_size)

    def limit_dataset(ds: tf.data.Dataset, class_names: list) -> tf.data.Dataset:
        if limit_per_class is None:
            return ds
        if limit_per_class <= 0:
            return ds
        by_class = {i: 0 for i in range(len(class_names))}
        xs = []
        ys = []
        for x, y in ds.unbatch().take(100000):  # safety upper bound
            cls = int(y.numpy())
            if by_class[cls] < limit_per_class:
                xs.append(x.numpy())
                ys.append(cls)
                by_class[cls] += 1
        if len(xs) == 0:
            return ds
        xs = np.stack(xs, axis=0)
        ys = np.array(ys, dtype=np.int64)
        new_ds = tf.data.Dataset.from_tensor_slices((xs, ys)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
        return new_ds

    if layout["mode"] == "split":
        train_ds = tf.keras.utils.image_dataset_from_directory(
            layout["train_dir"], labels="inferred", label_mode="int", image_size=image_size, batch_size=batch_size, seed=seed
        )
        val_ds = None
        if layout["val_dir"] is not None:
            val_ds = tf.keras.utils.image_dataset_from_directory(
                layout["val_dir"], labels="inferred", label_mode="int", image_size=image_size, batch_size=batch_size, seed=seed
            )
        test_ds = None
        if layout["test_dir"] is not None:
            test_ds = tf.keras.utils.image_dataset_from_directory(
                layout["test_dir"], labels="inferred", label_mode="int", image_size=image_size, batch_size=batch_size, seed=seed
            )
        class_names = train_ds.class_names
        if limit_per_class is not None:
            train_ds = limit_dataset(train_ds, class_names)
            if val_ds is not None:
                val_ds = limit_dataset(val_ds, class_names)
        return train_ds, val_ds, test_ds, len(class_names), class_names

    # flat: split
    train_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir, validation_split=val_split, subset="training", labels="inferred", label_mode="int",
        image_size=image_size, batch_size=batch_size, seed=seed
    )
    val_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir, validation_split=val_split, subset="validation", labels="inferred", label_mode="int",
        image_size=image_size, batch_size=batch_size, seed=seed
    )
    class_names = train_ds.class_names
    if limit_per_class is not None:
        train_ds = limit_dataset(train_ds, class_names)
        val_ds = limit_dataset(val_ds, class_names)
    return train_ds, val_ds, None, len(class_names), class_names


print("Building datasets from:", DATA_DIR)
train_ds, val_ds, test_ds, num_classes, class_names = build_datasets(
    DATA_DIR, IMG_SIZE, BATCH_SIZE, VAL_SPLIT, seed=42, limit_per_class=LIMIT_PER_CLASS
)
print("Classes:", class_names)
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(AUTOTUNE)
if val_ds is not None:
    val_ds = val_ds.prefetch(AUTOTUNE)
if test_ds is not None:
    test_ds = test_ds.prefetch(AUTOTUNE)

Building datasets from: /Applications/CODES/AiHC/data/retinopathy
Found 1113 files belonging to 3 classes.
Using 891 files for training.
Found 1113 files belonging to 3 classes.
Using 222 files for validation.


2025-10-28 10:21:58.973340: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Classes: ['A.%20Segmentation', 'B.%20Disease%20Grading', 'C.%20Localization']


In [14]:
# 6) Define CNN model, train, and export metrics
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


def build_model(img_size: int, num_classes: int) -> keras.Model:
    inputs = layers.Input(shape=(img_size, img_size, 3))
    x = layers.Rescaling(1./255)(inputs)
    x = layers.Conv2D(32, 3, activation='relu', padding='same')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(64, 3, activation='relu', padding='same')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    model = keras.Model(inputs, outputs)
    model.compile(optimizer=keras.optimizers.Adam(1e-3), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


def evaluate_model(model: keras.Model, ds) -> dict:
    y_true, y_pred = [], []
    for bx, by in ds:
        preds = model.predict(bx, verbose=0)
        y_true.extend(by.numpy().tolist())
        y_pred.extend(np.argmax(preds, axis=1).tolist())
    acc = accuracy_score(y_true, y_pred)
    report = classification_report(y_true, y_pred, target_names=[str(c) for c in class_names], digits=4)
    cm = confusion_matrix(y_true, y_pred).tolist()
    return {"accuracy": float(acc), "report": report, "confusion_matrix": cm}

print("Building model...")
model = build_model(IMG_SIZE, len(class_names))
model.summary()

callbacks = [keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)]

print("Training...")
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=callbacks, verbose=1)

metrics = {}
if val_ds is not None:
    print("Evaluating on validation set...")
    metrics['val'] = evaluate_model(model, val_ds)
    print("Val accuracy:", metrics['val']['accuracy'])
    print(metrics['val']['report'])

if test_ds is not None:
    print("Evaluating on test set...")
    metrics['test'] = evaluate_model(model, test_ds)
    print("Test accuracy:", metrics['test']['accuracy'])
    print(metrics['test']['report'])

# Save artifacts
model_path = OUT_DIR / 'model.keras'
class_path = OUT_DIR / 'class_names.json'
metrics_path = OUT_DIR / 'metrics.json'

print("Saving model to", model_path)
model.save(model_path)

with open(class_path, 'w') as f:
    json.dump({"class_names": list(class_names)}, f, indent=2)
print("Saved class names to", class_path)

with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)
print("Saved metrics to", metrics_path)

Building model...


Training...
Epoch 1/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 470ms/step - accuracy: 0.3889 - loss: 1.4509 - val_accuracy: 0.4234 - val_loss: 0.9549
Epoch 2/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 480ms/step - accuracy: 0.4359 - loss: 1.0912 - val_accuracy: 0.4234 - val_loss: 0.9977
Epoch 3/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 533ms/step - accuracy: 0.4316 - loss: 1.0709 - val_accuracy: 0.4234 - val_loss: 0.9501
Epoch 4/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 471ms/step - accuracy: 0.4316 - loss: 1.0499 - val_accuracy: 0.4234 - val_loss: 0.9694
Evaluating on validation set...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Val accuracy: 0.42342342342342343
                        precision    recall  f1-score   support

     A.%20Segmentation     0.0000    0.0000    0.0000        13
B.%20Disease%20Grading     0.4234    1.0000    0.5949        94
     C.%20Localization     0.0000    0.0000    0.0000       115

              accuracy                         0.4234       222
             macro avg     0.1411    0.3333    0.1983       222
          weighted avg     0.1793    0.4234    0.2519       222

Saving model to /Applications/CODES/AiHC/outputs_retino/model.keras
Saved class names to /Applications/CODES/AiHC/outputs_retino/class_names.json
Saved metrics to /Applications/CODES/AiHC/outputs_retino/metrics.json


In [15]:
# 7) Grad-CAM: generate overlays and save to outputs_retino/gradcam/
import os
import cv2
from PIL import Image


def get_last_conv_layer(m: keras.Model):
    for layer in reversed(m.layers):
        if isinstance(layer, layers.Conv2D):
            return layer
    return None


def make_gradcam_heatmap(img_array, model, last_conv_layer):
    grad_model = keras.models.Model(
        [model.inputs], [last_conv_layer.output, model.output]
    )
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    conv_outputs = conv_outputs[0]
    heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_outputs), axis=-1)
    heatmap = tf.maximum(heatmap, 0) / (tf.reduce_max(heatmap) + 1e-8)
    return heatmap.numpy()


last_conv = get_last_conv_layer(model)
assert last_conv is not None, "No Conv2D layer found in the model for Grad-CAM."

layout = detect_layout(DATA_DIR)
# Prefer test -> val -> train
pick_dir = layout.get('test_dir') or layout.get('val_dir') or layout.get('train_dir') or DATA_DIR
print('Sampling images from:', pick_dir)

# Collect sample image paths across classes
img_paths = []
if isinstance(pick_dir, Path):
    root = pick_dir
else:
    root = Path(pick_dir)

class_folders = [p for p in sorted(root.iterdir()) if p.is_dir()]
if not class_folders:
    # If flat (no class subfolders), just take images directly
    flat_images = [p for p in root.glob('**/*') if p.suffix.lower() in {'.jpg', '.jpeg', '.png'}]
    img_paths = flat_images[:SAMPLE_COUNT_GRADCAM]
else:
    quota = max(1, SAMPLE_COUNT_GRADCAM // max(1, len(class_folders)))
    for cdir in class_folders:
        files = [p for p in sorted(cdir.glob('*')) if p.suffix.lower() in {'.jpg', '.jpeg', '.png'}]
        img_paths.extend(files[:quota])
    img_paths = img_paths[:SAMPLE_COUNT_GRADCAM]

print(f"Generating Grad-CAM for {len(img_paths)} images ...")

for ipath in img_paths:
    try:
        img = Image.open(ipath).convert('RGB')
        img = img.resize((IMG_SIZE, IMG_SIZE), Image.BILINEAR)
        img_arr = np.array(img)
        img_batch = np.expand_dims(img_arr, axis=0)  # model rescales internally

        preds = model.predict(img_batch, verbose=0)[0]
        pred_idx = int(np.argmax(preds))
        pred_label = class_names[pred_idx] if pred_idx < len(class_names) else str(pred_idx)

        heatmap = make_gradcam_heatmap(img_batch, model, last_conv)
        heatmap = cv2.resize(heatmap, (IMG_SIZE, IMG_SIZE))
        heatmap_uint8 = np.uint8(255 * heatmap)
        heatmap_color = cv2.applyColorMap(heatmap_uint8, cv2.COLORMAP_JET)

        overlay = cv2.addWeighted(heatmap_color, 0.4, cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR), 0.6, 0)

        out_name = f"{ipath.stem}_pred-{pred_label}.jpg"
        out_path = OUT_DIR / 'gradcam' / out_name
        cv2.imwrite(str(out_path), overlay)
        print('Saved:', out_path)
    except Exception as e:
        print('Failed on', ipath, 'error:', e)

print('Grad-CAM generation complete. Check:', OUT_DIR / 'gradcam')

Sampling images from: /Applications/CODES/AiHC/data/retinopathy
Generating Grad-CAM for 0 images ...
Grad-CAM generation complete. Check: /Applications/CODES/AiHC/outputs_retino/gradcam


In [16]:
# 8) Quick summary: show saved metrics (if any)
from pprint import pprint

metrics_path = OUT_DIR / 'metrics.json'
if metrics_path.exists():
    with open(metrics_path, 'r') as f:
        saved_metrics = json.load(f)
    print('Loaded metrics from', metrics_path)
    for split, md in saved_metrics.items():
        print(f"\n=== {split.upper()} ===")
        print('Accuracy:', md.get('accuracy'))
        print(md.get('report', '')[:1200], '...' if len(md.get('report','')) > 1200 else '')
else:
    print('No metrics.json found at', metrics_path)

Loaded metrics from /Applications/CODES/AiHC/outputs_retino/metrics.json

=== VAL ===
Accuracy: 0.42342342342342343
                        precision    recall  f1-score   support

     A.%20Segmentation     0.0000    0.0000    0.0000        13
B.%20Disease%20Grading     0.4234    1.0000    0.5949        94
     C.%20Localization     0.0000    0.0000    0.0000       115

              accuracy                         0.4234       222
             macro avg     0.1411    0.3333    0.1983       222
          weighted avg     0.1793    0.4234    0.2519       222
 


In [17]:
# 6b) Improved training: Transfer Learning + Augmentation + Class Weights
import math
from tensorflow.keras import mixed_precision
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess

# Optional: enable mixed precision if a GPU is available (Apple Silicon / Metal included)
try:
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        mixed_precision.set_global_policy('mixed_float16')
        print('Mixed precision enabled (GPU detected).')
except Exception as e:
    print('Mixed precision not set:', e)

# Build augmentation as part of the model so it's traced and runs on-device
augmentation = keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
], name='augmentation')


def build_improved_model(img_size: int, num_classes: int) -> keras.Model:
    inputs = layers.Input(shape=(img_size, img_size, 3))
    x = augmentation(inputs)
    # EfficientNet expects inputs scaled to [-1, 1] via its preprocess function
    x = layers.Lambda(lambda im: eff_preprocess(im))(x)
    base = EfficientNetB0(include_top=False, weights='imagenet', input_tensor=x)
    base.trainable = False  # phase 1: freeze
    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dropout(0.3)(x)
    # Use float32 for final dense if using mixed precision
    dtype = 'float32' if mixed_precision.global_policy().compute_dtype == 'float16' else None
    outputs = layers.Dense(num_classes, activation='softmax', dtype=dtype)(x)
    model = keras.Model(inputs, outputs, name='effb0_transfer')
    opt = keras.optimizers.Adam(learning_rate=1e-3)
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=[
        'accuracy',
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=2, name='top2_acc')
    ])
    return model

# Cache datasets for speed during repeated passes
train_cached = train_ds.cache().prefetch(AUTOTUNE)
val_cached = val_ds.cache().prefetch(AUTOTUNE) if val_ds is not None else None

# Compute class weights to mitigate imbalance
print('Computing class weights ...')
counts = np.zeros(len(class_names), dtype=np.int64)
for _, y in train_ds.unbatch():
    counts[int(y.numpy())] += 1

total = int(counts.sum())
class_weight = {}
for i, c in enumerate(counts):
    # Inverse frequency: total / (num_classes * count)
    class_weight[i] = (total / (len(counts) * max(1, int(c))))
print('Class counts:', counts.tolist())
print('Class weights:', {k: round(v, 3) for k, v in class_weight.items()})

print('Building improved model (EfficientNetB0) ...')
model_improved = build_improved_model(IMG_SIZE, len(class_names))
model_improved.summary()

# Callbacks
ckpt_path = OUT_DIR / 'best_improved.keras'
callbacks = [
    keras.callbacks.ModelCheckpoint(filepath=str(ckpt_path), monitor='val_accuracy', save_best_only=True, verbose=1),
    keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=4, restore_best_weights=True),
]

# Phase 1: train head
head_epochs = max(3, min(8, EPOCHS))
print(f'Training classification head for {head_epochs} epochs ...')
hist_head = model_improved.fit(
    train_cached,
    validation_data=val_cached,
    epochs=head_epochs,
    class_weight=class_weight,
    verbose=1,
    callbacks=callbacks,
)

# Phase 2: fine-tune – unfreeze top layers
fine_tune_layers = 50
print(f'Unfreezing top {fine_tune_layers} layers and fine-tuning ...')
base_model = None
for layer in model_improved.layers:
    if isinstance(layer, tf.keras.Model) or 'efficientnet' in layer.name.lower():
        base_model = layer
        break
if base_model is None:
    # fallback: search by attribute
    for l in model_improved.layers:
        if hasattr(l, 'layers') and any('efficientnet' in sub.name.lower() for sub in getattr(l, 'layers', [])):
            base_model = l
            break

if base_model is not None:
    for l in base_model.layers[-fine_tune_layers:]:
        l.trainable = True
else:
    print('Warning: could not locate EfficientNet base, skipping unfreeze step.')

# Lower LR for fine-tuning
model_improved.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.SparseTopKCategoricalAccuracy(k=2, name='top2_acc')]
)

fine_epochs = max(EPOCHS, head_epochs + 2)
print(f'Fine-tuning for up to {fine_epochs} epochs ...')
hist_fine = model_improved.fit(
    train_cached,
    validation_data=val_cached,
    epochs=fine_epochs,
    class_weight=class_weight,
    verbose=1,
    callbacks=callbacks,
)

# Evaluate and save improved artifacts
metrics_improved = {}
if val_ds is not None:
    print('Evaluating improved model on validation set ...')
    metrics_improved['val'] = evaluate_model(model_improved, val_ds,)
    print('Val accuracy (improved):', metrics_improved['val']['accuracy'])
    print(metrics_improved['val']['report'])

if test_ds is not None:
    print('Evaluating improved model on test set ...')
    metrics_improved['test'] = evaluate_model(model_improved, test_ds)
    print('Test accuracy (improved):', metrics_improved['test']['accuracy'])
    print(metrics_improved['test']['report'])

model_improved_path = OUT_DIR / 'model_improved.keras'
print('Saving improved model to', model_improved_path)
model_improved.save(model_improved_path)

metrics_improved_path = OUT_DIR / 'metrics_improved.json'
with open(metrics_improved_path, 'w') as f:
    json.dump(metrics_improved, f, indent=2)
print('Saved improved metrics to', metrics_improved_path)


Computing class weights ...
Class counts: [68, 200, 200]
Class weights: {0: 2.294, 1: 0.78, 2: 0.78}
Building improved model (EfficientNetB0) ...
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


Training classification head for 5 epochs ...
Epoch 1/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - accuracy: 0.3690 - loss: 1.0907 - top2_acc: 0.7197
Epoch 1: val_accuracy improved from None to 0.44595, saving model to /Applications/CODES/AiHC/outputs_retino/best_improved.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 425ms/step - accuracy: 0.3803 - loss: 1.2472 - top2_acc: 0.7628 - val_accuracy: 0.4459 - val_loss: 0.9694 - val_top2_acc: 0.9414
Epoch 2/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - accuracy: 0.3784 - loss: 1.0396 - top2_acc: 0.7380
Epoch 2: val_accuracy did not improve from 0.44595
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 349ms/step - accuracy: 0.3419 - loss: 1.1021 - top2_acc: 0.6731 - val_accuracy: 0.2477 - val_loss: 1.1526 - val_top2_acc: 0.6306
Epoch 3/5
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step - accuracy: 0.2476 - loss:

2025-10-28 10:23:56.174367: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [18]:
# 9) Summary (original + improved metrics)
summary_files = [
    ('Original', OUT_DIR / 'metrics.json'),
    ('Improved', OUT_DIR / 'metrics_improved.json'),
]
for label, path in summary_files:
    print(f"\n===== {label} Metrics =====")
    if path.exists():
        with open(path, 'r') as f:
            m = json.load(f)
        for split, md in m.items():
            print(f"-- {split.upper()} --")
            print('Accuracy:', md.get('accuracy'))
            rep = md.get('report', '')
            print(rep[:1200], '...' if len(rep) > 1200 else '')
    else:
        print('Not found at', path)


===== Original Metrics =====
-- VAL --
Accuracy: 0.42342342342342343
                        precision    recall  f1-score   support

     A.%20Segmentation     0.0000    0.0000    0.0000        13
B.%20Disease%20Grading     0.4234    1.0000    0.5949        94
     C.%20Localization     0.0000    0.0000    0.0000       115

              accuracy                         0.4234       222
             macro avg     0.1411    0.3333    0.1983       222
          weighted avg     0.1793    0.4234    0.2519       222
 

===== Improved Metrics =====
-- VAL --
Accuracy: 0.45495495495495497
                        precision    recall  f1-score   support

     A.%20Segmentation     1.0000    0.0769    0.1429        13
B.%20Disease%20Grading     0.4278    0.8511    0.5694        94
     C.%20Localization     0.5882    0.1739    0.2685       115

              accuracy                         0.4550       222
             macro avg     0.6720    0.3673    0.3269       222
          weighted avg