# EfficientNet vs CBAM Benchmarking (ODIR-5K)

This notebook generates:
- Row-normalized (percent) confusion matrices for both models
- Grad-CAM grids per class to visually benchmark attention effectiveness

It mirrors the H-priority pipeline and uses the same preprocessing/augmentation.


In [None]:
import os, json, itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix, classification_report

# Every figure and CSV written by this notebook lands in this directory;
# it is created up front so later savefig/to_csv calls cannot fail on a
# missing path.
OUTPUT_DIR = '../new_work/figures'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Class tables. The two lists are index-aligned: position i in NAMES_FULL is
# the display name for the short code at position i in TARGET.
NAMES_FULL = ['Glaucoma','Cataract','AMD','Hypertension','Myopia']
TARGET = ['G','C','A','H','M']
# Integer class ids 0..4, one per entry of the tables above.
LABELS = list(range(len(NAMES_FULL)))

# Utility: row-normalized confusion matrix and save as image+csv

def save_cm_percent(y_true, y_pred, name):
    """Save a row-normalized (percent) confusion matrix as PNG and CSV.

    Each row of the matrix is divided by its row total and multiplied by
    100, so a cell reads "percent of samples with this true label that were
    predicted as that label". Rows with no samples stay all-zero.

    Args:
        y_true: Integer class ids of the ground truth.
        y_pred: Integer class ids predicted by the model.
        name: Tag inserted into the figure title and both output file names
            (``cm_<name>_percent.png`` / ``cm_<name>_percent.csv``).

    Returns:
        None. The two files are written into ``OUTPUT_DIR``.
    """
    counts = confusion_matrix(y_true, y_pred, labels=LABELS).astype(np.float32)

    # A class absent from y_true has a zero row total; dividing by 1 instead
    # keeps that row at 0 % rather than producing NaN.
    totals = counts.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0
    percent = (counts / totals) * 100.0

    fig, ax = plt.subplots(figsize=(7, 6))
    sns.heatmap(
        percent,
        annot=True,
        fmt='.1f',
        cmap='Blues',
        xticklabels=NAMES_FULL,
        yticklabels=NAMES_FULL,
        ax=ax,
    )
    ax.set_title(f'Confusion Matrix (%) - {name}')
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_DIR, f'cm_{name}_percent.png'))
    plt.close(fig)

    table = pd.DataFrame(percent, index=NAMES_FULL, columns=NAMES_FULL)
    table.to_csv(
        os.path.join(OUTPUT_DIR, f'cm_{name}_percent.csv'), float_format='%.1f')

print('Setup OK')


In [None]:
# Grad-CAM utilities for EfficientNet-like models

def get_last_conv_layer(model):
    """Return the name of the layer Grad-CAM should hook into.

    Two passes are made over ``model.layers`` from last to first:

    1. the first layer that is an instance of ``layers.Conv2D``;
    2. failing that, the first layer whose name contains ``'conv'``
       (case-insensitive). This pass is purely name-based, so it can also
       match layers that are not convolutions.

    Only the model's top-level ``layers`` list is inspected; layers inside a
    nested sub-model are not searched.

    Args:
        model: Object exposing a ``layers`` sequence whose items have a
            ``name`` attribute (a Keras model).

    Returns:
        The ``name`` of the selected layer.

    Raises:
        ValueError: If neither pass finds a candidate.
    """
    back_to_front = list(reversed(model.layers))

    by_type = next(
        (lyr for lyr in back_to_front if isinstance(lyr, layers.Conv2D)), None)
    if by_type is not None:
        return by_type.name

    # Fallback for conv-like layers that are not Conv2D instances
    # (the original note mentions DepthwiseConv2D in EfficientNetV2/TF-Keras).
    by_name = next(
        (lyr for lyr in back_to_front if 'conv' in lyr.name.lower()), None)
    if by_name is not None:
        return by_name.name

    raise ValueError('No convolutional layer found for Grad-CAM')

def gradcam_heatmap(img_tensor, model, last_conv_name, pred_index=None):
    """Compute a Grad-CAM heatmap for the first image of a batch.

    The gradient of the selected class score with respect to the feature map
    of ``last_conv_name`` is averaged per channel and used to weight that
    feature map; the weighted sum is passed through ReLU and scaled so its
    maximum is (just under) 1.

    Args:
        img_tensor: Batched model input. Only the first sample's feature map
            contributes to the returned heatmap.
        model: Keras model to explain.
        last_conv_name: Name of the layer whose output is used as the
            feature map. The code assumes that output is rank-4 with
            channels last (it averages gradients over axes 0, 1, 2).
        pred_index: Class index to explain. When ``None``, the arg-max of
            the first sample's prediction is used.

    Returns:
        A 2-D ``tf.Tensor`` (feature-map height x width) with values in
        [0, 1].

    Raises:
        ValueError: If no gradient path exists between the class score and
            the chosen layer's output.
    """
    # Intentionally NOT a tf.function: `model`, `last_conv_name` and an int
    # `pred_index` are Python objects, so every distinct combination would
    # trigger a fresh trace (and rebuild the sub-model below) for what is a
    # handful of eager calls.
    grad_model = tf.keras.models.Model(
        # `model.inputs` is already a list; wrapping it again creates a
        # nested input structure that does not match a single-tensor call.
        model.inputs,
        [model.get_layer(last_conv_name).output, model.output],
    )
    with tf.GradientTape() as tape:
        conv_outputs, preds = grad_model(img_tensor)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    grads = tape.gradient(class_channel, conv_outputs)
    if grads is None:
        raise ValueError(
            f'No gradient between the class score and layer {last_conv_name!r}; '
            'cannot compute Grad-CAM')

    # One importance weight per channel.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    heatmap = conv_outputs[0] @ pooled_grads[..., tf.newaxis]
    # Drop only the trailing singleton axis so that 1-pixel-wide/high feature
    # maps keep their spatial dimensions.
    heatmap = tf.squeeze(heatmap, axis=-1)
    # ReLU first, then normalise by the max of the rectified map: dividing by
    # the pre-ReLU max breaks when every activation is negative.
    heatmap = tf.maximum(heatmap, 0)
    return heatmap / (tf.reduce_max(heatmap) + 1e-8)

def overlay_heatmap(img, heatmap, alpha=0.4, cmap='jet'):
    """Blend a colour-mapped heatmap onto an image.

    Args:
        img: Image to draw on. It is passed straight to ``cv2.addWeighted``
            together with the colour-mapped heatmap, so it presumably has to
            be a uint8 array with the same height/width and 3 channels --
            verify against callers.
        heatmap: 2-D array of intensities, expected in [0, 1]. Values outside
            that range are clipped and NaNs are treated as 0 so the uint8
            conversion cannot wrap around.
        alpha: Weight of the heatmap in the blend; the image gets
            ``1 - alpha``.
        cmap: Name of an OpenCV colormap, matched case-insensitively against
            the ``cv2.COLORMAP_*`` constants (e.g. ``'jet'``, ``'viridis'``).
            Unknown names fall back to JET.

    Returns:
        The blended image as returned by ``cv2.addWeighted``. The heatmap
        colours come from ``cv2.applyColorMap`` and are therefore in
        OpenCV's BGR channel order.
    """
    import cv2

    # Previously `cmap` was accepted but ignored; resolve it to the matching
    # OpenCV constant, keeping JET as the default and as the fallback.
    colormap = getattr(cv2, f'COLORMAP_{str(cmap).upper()}', cv2.COLORMAP_JET)

    intensity = np.clip(np.nan_to_num(heatmap), 0.0, 1.0)
    coloured = cv2.applyColorMap(np.uint8(255 * intensity), colormap)
    return cv2.addWeighted(coloured, alpha, img, 1 - alpha, 0)

print('Grad-CAM utilities ready')


In [None]:
# Placeholder cell: nothing is loaded here. Before running the cells below, define
# x_test (test images), y_test (one-hot labels), and the two trained models
# effnet_model and effnet_cbam_model, e.g. from arrays/tf.data pipelines prepared in prior notebooks.

# Example hooks (user to replace with actual loading)
# x_test = np.load('/kaggle/working/x_test.npy')
# y_test = np.load('/kaggle/working/y_test.npy')
# effnet_model = keras.models.load_model('/kaggle/working/effnet_best.keras')
# effnet_cbam_model = keras.models.load_model('/kaggle/working/effnet_cbam_best.keras')

print('Please load x_test, y_test, effnet_model, effnet_cbam_model before running next cells.')


In [None]:
# Generate percent confusion matrices

# Integer class id per test sample, taken as the arg-max of each y_test row.
y_true = np.argmax(y_test, axis=1)

# Hard predictions of each model on the same test inputs.
probs_effnet = effnet_model.predict(x_test, verbose=0)
pred_effnet = np.argmax(probs_effnet, axis=1)
probs_cbam = effnet_cbam_model.predict(x_test, verbose=0)
pred_cbam = np.argmax(probs_cbam, axis=1)

# One PNG + CSV pair per model, tagged by the name used in the file names.
for cm_tag, cm_preds in (('effnet_baseline', pred_effnet),
                         ('effnet_cbam', pred_cbam)):
    save_cm_percent(y_true, cm_preds, cm_tag)

print('Percent confusion matrices saved to', OUTPUT_DIR)


In [None]:
# Grad-CAM grids per class for both models
import cv2

def deprocess(x):
    """Convert an image array to displayable uint8 pixels in [0, 255].

    Values are clipped to [0, 255] and truncated to ``uint8``. Floating-point
    input whose maximum is at most 1.0 is treated as normalised to [0, 1]
    and multiplied by 255 first; without that step such images would
    truncate to (almost) all zeros and render black. Integer input and
    float input with values above 1.0 are handled exactly as before.

    Args:
        x: Array-like image data.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the same shape as ``x``.
    """
    arr = np.asarray(x)
    # Heuristic: a float image that never exceeds 1.0 is assumed to be on
    # the 0-1 scale. (A genuinely near-black 0-255 float image would also
    # match; that case is considered negligible here.)
    if arr.size and np.issubdtype(arr.dtype, np.floating) and arr.max() <= 1.0:
        arr = arr * 255.0
    return np.uint8(np.clip(arr, 0, 255))

def grid_for_class(cls_idx, samples_per_class=2, img_size=224):
    """Save a Grad-CAM comparison grid for one class and return its path.

    Takes the first ``samples_per_class`` test samples whose true label is
    ``cls_idx`` and, for each, builds one row of three panels: the input
    image, the Grad-CAM overlay of ``effnet_model`` and the Grad-CAM overlay
    of ``effnet_cbam_model``. Both heatmaps explain class ``cls_idx`` (the
    true class), not the predicted one. Rows are stacked vertically and the
    grid is written to ``OUTPUT_DIR/gradcam_grid_<code>.png``.

    Reads the module-level ``y_true``, ``x_test``, ``effnet_model`` and
    ``effnet_cbam_model``.

    Args:
        cls_idx: Integer class id (index into ``NAMES_FULL`` / ``TARGET``).
        samples_per_class: Maximum number of samples (rows) in the grid.
        img_size: Accepted but not used by this function.

    Returns:
        Path of the saved PNG, or ``None`` when no test sample has the
        requested class.
    """
    picked = np.where(y_true == cls_idx)[0][:samples_per_class]
    if len(picked) == 0:
        return None

    conv_eff = get_last_conv_layer(effnet_model)
    conv_cbam = get_last_conv_layer(effnet_cbam_model)

    strips = []
    for sample in x_test[picked]:
        batch = tf.convert_to_tensor(sample[None, ...])
        cam_eff = gradcam_heatmap(
            batch, effnet_model, conv_eff, pred_index=cls_idx).numpy()
        cam_cbam = gradcam_heatmap(
            batch, effnet_cbam_model, conv_cbam, pred_index=cls_idx).numpy()

        display = deprocess(sample)
        # cv2.resize takes the target size as (width, height).
        target_wh = (display.shape[1], display.shape[0])
        panels = [display]
        for cam in (cam_eff, cam_cbam):
            panels.append(overlay_heatmap(display, cv2.resize(cam, target_wh)))

        # Channel order is reversed on all three panels, including the raw
        # input (BGR->RGB correction for cv2).
        # NOTE(review): this is only right if x_test images are stored in BGR
        # order -- confirm; with RGB inputs the image colours end up swapped.
        strips.append(
            np.concatenate([panel[..., ::-1] for panel in panels], axis=1))

    mosaic = np.concatenate(strips, axis=0)

    plt.figure(figsize=(6, 4))
    plt.imshow(mosaic)
    plt.axis('off')
    plt.title(f'{NAMES_FULL[cls_idx]}: Input | EffNet Grad-CAM | EffNet+CBAM Grad-CAM')
    out_path = os.path.join(OUTPUT_DIR, f'gradcam_grid_{TARGET[cls_idx]}.png')
    plt.savefig(out_path, bbox_inches='tight')
    plt.close()
    return out_path

# Build one grid per class; classes with no test samples yield None and are
# left out of the list of saved paths.
grid_paths = (grid_for_class(c, samples_per_class=2) for c in LABELS)
saved = [path for path in grid_paths if path]

print('Saved Grad-CAM grids:', saved)
