<a href="https://colab.research.google.com/github/sallysparrow/diabetic-retinopathy/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q --upgrade pip

!pip install -q \
    numpy==2.0.0 \
    shap==0.50.0 \
    scikit-image==0.23.2 \
    opencv-python-headless==4.12.0.88 \
    reportlab==4.2.0 PyPDF2==3.0.1 pdfkit==1.0.0 textstat==0.7.3

!apt-get -q install -y wkhtmltopdf >/dev/null

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
Extracting templates from packages: 100%


In [2]:
# Cell 1 – safe writable output folder
import os, tempfile, warnings
import numpy as np, pandas as pd
from google.colab import drive, files

drive.mount('/content/drive')            #  read-write if Drive is healthy

# Project root on the mounted Drive; every other Drive path is derived from it.
BASE_DIR  = '/content/drive/MyDrive/Explainability Exercise/'
TRAIN_CSV = BASE_DIR + 'data/train.csv'
TEST_CSV  = BASE_DIR + 'data/test.csv'
TRAIN_IMG = BASE_DIR + 'data/train_images/'
TEST_IMG  = BASE_DIR + 'data/test_images/'
MODEL_H5  = BASE_DIR + 'model.h5'

# Candidate output folders, in order of preference. Both keep a trailing
# slash because later cells build file names by appending to OUT_DIR.
TRY_OUT   = BASE_DIR + 'output/'         # preferred: persists on Drive
LOCAL_OUT = '/content/output/'           # always writable

# NOTE(review): this silences *all* warnings for the rest of the session.
warnings.filterwarnings("ignore")

# choose first writable path
OUT_DIR = TRY_OUT
try:
    os.makedirs(OUT_DIR, exist_ok=True)
    # makedirs(exist_ok=True) can succeed on a folder that already exists even
    # when nothing can be written into it, so probe with a real
    # (self-deleting) temporary file before trusting the Drive folder.
    with tempfile.TemporaryFile(dir=OUT_DIR):
        pass
except OSError:
    print("Drive is read-only; falling back to local folder.")
    OUT_DIR = LOCAL_OUT
    os.makedirs(OUT_DIR, exist_ok=True)

print("All new files will be saved to:", OUT_DIR)


Mounted at /content/drive
Drive is read-only; falling back to local folder.
All new files will be saved to: /content/output/


In [3]:
# Cell 2 – dataframes and model
import tensorflow as tf

# Label tables for the two splits (paths come from the config cell).
train = pd.read_csv(TRAIN_CSV)
test  = pd.read_csv(TEST_CSV)

# Strip the .png extension once, keep numeric class as int.
# removesuffix only drops a *trailing* ".png"; the previous str.replace call
# would also have removed the substring from the middle of an id.
for df in (train, test):
    df['id_code']   = df['id_code'].str.removesuffix('.png')
    df['diagnosis'] = df['diagnosis'].astype(int)

print(f'train rows: {len(train)},  test rows: {len(test)}')

# compile=False: the model is loaded without compiling it.
model = tf.keras.models.load_model(MODEL_H5, compile=False)
print('model loaded ✓')


train rows: 2929,  test rows: 733
model loaded ✓


In [4]:
# Cell 3 – image helper
from tensorflow.keras.preprocessing.image import load_img, img_to_array
# Size handed to load_img as `target_size` inside load_tensor.
IMG_SIZE = (320, 320)

def load_tensor(id_code:str, folder:str) -> tuple:
    """Load one PNG as a batched, 0-1 scaled array plus the resized PIL image.

    Parameters
    ----------
    id_code : str
        Image identifier without the ``.png`` extension.
    folder : str
        Directory that contains ``<id_code>.png``. A trailing slash is
        optional; the path is assembled with ``os.path.join``.

    Returns
    -------
    tuple
        ``(tensor, pil)``. ``tensor`` is the image array divided by 255 with a
        leading batch axis of length 1 (1×320×320×3 for the current
        ``IMG_SIZE``). ``pil`` is the PIL image returned by ``load_img``,
        i.e. already resized to ``IMG_SIZE`` – not the full-resolution file.
    """
    # os.path.join keeps the path valid whether or not `folder` ends in "/".
    path = os.path.join(folder, f'{id_code}.png')
    pil  = load_img(path, target_size=IMG_SIZE)
    arr  = img_to_array(pil)/255.0          # scale 0-1
    return np.expand_dims(arr, 0), pil      # batch dimension


In [5]:
# Cell 4 – pick examples
# The first training row with diagnosis >= 3 is used as the high-risk case,
# the first row with diagnosis == 0 as the low-risk case.
hi_id = train.loc[train['diagnosis'].ge(3), 'id_code'].iloc[0]
lo_id = train.loc[train['diagnosis'].eq(0), 'id_code'].iloc[0]
print('high-risk:', hi_id, '  low-risk:', lo_id)

# Load both examples (batched array + PIL image) in one statement.
(X_hi, PIL_hi), (X_lo, PIL_lo) = (
    load_tensor(hi_id, TRAIN_IMG),
    load_tensor(lo_id, TRAIN_IMG),
)


high-risk: 9859e2a6cc24   low-risk: e8ddfc9709ce


In [13]:
# Cell 5 – background + explainer
import shap, random, numpy as np, skimage.segmentation as seg
from shap.maskers import Image as ImageMasker

# Draw the background ids from a dedicated, seeded generator so the same
# three images are picked on every run (the global `random` state is untouched).
BG_SEED = 42
BG_IDS = random.Random(BG_SEED).sample(list(train.id_code), 3)
bg = np.concatenate([load_tensor(i, TRAIN_IMG)[0] for i in BG_IDS], 0)

# NOTE(review): `bg` is only reported in the print below; it is not passed to
# the masker or the explainer, which use "inpaint_telea" masking instead.
# The masker shape follows IMG_SIZE so it stays in sync with load_tensor.
masker   = ImageMasker("inpaint_telea", (*IMG_SIZE, 3))
iexplainer = shap.Explainer(model, masker, output_names=[str(i) for i in range(5)])

print("ImageExplainer ready; background shape:", bg.shape)


ImageExplainer ready; background shape: (3, 320, 320, 3)


In [14]:
# Cell 6 – SHAP overlay creator
import matplotlib.pyplot as plt, cv2

def save_overlay(img_tensor, pil_img, patient_id, tag):
    """Explain the top-1 model output with SHAP and save a heat-map overlay.

    Steps: compute SHAP values for the highest-ranked output, average them
    over the colour channels, smooth them by averaging inside SLIC
    super-pixels, colour them with the diverging ``seismic`` map and
    alpha-blend the result over the input image.

    Parameters
    ----------
    img_tensor : array
        Batched model input passed to ``iexplainer``
        (assumed 1×H×W×3 as produced by ``load_tensor`` – TODO confirm for
        other callers).
    pil_img : PIL image
        Image matching ``img_tensor``; used for segmentation and as the
        background of the blend.
    patient_id, tag : str
        Combined into the file name ``<patient_id>_<tag>_explain.png``.

    Returns
    -------
    str
        Path of the PNG written inside ``OUT_DIR``.
    """
    # --- 1. SHAP for top-1 output ---------------------------------
    sv_exp = iexplainer(
        img_tensor,
        max_evals=150,                        # a bit faster
        outputs=shap.Explanation.argsort.flip[:1]
    )
    sv = sv_exp.values.squeeze()             # drop the length-1 axes
    sv = sv.mean(-1)                         # average over colour channels

    # --- 2. super-pixel aggregation -------------------------------
    img_np   = np.array(pil_img)             # uint8 RGB array
    segments = seg.slic(img_np, n_segments=250, compactness=10, sigma=1)

    # Replace every pixel's attribution with the mean of its super-pixel.
    sv_smooth = np.zeros_like(sv)
    for s in np.unique(segments):
        mask = segments == s
        sv_smooth[mask] = sv[mask].mean()

    # --- 3. colour-map with fixed vmax ----------------------------
    vmax = np.percentile(np.abs(sv_smooth), 95)
    if not vmax > 0:
        # The 95th percentile can be 0 (or NaN) when attributions are almost
        # all zero; fall back to the peak, then to 1.0, to avoid dividing by 0.
        peak = np.abs(sv_smooth).max()
        vmax = peak if peak > 0 else 1.0
    signed = np.clip(sv_smooth / vmax, -1, 1)
    # Colormaps expect floats in [0, 1]; shift [-1, 1] so that 0 lands on the
    # white centre of `seismic`, negatives on blue and positives on red.
    heat = plt.cm.seismic(0.5 + 0.5 * signed)[..., :3]   # drop alpha channel

    # --- 4. alpha-blend -------------------------------------------
    base  = img_np / 255.0
    blend = 0.55 * heat + 0.45 * base        # shapes now match

    # --- 5. save ---------------------------------------------------
    # os.path.join works whether or not OUT_DIR ends with a slash.
    out_png = os.path.join(OUT_DIR, f"{patient_id}_{tag}_explain.png")
    plt.imsave(out_png, blend)
    print("saved", out_png)
    return out_png




In [15]:
# Cell 7
# Reload both example images, then write one SHAP overlay per example.
(X_hi, PIL_hi), (X_lo, PIL_lo) = (
    load_tensor(hi_id, TRAIN_IMG),
    load_tensor(lo_id, TRAIN_IMG),
)

# High-risk example first, low-risk second; each call returns the PNG path.
png_hi = save_overlay(img_tensor=X_hi, pil_img=PIL_hi, patient_id=hi_id, tag='high')
png_lo = save_overlay(img_tensor=X_lo, pil_img=PIL_lo, patient_id=lo_id, tag='low')


  0%|          | 0/148 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:16, 16.21s/it]               


saved /content/output/9859e2a6cc24_high_explain.png


  0%|          | 0/148 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:13, 13.08s/it]               


saved /content/output/e8ddfc9709ce_low_explain.png
