<a href="https://colab.research.google.com/github/sallysparrow/diabetic-retinopathy/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install shap fpdf reportlab PyPDF2 pdfkit torch torchvision
!apt-get -q install -y wkhtmltopdf


Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting reportlab
  Downloading reportlab-4.4.5-py3-none-any.whl.metadata (1.7 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pdfkit
  Downloading pdfkit-1.0.0-py3-none-any.whl.metadata (9.3 kB)
Downloading reportlab-4.4.5-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfkit-1.0.0-py3-none-any.whl (12 kB)
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=682865e26b096e34bc8946423c9bd7924161d10ed78ea02991c8

In [3]:
# Cell 1 – project paths and a guaranteed-writable output folder
import os
import tempfile
import warnings

import numpy as np
import pandas as pd
from google.colab import drive, files

drive.mount('/content/drive')            #  read-write if Drive is healthy

# All paths keep their trailing slash: later cells build file names by
# appending directly to these strings.
BASE_DIR  = '/content/drive/MyDrive/Explainability Exercise/'
TRY_OUT   = BASE_DIR + 'output/'         # preferred: persist results on Drive
LOCAL_OUT = '/content/output/'           # always writable

TRAIN_CSV = BASE_DIR + 'data/train.csv'
TEST_CSV  = BASE_DIR + 'data/test.csv'
TRAIN_IMG = BASE_DIR + 'data/train_images/'
TEST_IMG  = BASE_DIR + 'data/test_images/'
MODEL_H5  = BASE_DIR + 'model.h5'

warnings.filterwarnings("ignore")

# choose first writable path
OUT_DIR = TRY_OUT
try:
    os.makedirs(OUT_DIR, exist_ok=True)
    # makedirs(exist_ok=True) succeeds on an existing folder even when it is
    # read-only, so confirm writability by actually creating a throw-away file.
    with tempfile.TemporaryFile(dir=OUT_DIR):
        pass
except OSError:
    print("Drive is read-only; falling back to local folder.")
    OUT_DIR = LOCAL_OUT
    os.makedirs(OUT_DIR, exist_ok=True)

print("All new files will be saved to:", OUT_DIR)


Mounted at /content/drive
Drive is read-only; falling back to local folder.
All new files will be saved to: /content/output/


In [4]:
# Cell 2 – label tables and the trained Keras model
import tensorflow as tf

# Read both label files in one go (train first, then test).
train, test = (pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_CSV))

# Normalise each table in place:
#   * id_code   – drop the literal ".png" so ids match the bare file stems
#   * diagnosis – force the class label to a plain integer
for df in (train, test):
    bare_ids      = df['id_code'].str.replace('.png','',regex=False)
    df['id_code'] = bare_ids
    int_labels      = df['diagnosis'].astype(int)
    df['diagnosis'] = int_labels

print(f'train rows: {len(train)},  test rows: {len(test)}')

# compile=False: the model is only used for inference/explanations here,
# so the saved training configuration is not restored.
model = tf.keras.models.load_model(MODEL_H5, compile=False)
print('model loaded ✓')


train rows: 2929,  test rows: 733
model loaded ✓


In [5]:
# Cell 3 – image helper
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array
IMG_SIZE = (320, 320)   # size every image is resized to when loaded

def load_tensor(id_code:str, folder:str):
    """Load ``<folder>/<id_code>.png`` as a model-ready batch plus the PIL image.

    Parameters
    ----------
    id_code : str
        Image id without the ``.png`` extension.
    folder : str
        Directory holding the image; a trailing slash is optional.

    Returns
    -------
    (batch, pil)
        ``batch`` is the resized image as an array scaled to the 0-1 range with
        a leading batch axis of length 1; ``pil`` is the resized PIL image.
    """
    # os.path.join yields the same path as plain concatenation when `folder`
    # ends with a slash, and still works when it does not.
    path = os.path.join(folder, f'{id_code}.png')
    pil  = load_img(path, target_size=IMG_SIZE)
    arr  = img_to_array(pil)/255.0          # scale 0-1
    return np.expand_dims(arr, 0), pil      # batch dimension


In [6]:
# Cell 4 – choose one high-risk and one low-risk training example
# "High-risk" means diagnosis >= 3, "low-risk" means diagnosis == 0;
# in both cases the first matching row of the training table is used.
is_high_risk = train.diagnosis >= 3
is_low_risk  = train.diagnosis == 0

hi_id = train.loc[is_high_risk, 'id_code'].iloc[0]
lo_id = train.loc[is_low_risk, 'id_code'].iloc[0]
print('high-risk:', hi_id, '  low-risk:', lo_id)

# Keep both the model input batch (X_*) and the PIL image (PIL_*) for each pick.
X_hi, PIL_hi = load_tensor(hi_id, TRAIN_IMG)
X_lo, PIL_lo = load_tensor(lo_id, TRAIN_IMG)


high-risk: 9859e2a6cc24   low-risk: e8ddfc9709ce


In [7]:
# Cell 5 – background + explainer
import shap, random, numpy as np

N_BACKGROUND = 3    # number of training images in the SHAP background set
BG_SEED      = 42   # fixed seed so Restart & Run All picks the same images

# 1. Build the background batch from randomly chosen training images.
#    A private Random instance makes the draw reproducible without
#    disturbing the global `random` state used elsewhere.
BG_IDS = random.Random(BG_SEED).sample(list(train.id_code), N_BACKGROUND)
bg = np.concatenate([load_tensor(i, TRAIN_IMG)[0] for i in BG_IDS], axis=0)

# 2. Use GradientExplainer (DeepExplainer crashes on TF 2.15)
explainer = shap.GradientExplainer((model.input, model.output), bg)

print("GradientExplainer ready, background shape:", bg.shape)


GradientExplainer ready, background shape: (3, 320, 320, 3)


In [10]:
# Cell 6 – SHAP overlay creator
import os
import matplotlib.pyplot as plt

def save_shap_overlay(tensor, patient_id, tag):
    """Render a SHAP overlay for one image, save it as PNG and report the prediction.

    Parameters
    ----------
    tensor : array
        Single-image batch as produced by ``load_tensor``.
    patient_id : str
        Image id; used as the first part of the output file name.
    tag : str
        Free-form label (e.g. ``'high'``/``'low'``) inserted in the file name.

    Returns
    -------
    (png_path, predicted_class)
        Path of the PNG written under ``OUT_DIR`` and the arg-max class index
        of ``model.predict(tensor)`` as a plain ``int``.
    """
    # Ask SHAP to explain only the top-ranked model output.
    raw = explainer.shap_values(tensor, ranked_outputs=1)
    # NOTE(review): with ranked_outputs set, `raw` is expected to be
    # (values, indexes); raw[0][0] selects the attributions for the single
    # ranked output. The exact layout depends on the shap version — confirm
    # before upgrading shap.
    shap_values = [raw[0][0]]

    # argmax over the class axis gives a length-1 array for a single-image
    # batch; index it explicitly because int() on a non-0-d array is
    # deprecated in recent NumPy releases.
    probs = model.predict(tensor)
    pred = int(np.argmax(probs, axis=1)[0])

    # image_plot builds its own figure, so no figure is created beforehand
    # (doing so left an empty, never-closed figure behind on every call).
    shap.image_plot(
        shap_values,              # list[ndarray], not tuple
        tensor,
        show=False,
        labels=[f'Predicted class {pred}']
    )
    fig = plt.gcf()               # the figure image_plot just drew

    out_png = os.path.join(OUT_DIR, f'{patient_id}_{tag}_shap.png')
    fig.savefig(out_png, bbox_inches='tight')
    plt.close(fig)                # free the figure; nothing is shown inline
    print("saved", out_png)
    return out_png, pred



In [11]:
# Cell 7 – create the SHAP overlays for the two example images
# Each call writes one PNG and returns (png_path, predicted_class).
hi_result = save_shap_overlay(X_hi, hi_id, 'high')
png_hi, pred_hi = hi_result

lo_result = save_shap_overlay(X_lo, lo_id, 'low')
png_lo, pred_lo = lo_result


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
saved /content/output/9859e2a6cc24_high_shap.png
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
saved /content/output/e8ddfc9709ce_low_shap.png


<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>