<a href="https://colab.research.google.com/github/manushi0304/Diabetic_Retinopathy/blob/main/tfeval1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Root folder of the project on Google Drive; the cells below read models and
# write CSVs underneath it.
BASE = "/content/drive/MyDrive/DiabeticProject"

# Attach Google Drive to the Colab VM before anything touches BASE.
drive.mount('/content/drive')
print("Using BASE =", BASE)


Mounted at /content/drive
Using BASE = /content/drive/MyDrive/DiabeticProject


In [None]:
%%bash
set -euo pipefail
BASE="/content/drive/MyDrive/DiabeticProject"

# Print a "=== title ===" banner ahead of each listing.
banner() { echo "=== $1 ==="; }

echo "BASE: $BASE"

# Every pipeline ends in `|| true` so that a failing `find` (or `head` closing
# the pipe early) does not abort the cell under `set -e` / `pipefail`.
banner "TFLite models (.tflite)"
find "$BASE" -type f -name "*.tflite" | sort || true

echo
banner "Parent models (.keras / .h5)"
find "$BASE" -type f \( -name "*.keras" -o -name "*.h5" \) | sort || true

echo
banner "CSVs (with sizes)"
find "$BASE" -maxdepth 5 -type f -iname "*.csv" -printf "%p\t%k KB\n" | sort || true

echo
banner "Image folders (top 50 by file count)"
# Drop the file name from every image path, then count images per directory.
find "$BASE" -type f \( -iname "*.png" -o -iname "*.jpg" -o -iname "*.jpeg" -o -iname "*.bmp" -o -iname "*.tif" -o -iname "*.tiff" -o -iname "*.webp" \) \
| sed -r 's|/[^/]+$||' | sort | uniq -c | sort -nr | head -n 50 || true


BASE: /content/drive/MyDrive/DiabeticProject
=== TFLite models (.tflite) ===
/content/drive/MyDrive/DiabeticProject/tflite/DenseNet121_model_fp16.tflite
/content/drive/MyDrive/DiabeticProject/tflite/DenseNet121_model_fp32.tflite
/content/drive/MyDrive/DiabeticProject/tflite/DenseNet121_model_int8.tflite
/content/drive/MyDrive/DiabeticProject/tflite/NASNetMobile_model_fp16.tflite
/content/drive/MyDrive/DiabeticProject/tflite/NASNetMobile_model_fp32.tflite
/content/drive/MyDrive/DiabeticProject/tflite/NASNetMobile_model_int8.tflite
/content/drive/MyDrive/DiabeticProject/tflite/ResNet50V2model_fp16.tflite
/content/drive/MyDrive/DiabeticProject/tflite/ResNet50V2model_fp32.tflite
/content/drive/MyDrive/DiabeticProject/tflite/ResNet50V2model_int8.tflite

=== Parent models (.keras / .h5) ===
/content/drive/MyDrive/DiabeticProject/saved_models/DenseNet121_single/latest_model.keras
/content/drive/MyDrive/DiabeticProject/saved_models/DenseNet121_single_split.keras
/content/drive/MyDrive/Diabetic

In [None]:
# Cell 2 — Build /content/drive/MyDrive/DiabeticProject/test.csv robustly
import os, glob, pandas as pd, zipfile, io
from google.colab import files as colab_files  # avoid name clash

# Project root on Drive; created up front so the CSV can always be written.
BASE = "/content/drive/MyDrive/DiabeticProject"
os.makedirs(BASE, exist_ok=True)

# The CSV (columns: filepath, label) that this cell is responsible for producing.
OUT_CSV = BASE + "/test.csv"

# File suffixes treated as images; compared against lower-cased file names.
IMG_EXTS = tuple(
    "." + suffix
    for suffix in ("png", "jpg", "jpeg", "bmp", "tif", "tiff", "webp")
)

def valid_test_csv(path):
    """Return True when `path` is a usable test CSV.

    Usable means: the file exists, it parses as CSV, it has both a
    `filepath` and a `label` column, and at least one listed file is present
    on disk. Any exception raised while reading or checking is treated as
    "not usable" and yields False.
    """
    if not os.path.exists(path):
        return False

    required_columns = {"filepath", "label"}
    try:
        table = pd.read_csv(path)
        if not required_columns.issubset(table.columns):
            return False
        # Keep only rows whose image is still on disk.
        on_disk = table[table["filepath"].apply(os.path.exists)]
        return len(on_disk) > 0
    except Exception:
        return False

def build_df_from_class_folder(root, img_exts=None):
    """Index a one-folder-per-class image directory as a DataFrame.

    Parameters
    ----------
    root : str
        Directory whose immediate subdirectories are class names.
    img_exts : iterable of str, optional
        File suffixes (including the dot) that count as images. Matching is
        case-insensitive. Defaults to the module-level ``IMG_EXTS``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``filepath`` and ``label`` (label = subdirectory name as a
        string), one row per image file found directly inside a class
        folder. ``None`` when `root` is not a directory or no image is found.

    Notes
    -----
    Class folders and the files inside them are visited in sorted order so
    that the row order does not depend on the order in which the filesystem
    happens to list entries.
    """
    if not os.path.isdir(root):
        return None

    exts = tuple(e.lower() for e in (IMG_EXTS if img_exts is None else img_exts))

    rows = []
    for cls in sorted(os.listdir(root)):
        cls_path = os.path.join(root, cls)
        if not os.path.isdir(cls_path):
            continue
        for fname in sorted(os.listdir(cls_path)):
            fpath = os.path.join(cls_path, fname)
            # Only regular files directly inside the class folder count.
            if fname.lower().endswith(exts) and os.path.isfile(fpath):
                rows.append((fpath, str(cls)))

    if not rows:
        return None
    return pd.DataFrame(rows, columns=["filepath", "label"])

def find_class_folder_dataset(base_dir, max_depth=3, min_images=20):
    """Locate a directory under `base_dir` laid out as one subfolder per class.

    Candidate directories are tried in this order: a fixed list of common
    dataset folder names directly under `base_dir`, then every directory
    reached by walking `base_dir` down to `max_depth` levels. The first
    candidate for which ``build_df_from_class_folder`` yields at least
    `min_images` rows wins.

    Parameters
    ----------
    base_dir : str
        Directory to search.
    max_depth : int
        Deepest level (0 = `base_dir` itself) whose subdirectories are still
        offered as candidates.
    min_images : int
        Sanity threshold: candidates with fewer image rows are rejected.

    Returns
    -------
    tuple
        ``(normalized_candidate_path, dataframe)`` on success, otherwise
        ``(None, None)``.
    """
    candidates = []
    common = ["test_images","val_images","valid_images","test","val","valid","train_images","train","images/train","dataset/train"]
    for name in common:
        p = os.path.join(base_dir, name)
        if os.path.isdir(p):
            candidates.append(p)

    # Shallow scan. Clearing `dirs` in place stops os.walk from descending any
    # further; a bare `continue` would still traverse the whole subtree.
    for current, dirs, _files in os.walk(base_dir):
        rel = os.path.relpath(current, base_dir)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
        if depth > max_depth:
            dirs[:] = []
            continue
        dirs.sort()  # deterministic candidate order
        candidates.extend(os.path.join(current, d) for d in dirs)
        if depth == max_depth:
            dirs[:] = []

    # Check each distinct candidate once, in discovery order.
    seen = set()
    for c in candidates:
        c = os.path.normpath(c)
        if c in seen:
            continue
        seen.add(c)
        df = build_df_from_class_folder(c)
        if df is not None and len(df) >= min_images:
            return c, df
    return None, None

def try_build_from_aptos_anywhere():
    """Build OUT_CSV from the first usable APTOS-style ``train.csv`` in Drive.

    Every ``train.csv`` under ``/content/drive`` is considered (in sorted
    order). A CSV is usable when it has ``id_code`` and ``diagnosis`` columns,
    an image folder (``train_images``, ``images/train`` or ``train``) sits
    next to it, and at least one ``<id_code>.png/.jpg/.jpeg`` file exists in
    that folder.

    The written CSV has columns ``filepath`` and ``label``, where ``label``
    is the integer ``diagnosis`` value taken from ``train.csv``.

    Returns
    -------
    bool
        True if OUT_CSV was written, False if no candidate was usable.
        A candidate that raises while being processed is reported and
        skipped rather than aborting the search.
    """
    cands = sorted(glob.glob("/content/drive/**/train.csv", recursive=True))
    for csv_path in cands:
        try:
            df = pd.read_csv(csv_path)
            if not {"id_code","diagnosis"}.issubset(df.columns):
                continue
            root = os.path.dirname(csv_path)
            img_roots = [
                os.path.join(root, "train_images"),
                os.path.join(root, "images", "train"),
                os.path.join(root, "train"),
            ]
            img_root = next((r for r in img_roots if os.path.isdir(r)), None)
            if img_root is None:
                continue

            paths, labels = [], []
            # Iterate column-wise: iterrows() builds one Series per row and
            # upcasts mixed numeric columns, which would turn a numeric
            # id_code such as 123 into "123.0".
            for id_code, diagnosis in zip(df["id_code"], df["diagnosis"]):
                idc = str(id_code)
                for ext in (".png", ".jpg", ".jpeg"):
                    p = os.path.join(img_root, f"{idc}{ext}")
                    if os.path.exists(p):
                        paths.append(p); labels.append(int(diagnosis))
                        break
            if not paths:
                continue
            pd.DataFrame({"filepath":paths,"label":labels}).to_csv(OUT_CSV, index=False)
            print(f"✅ Built test CSV from APTOS in Drive: {OUT_CSV}  ({len(paths)} rows)")
            return True
        except Exception as exc:
            # Best effort: move on to the next candidate, but say why this
            # one was rejected instead of hiding the failure.
            print(f"⚠️ Skipping {csv_path}: {type(exc).__name__}: {exc}")
    return False

def try_build_from_kagglehub_or_classfolders():
    """Download the APTOS JPG dataset via kagglehub and write OUT_CSV from it.

    Two layouts are tried in order:

    1. The first ``train.csv`` found in the download, if it has ``id_code``
       and ``diagnosis`` columns and matching image files can be located.
    2. A one-folder-per-class directory discovered with
       ``find_class_folder_dataset`` (searched to depth 4).

    Returns True when OUT_CSV was written. Returns False, after printing a
    message, when neither layout worked or when kagglehub is missing or
    raised.
    """
    try:
        import kagglehub
        dataset_dir = kagglehub.dataset_download('subhajeetdas/aptos-2019-jpg')

        # Route 1: label file plus image folder.
        train_csvs = glob.glob(os.path.join(dataset_dir, "**", "train.csv"), recursive=True)
        if train_csvs:
            csv_path = train_csvs[0]
            table = pd.read_csv(csv_path)
            if {"id_code","diagnosis"}.issubset(table.columns):
                csv_dir = os.path.dirname(csv_path)
                # Likely image locations, most specific first.
                probe_dirs = (
                    os.path.join(csv_dir, "train_images"),
                    os.path.join(csv_dir, "train"),
                    csv_dir,
                    dataset_dir,
                )
                img_root = next((d for d in probe_dirs if os.path.isdir(d)), None)
                if img_root:
                    paths, labels = [], []
                    for _, record in table.iterrows():
                        stem = str(record["id_code"])
                        candidates = (
                            os.path.join(img_root, f"{stem}{ext}")
                            for ext in (".png", ".jpg", ".jpeg")
                        )
                        # First existing extension wins for this id.
                        hit = next((c for c in candidates if os.path.exists(c)), None)
                        if hit is not None:
                            paths.append(hit)
                            labels.append(int(record["diagnosis"]))
                    if paths:
                        pd.DataFrame({"filepath":paths,"label":labels}).to_csv(OUT_CSV, index=False)
                        print(f"✅ Built test CSV from kagglehub(train.csv): {OUT_CSV}  ({len(paths)} rows)")
                        return True

        # Route 2: class-folder layout somewhere inside the download.
        root, df_cf = find_class_folder_dataset(dataset_dir, max_depth=4)
        if df_cf is None:
            print("kagglehub dataset found but no train.csv or class-folder dataset detected.")
            return False
        df_cf.to_csv(OUT_CSV, index=False)
        print(f"✅ Built test CSV from class-folders in kagglehub dataset:\n    root={root}\n    rows={len(df_cf)}\n    → {OUT_CSV}")
        return True
    except Exception as e:
        print("kagglehub not available or failed:", e)
        return False

def try_build_from_zip_upload():
    """Ask for a ZIP upload and build OUT_CSV from its class folders.

    Expected archive layout (folder name = label)::

        test_images/
          0/*.png ...
          1/*.png ...

    The first uploaded file is extracted into ``/content/uploaded_images``
    (emptied first so files from an earlier upload cannot leak into the new
    CSV). Every image file found below the extraction root is listed with the
    name of its immediate parent directory as its label. macOS archive
    metadata (``__MACOSX`` folders and ``._*`` files) is ignored because
    those entries carry image extensions but are not images.

    Returns
    -------
    bool
        True if OUT_CSV was written. False if nothing was uploaded, the
        upload is not a valid ZIP, or no image was found.
    """
    import shutil  # local import: only needed by this fallback path

    print("⚠️ Upload a ZIP with folders per class (e.g., test_images/0, test_images/1, ...).")
    uploaded = colab_files.upload()
    if not uploaded:
        return False
    zip_name = next(iter(uploaded))
    extract_dir = "/content/uploaded_images"
    shutil.rmtree(extract_dir, ignore_errors=True)
    os.makedirs(extract_dir, exist_ok=True)
    try:
        with zipfile.ZipFile(io.BytesIO(uploaded[zip_name]), 'r') as zf:
            zf.extractall(extract_dir)
    except zipfile.BadZipFile:
        print(f"Uploaded file is not a valid ZIP archive: {zip_name}")
        return False

    # Build CSV from extracted structure
    rows = []
    for root, dirnames, filenames in os.walk(extract_dir):
        # Prune macOS metadata folders and fix the traversal order.
        dirnames[:] = sorted(d for d in dirnames if d != "__MACOSX")
        if root == extract_dir:
            continue  # files at the top level have no class folder
        label = os.path.basename(root)
        for fn in sorted(filenames):
            if fn.startswith("._"):
                continue  # AppleDouble resource fork, not an image
            if fn.lower().endswith(IMG_EXTS):
                rows.append((os.path.join(root, fn), label))
    if not rows:
        print("ZIP extracted but no images found.")
        return False
    df = pd.DataFrame(rows, columns=["filepath","label"])
    df.to_csv(OUT_CSV, index=False)
    print(f"✅ Built test CSV from uploaded ZIP: {OUT_CSV}  ({len(df)} rows)")
    return True

# ---- main flow
# Resolution order: reuse an existing CSV, else APTOS files in Drive, else the
# kagglehub download, else an interactive ZIP upload. `or` short-circuits, so
# the kagglehub download only runs when the Drive search found nothing.
if valid_test_csv(OUT_CSV):
    print(f"✅ Using existing test CSV: {OUT_CSV}")
elif not (try_build_from_aptos_anywhere() or try_build_from_kagglehub_or_classfolders()):
    print("Could not find APTOS in Drive or usable class-folders.")
    ok = try_build_from_zip_upload()
    if not ok:
        raise FileNotFoundError(
            "No usable test set. Re-run this cell and upload a ZIP with folders per class, "
            "or place APTOS train.csv + train_images/ in Drive."
        )

# Preview of whatever CSV ended up on disk.
df = pd.read_csv(OUT_CSV)
print("Head of test.csv:")
display(df.head())
print("Rows:", df.shape[0])


Downloading from https://www.kaggle.com/api/v1/datasets/download/subhajeetdas/aptos-2019-jpg?dataset_version_number=12...


100%|██████████| 2.82G/2.82G [00:34<00:00, 87.9MB/s]

Extracting files...





✅ Built test CSV from class-folders in kagglehub dataset:
    root=/root/.cache/kagglehub/datasets/subhajeetdas/aptos-2019-jpg/versions/12/APTOS 2019 (Original) (Binary)
    rows=3662
    → /content/drive/MyDrive/DiabeticProject/test.csv
Head of test.csv:


Unnamed: 0,filepath,label
0,/root/.cache/kagglehub/datasets/subhajeetdas/a...,DR
1,/root/.cache/kagglehub/datasets/subhajeetdas/a...,DR
2,/root/.cache/kagglehub/datasets/subhajeetdas/a...,DR
3,/root/.cache/kagglehub/datasets/subhajeetdas/a...,DR
4,/root/.cache/kagglehub/datasets/subhajeetdas/a...,DR


Rows: 3662


In [None]:
# Quantized TFLite evaluator aligned with your parent code (binary DR vs No-DR)
import os, time, glob, numpy as np, pandas as pd
from PIL import Image
from tqdm import tqdm
import tensorflow as tf
from sklearn.metrics import confusion_matrix, accuracy_score

# ===== CONFIG =====
# The nine TFLite files to evaluate: three architectures, each exported at
# three precisions. Order is architecture-major, precision-minor.
_TFLITE_DIR = "/content/drive/MyDrive/DiabeticProject/tflite"
_MODEL_STEMS = ("DenseNet121_model", "NASNetMobile_model", "ResNet50V2model")
_PRECISIONS = ("fp16", "fp32", "int8")
MODEL_PATHS = [
    f"{_TFLITE_DIR}/{stem}_{precision}.tflite"
    for stem in _MODEL_STEMS
    for precision in _PRECISIONS
]

# Where the per-model metrics table is written.
SAVE_SUMMARY_CSV = "/content/drive/MyDrive/DiabeticProject/tflite_eval_summary.csv"

# Label text in a CSV that counts as the positive ("DR") class.
POSITIVE_LABEL_NAME = "DR"

# Class id meaning "No DR" when a model emits more than two outputs
# (APTOS 5-class convention).
NO_DR_CLASS_ID_FOR_5CLASS = 0

# Output index of the DR class for two-output models ordered [NoDR, DR].
DR_CLASS_INDEX_FOR_BINARY = 1

# Speed / size controls.
WARMUP_RUNS = 5      # untimed invocations before measuring latency
LIMIT_IMAGES = None  # e.g., 500 for a quick pass; None = all
# ===================

# ---------- helpers ----------
def get_preprocess_from_name(model_path):
    """Choose an input preprocessing function from the model's file name.

    The lower-cased base name is searched for an architecture keyword, in
    this order: ResNetV2, NASNet, DenseNet. The matching
    ``tf.keras.applications.<family>.preprocess_input`` is returned; if no
    keyword matches, a plain ``x / 255.0`` rescale is returned.
    """
    stem = os.path.basename(model_path).lower()

    def mentions(*keywords):
        return any(keyword in stem for keyword in keywords)

    if mentions("resnet50v2", "resnetv2"):
        return tf.keras.applications.resnet_v2.preprocess_input
    if mentions("nasnetmobile", "nasnet"):
        return tf.keras.applications.nasnet.preprocess_input
    if mentions("densenet121", "densenet"):
        return tf.keras.applications.densenet.preprocess_input
    # Unknown architecture: fall back to scaling by 1/255.
    return lambda x: x / 255.0

def quantize_np(x_float, scale, zero_point, dtype):
    """Map a float array to `dtype` using affine quantization parameters.

    Computes ``round(x / scale + zero_point)``; for integer target dtypes the
    result is clipped to the representable range before casting. When `scale`
    is None or 0 the input is cast to `dtype` without any rescaling.
    """
    has_quant_params = scale is not None and scale != 0
    if not has_quant_params:
        return x_float.astype(dtype)

    levels = np.round(x_float / scale + zero_point)
    if np.issubdtype(dtype, np.integer):
        bounds = np.iinfo(dtype)
        levels = np.clip(levels, bounds.min, bounds.max)
    return levels.astype(dtype)

def dequantize_np(q, scale, zero_point):
    """Convert a quantized array to float32: ``scale * (q - zero_point)``.

    When `scale` is None or 0 the array is only cast to float32.
    """
    as_float = q.astype(np.float32)
    if scale is None or scale == 0:
        return as_float
    return scale * (as_float - zero_point)

def load_and_prep(path, target_hw, preprocess_fn):
    """Load one image as a preprocessed batch of size 1.

    Parameters
    ----------
    path : str
        Image file to read.
    target_hw : tuple
        Size handed unchanged to ``PIL.Image.resize``. Despite the parameter
        name, PIL expects ``(width, height)``.
    preprocess_fn : callable
        Applied to the float32 RGB pixel array (values as decoded, 0-255).

    Returns
    -------
    numpy.ndarray
        ``preprocess_fn`` output with a leading batch axis added.
    """
    # The context manager releases the file handle deterministically, which
    # matters for formats PIL keeps open after decoding (e.g. multi-frame TIFF).
    with Image.open(path) as raw:
        img = raw.convert("RGB").resize(target_hw, Image.BILINEAR)
    x = np.array(img, dtype=np.float32)
    x = preprocess_fn(x)
    return np.expand_dims(x, 0)

def file_mb(path):
    """Return the size of `path` in MiB rounded to 2 decimals, or None.

    None is returned when the size cannot be determined (missing file,
    permission problem, or an invalid path value). Only those errors are
    caught, so Ctrl-C / kernel interrupts are no longer swallowed.
    """
    try:
        return round(os.path.getsize(path) / (1024 * 1024), 2)
    except (OSError, TypeError, ValueError):
        return None

# ---------- load ground truth like your parent pipeline ----------
def _binarize_labels(labels, positive_label, no_dr_class_id):
    """Turn a label column into 0/1 ints (1 = DR).

    All-numeric columns (e.g. APTOS grades 0-4) are positive when they differ
    from `no_dr_class_id`; otherwise labels are compared, case-insensitively
    and ignoring surrounding whitespace, with `positive_label`.
    """
    numeric = pd.to_numeric(labels, errors="coerce")
    if numeric.notna().all():
        return (numeric != no_dr_class_id).astype(int)
    text = labels.astype(str).str.strip().str.lower()
    return (text == str(positive_label).strip().lower()).astype(int)


def load_test_dataframe(csv_path=None, positive_label=None, no_dr_class_id=None):
    """Return the evaluation set as a DataFrame with `filepath` and `y_true`.

    Sources, in priority order:

    1. A global ``test_df`` DataFrame with columns ``image_path`` and
       ``label`` (labels are cast to int and used as-is).
    2. The CSV at `csv_path` with columns ``filepath`` and ``label``. Labels
       may be strings such as "DR" / "No_DR" or numeric class ids; numeric
       ids are binarized as "anything other than the No-DR id is DR".
       Comparing numeric ids against the positive label text would mark
       every row as negative.

    Parameters
    ----------
    csv_path : str, optional
        Defaults to ``/content/drive/MyDrive/DiabeticProject/test.csv``.
    positive_label : str, optional
        Defaults to the module-level ``POSITIVE_LABEL_NAME``.
    no_dr_class_id : int, optional
        Defaults to the module-level ``NO_DR_CLASS_ID_FOR_5CLASS``.

    Raises
    ------
    FileNotFoundError
        If neither source is available.
    """
    # 1) In-memory test_df left over from the parent training session.
    in_memory = globals().get("test_df")
    if isinstance(in_memory, pd.DataFrame) and {"image_path","label"}.issubset(in_memory.columns):
        df = in_memory.rename(columns={"image_path":"filepath"})
        # parent code uses 1 for DR, 0 for No DR
        df["y_true"] = df["label"].astype(int)
        print(f"Using in-memory test_df (rows={len(df)})")
        return df[["filepath","y_true"]].reset_index(drop=True)

    # 2) Fallback to the CSV on disk.
    if csv_path is None:
        csv_path = "/content/drive/MyDrive/DiabeticProject/test.csv"
    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path)
        if {"filepath","label"}.issubset(df.columns):
            positive = POSITIVE_LABEL_NAME if positive_label is None else positive_label
            no_dr = NO_DR_CLASS_ID_FOR_5CLASS if no_dr_class_id is None else no_dr_class_id
            df["y_true"] = _binarize_labels(df["label"], positive, no_dr)
            if len(df) and df["y_true"].nunique() < 2:
                # Usually means the label text does not match `positive`.
                print(f"⚠️ All {len(df)} rows mapped to y_true={int(df['y_true'].iloc[0])}; check the label values in {csv_path}")
            print(f"Using {csv_path} (rows={len(df)})")
            return df[["filepath","y_true"]].reset_index(drop=True)

    raise FileNotFoundError(
        "No test set found. Either keep your parent session variables alive (test_df with image_path,label), "
        f"or ensure {csv_path} exists with columns filepath,label."
    )

# ---------- core evaluation ----------
def evaluate_tflite_binary(model_path, df):
    """Run one TFLite model over `df` and report binary DR metrics.

    Parameters
    ----------
    model_path : str
        Path to a ``.tflite`` file. Its base name also selects the input
        preprocessing (see ``get_preprocess_from_name``).
    df : pandas.DataFrame
        Must provide ``filepath`` and ``y_true`` (1 = DR, 0 = No DR). Only
        the first ``LIMIT_IMAGES`` rows are used when that setting is not
        None.

    Returns
    -------
    dict
        Model name, image count, mean ``invoke()`` latency in ms, accuracy,
        sensitivity, specificity, FPR, FNR, confusion-matrix counts, input
        dtype/shape, length of the first output vector and file size in MiB.
        All ratio metrics are plain Python floats.

    Notes
    -----
    Only the first input and first output tensor are used. Integer inputs
    and outputs are (de)quantized with the tensor's own scale / zero point.
    Latency covers ``interpreter.invoke()`` only, not image loading.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    in_det = interpreter.get_input_details()[0]
    out_det = interpreter.get_output_details()[0]

    _, H, W, C = in_det["shape"]
    in_dtype = in_det["dtype"]
    in_scale, in_zp = in_det.get("quantization", (None, None))
    out_dtype = out_det["dtype"]
    out_scale, out_zp = out_det.get("quantization", (None, None))

    preprocess_fn = get_preprocess_from_name(model_path)

    N = len(df) if LIMIT_IMAGES is None else min(LIMIT_IMAGES, len(df))

    def to_model_input(x):
        # Float models take the preprocessed array as-is; others are quantized.
        if in_dtype != np.float32:
            return quantize_np(x, in_scale, in_zp, in_dtype)
        return x.astype(in_dtype)

    # Warm-up on an all-zero image so one-off setup cost is not timed.
    dummy = preprocess_fn(np.zeros((1, H, W, C), dtype=np.float32))
    interpreter.set_tensor(in_det["index"], to_model_input(dummy))
    for _ in range(max(0, WARMUP_RUNS)):
        interpreter.invoke(); _ = interpreter.get_tensor(out_det["index"])

    times = []
    y_pred = []
    out_len_first = None

    # Pull the paths out once instead of doing a positional row lookup per image.
    image_paths = df["filepath"].iloc[:N].tolist()
    for image_path in tqdm(image_paths, desc=os.path.basename(model_path)):
        # PIL's resize takes (width, height), hence (W, H).
        x = to_model_input(load_and_prep(image_path, (W, H), preprocess_fn))

        interpreter.set_tensor(in_det["index"], x)
        t0 = time.perf_counter(); interpreter.invoke(); t1 = time.perf_counter()
        out = interpreter.get_tensor(out_det["index"])
        out = dequantize_np(out, out_scale, out_zp) if out_dtype != np.float32 else out
        out = np.squeeze(out)

        # Map model output -> binary prediction (DR=1, NoDR=0)
        # NOTE(review): the single-output branches apply a sigmoid, i.e. they
        # assume the model emits a logit. If the exported model already ends
        # in a sigmoid, every prediction becomes DR — confirm against the
        # parent model's final layer.
        if out.ndim == 0:
            prob = 1 / (1 + np.exp(-out))
            pred_bin = int(prob >= 0.5)
            out_len_first = out_len_first or 1
        elif out.ndim == 1:
            K = out.shape[0]; out_len_first = out_len_first or K
            if K == 1:
                prob = 1 / (1 + np.exp(-out[0])); pred_bin = int(prob >= 0.5)
            elif K == 2:
                pred_bin = int(np.argmax(out) == DR_CLASS_INDEX_FOR_BINARY)
            else:
                # 5-class APTOS: class 0 is No-DR -> DR if argmax != 0
                pred_bin = int(np.argmax(out) != NO_DR_CLASS_ID_FOR_5CLASS)
        else:
            pred_bin = int(np.argmax(out) != NO_DR_CLASS_ID_FOR_5CLASS)

        y_pred.append(pred_bin)
        times.append((t1 - t0) * 1000.0)

    y_true = df.iloc[:N]["y_true"].to_numpy(dtype=int)
    y_pred = np.array(y_pred, dtype=int)

    # Distinct names for the counts: `fp` would otherwise be easy to confuse
    # with a file path.
    tn, n_fp, n_fn, tp = confusion_matrix(y_true, y_pred, labels=[0,1]).ravel()

    def safe(a, b):
        # Plain-float ratio that is 0.0 when the denominator is empty.
        return float(a) / float(b) if b else 0.0

    sensitivity = safe(tp, tp + n_fn)
    specificity = safe(tn, tn + n_fp)
    fpr = safe(n_fp, n_fp + tn)
    fnr = safe(n_fn, n_fn + tp)
    # Routed through safe() so accuracy is a Python float like the other
    # ratios (not np.float64) and the denominator is guarded.
    acc = safe(tp + tn, tp + tn + n_fp + n_fn)
    avg_ms = float(np.mean(times)) if times else float("nan")

    return {
        "model": os.path.basename(model_path),
        "images": int(N),
        "avg_inference_time_ms": round(avg_ms, 4),
        "accuracy": round(acc, 6),
        "sensitivity": round(sensitivity, 6),
        "specificity": round(specificity, 6),
        "FPR": round(fpr, 6),
        "FNR": round(fnr, 6),
        "tn": int(tn), "fp": int(n_fp), "fn": int(n_fn), "tp": int(tp),
        "input_dtype": str(in_dtype),
        "input_shape": [int(d) for d in in_det["shape"]],
        "output_len": out_len_first,
        "tflite_size_mb": file_mb(model_path),
    }

# ---------- run ----------
df_eval = load_test_dataframe()
if LIMIT_IMAGES is not None:
    # Fixed seed so a quick pass always uses the same subset.
    df_eval = df_eval.sample(n=min(LIMIT_IMAGES, len(df_eval)), random_state=42).reset_index(drop=True)

# Column order of the summary table / CSV.
cols = ["model","tflite_size_mb","images","avg_inference_time_ms",
        "accuracy","sensitivity","specificity","FPR","FNR",
        "tn","fp","fn","tp","input_dtype","input_shape","output_len"]

# Say which configured models are absent instead of skipping them silently.
missing_models = [p for p in MODEL_PATHS if not os.path.exists(p)]
if missing_models:
    print("⚠️ Skipping model files that were not found:")
    for p in missing_models:
        print("   ", p)

results = []
for mp in MODEL_PATHS:
    if not os.path.exists(mp):
        continue
    res = evaluate_tflite_binary(mp, df_eval)
    results.append(res)
    print("\n", res)
    # Checkpoint after every model: each one takes minutes, so an interrupt
    # or crash must not discard the models that already finished.
    pd.DataFrame(results, columns=cols).to_csv(SAVE_SUMMARY_CSV, index=False)

# Passing `columns=` also works for an empty result list (no KeyError).
summary_df = pd.DataFrame(results, columns=cols)
if results:
    print(f"\nSaved summary → {SAVE_SUMMARY_CSV}")
else:
    print("\nNo models were evaluated; summary CSV was not written.")
summary_df


Using /content/drive/MyDrive/DiabeticProject/test.csv (rows=3662)


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
DenseNet121_model_fp16.tflite: 100%|██████████| 3662/3662 [13:03<00:00,  4.67it/s]
    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    



 {'model': 'DenseNet121_model_fp16.tflite', 'images': 3662, 'avg_inference_time_ms': 153.8063, 'accuracy': np.float64(0.594211), 'sensitivity': 0.304793, 'specificity': 0.891967, 'FPR': 0.108033, 'FNR': 0.695207, 'tn': 1610, 'fp': 195, 'fn': 1291, 'tp': 566, 'input_dtype': "<class 'numpy.float32'>", 'input_shape': [1, 256, 256, 3], 'output_len': 2, 'tflite_size_mb': 14.85}


DenseNet121_model_fp32.tflite:  41%|████      | 1498/3662 [05:19<07:41,  4.69it/s]


KeyboardInterrupt: 