In [1]:
# JUPYTER-FRIENDLY INFERENCE: predict on single image or folder, save CSV results
# Requires: tensorflow>=2.10, numpy, pandas, pillow, scikit-learn (for loading the label encoder)

import os, json, pickle
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Tuple, Optional

import tensorflow as tf
from PIL import Image

# ================== CONFIG ==================
BASE_DIR = Path(r"C:\Users\NXTWAVE\Downloads\COVID Radiography Detection")
ARTIFACTS = BASE_DIR / "artifacts"
MODEL_PATH = ARTIFACTS / "model.h5"
LE_PATH    = ARTIFACTS / "label_encoder.pkl"
CLS_JSON   = ARTIFACTS / "class_indices.json"
OUT_CSV    = ARTIFACTS / "predictions_infer.csv"

IMG_SIZE = 224
BATCH_SIZE = 32

# Set this to either a single image path OR a folder path (it will scan images recursively)
# Examples:
# INPUT_PATH = r"C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\Normal\images\Normal-1.png"
# INPUT_PATH = r"C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\COVID\images"
INPUT_PATH = r"C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\Normal\images"

# ================== HELPERS ==================
# Lower-cased file suffixes that are treated as images when scanning a folder.
ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".bmp"}

def list_images(path: Path) -> List[Path]:
    """Collect the image files that inference should run on.

    Args:
        path: Either a single file or a directory. A file is returned as-is
            (its extension is not checked, so the caller can force any file
            through). A directory is scanned recursively.

    Returns:
        A sorted list of paths. For a directory, only regular files whose
        lower-cased suffix is in ``ALLOWED_EXTS`` are included. The list is
        empty when nothing matches.
    """
    root = Path(path)
    if root.is_file():
        return [root]
    # rglob("*") yields directories too; a folder named e.g. "scans.png" must
    # not be handed to the image decoder, so keep regular files only.
    return sorted(
        candidate
        for candidate in root.rglob("*")
        if candidate.suffix.lower() in ALLOWED_EXTS and candidate.is_file()
    )

def decode_image_tf(path: tf.Tensor) -> tf.Tensor:
    """Load one image file and turn it into a normalized float32 tensor.

    The file at ``path`` is read, decoded to 3 channels (animations are not
    expanded), converted to float32, resized to ``IMG_SIZE`` x ``IMG_SIZE``
    and standardized per channel with the ImageNet mean/std constants below.
    """
    # Per-channel ImageNet statistics; the original author notes these match
    # the normalization used at training time.
    imagenet_mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
    imagenet_std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)

    raw_bytes = tf.io.read_file(path)
    pixels = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    # convert_image_dtype rescales integer pixel values into [0, 1]
    scaled = tf.image.convert_image_dtype(pixels, tf.float32)
    resized = tf.image.resize(scaled, (IMG_SIZE, IMG_SIZE))
    return (resized - imagenet_mean) / imagenet_std

def make_infer_dataset(paths: List[str], batch_size: int = 32) -> tf.data.Dataset:
    """Build a batched, prefetched dataset of decoded images from file paths.

    Each path is decoded with ``decode_image_tf`` (parallelism left to
    ``tf.data.AUTOTUNE``), then the images are grouped into batches of
    ``batch_size`` and prefetched.
    """
    path_ds = tf.data.Dataset.from_tensor_slices(paths)
    image_ds = path_ds.map(decode_image_tf, num_parallel_calls=tf.data.AUTOTUNE)
    batched_ds = image_ds.batch(batch_size)
    return batched_ds.prefetch(tf.data.AUTOTUNE)

def load_class_names() -> List[str]:
    """Return the class names used to label the model's outputs.

    Prefers ``label_encoder.pkl`` (its ``classes_`` attribute); falls back to
    ``class_indices.json``, a ``{class_name: index}`` mapping whose names are
    returned sorted by index.

    Returns:
        The class names as a list. The caller indexes this list with the
        argmax of the model output, so the order is assumed to match the
        order used at training time.

    Raises:
        FileNotFoundError: If neither artifact exists.
    """
    if LE_PATH.exists():
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load a label_encoder.pkl that you produced yourself.
        with open(LE_PATH, "rb") as f:
            le = pickle.load(f)
        return list(le.classes_)
    if CLS_JSON.exists():
        # JSON is UTF-8 by specification; do not depend on the locale default
        # encoding (cp1252 on many Windows setups).
        with open(CLS_JSON, "r", encoding="utf-8") as f:
            name_to_index = json.load(f)
        # class_indices.json is {class_name: index}; we need names sorted by index
        return sorted(name_to_index, key=name_to_index.get)
    raise FileNotFoundError("Neither label_encoder.pkl nor class_indices.json found in artifacts.")

def triage_from_probs(class_names: List[str], probs: np.ndarray, normal_key: str = "Normal",
                      threshold: float = 0.5) -> Tuple[str, float]:
    """Reduce one probability vector to a coarse triage decision.

    The abnormal score is ``1 - p(normal_key)`` when ``normal_key`` is one of
    ``class_names``; otherwise it falls back to ``1 - max(probs)``.

    Returns:
        ``(label, abnormal_score)`` where ``label`` is ``"Abnormal"`` if the
        score reaches ``threshold`` and ``"Likely Normal"`` otherwise.
    """
    try:
        normal_pos = class_names.index(normal_key)
    except ValueError:
        # No class carries the "normal" name: use one minus the top-1
        # probability as a rough stand-in for the abnormal score.
        abnormal_score = float(1.0 - probs.max())
    else:
        abnormal_score = 1.0 - float(probs[normal_pos])

    if abnormal_score >= threshold:
        return "Abnormal", abnormal_score
    return "Likely Normal", abnormal_score

# ================== LOAD MODEL & CLASSES ==================
# Fail fast with a real exception: `assert` is stripped when Python runs
# with -O, which would defer the failure to a less obvious error in load_model.
if not MODEL_PATH.exists():
    raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
class_names = load_class_names()
num_classes = len(class_names)
print(f"[INFO] Loaded classes ({num_classes}): {class_names}")

# compile=False: the model is loaded without compiling it; this script only
# calls predict().
model = tf.keras.models.load_model(MODEL_PATH, compile=False)
print("[INFO] Model loaded.")

# ================== COLLECT INPUTS ==================
paths = list_images(Path(INPUT_PATH))
# Explicit raise instead of `assert`, which is removed under `python -O`.
if not paths:
    raise FileNotFoundError(f"No images found at: {INPUT_PATH}")
print(f"[INFO] Found {len(paths)} image(s) for inference.")

# ================== INFERENCE ==================
# The dataset is built from plain strings, so convert the Path objects first.
str_paths = list(map(str, paths))
ds = make_infer_dataset(str_paths, batch_size=BATCH_SIZE)
# One row of class scores per input image; the original author notes these
# are softmax outputs of shape (N, C).
probs = model.predict(ds, verbose=1)

# ================== BUILD RESULTS TABLE ==================
# Every probability column must have a class name, otherwise the labels and
# prob_* columns below would be wrong or fail with an opaque IndexError.
if probs.ndim != 2 or probs.shape[1] != len(class_names):
    raise ValueError(
        f"Model output shape {probs.shape} does not match {len(class_names)} class name(s)."
    )

# Top-1 prediction per image
pred_idx = probs.argmax(axis=1)
pred_label = [class_names[i] for i in pred_idx]
pred_score = probs.max(axis=1)

# Coarse triage (Abnormal / Likely Normal) per image
triage_label, triage_prob = [], []
for pi in probs:
    t_label, t_prob = triage_from_probs(class_names, pi, normal_key="Normal", threshold=0.5)
    triage_label.append(t_label)
    triage_prob.append(t_prob)

# Walk all per-image sequences in lockstep. The previous list.pop(0) calls
# were O(n) each (quadratic overall) and emptied the triage lists.
rows = []
for p, lab, score, t_label, t_prob, pi in zip(
    str_paths, pred_label, pred_score, triage_label, triage_prob, probs
):
    row = {
        "path": p,
        "pred_label": lab,
        "pred_score": float(score),
        "triage_label": t_label,
        "triage_prob_abnormal": float(t_prob),
    }
    # add per-class probabilities
    for cname, class_prob in zip(class_names, pi):
        row[f"prob_{cname}"] = float(class_prob)
    rows.append(row)

df = pd.DataFrame(rows)
OUT_CSV.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(OUT_CSV, index=False, encoding="utf-8")
print(f"\n[SAVED] Predictions CSV: {OUT_CSV}")

# ================== SHOW A FEW RESULTS ==================
# Show top-5 rows for quick inspection
preview = df.head(5)
display(preview)

# One compact two-line summary per previewed file
for _, rec in preview.iterrows():
    print(rec["path"])
    print(f"  → Pred: {rec['pred_label']} (score={rec['pred_score']:.3f}) | Triage: {rec['triage_label']} [{rec['triage_prob_abnormal']:.3f}]")



[INFO] Loaded classes (4): ['COVID', 'Lung_Opacity', 'Normal', 'Viral Pneumonia']


[INFO] Model loaded.
[INFO] Found 10192 image(s) for inference.

[SAVED] Predictions CSV: C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\artifacts\predictions_infer.csv


Unnamed: 0,path,pred_label,pred_score,triage_label,triage_prob_abnormal,prob_COVID,prob_Lung_Opacity,prob_Normal,prob_Viral Pneumonia
0,C:\Users\NXTWAVE\Downloads\COVID Radiography D...,Normal,0.97368,Likely Normal,0.02632,0.008494,0.017781,0.97368,4.4e-05
1,C:\Users\NXTWAVE\Downloads\COVID Radiography D...,Normal,0.965339,Likely Normal,0.034661,0.023854,0.010759,0.965339,4.8e-05
2,C:\Users\NXTWAVE\Downloads\COVID Radiography D...,Normal,0.932012,Likely Normal,0.067988,0.00951,0.005811,0.932012,0.052666
3,C:\Users\NXTWAVE\Downloads\COVID Radiography D...,Normal,0.936965,Likely Normal,0.063035,0.008048,0.008878,0.936965,0.046109
4,C:\Users\NXTWAVE\Downloads\COVID Radiography D...,Normal,0.801057,Likely Normal,0.198943,0.151612,0.04684,0.801057,0.000491


C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\Normal\images\Normal-1.png
  → Pred: Normal (score=0.974) | Triage: Likely Normal [0.026]
C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\Normal\images\Normal-10.png
  → Pred: Normal (score=0.965) | Triage: Likely Normal [0.035]
C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\Normal\images\Normal-100.png
  → Pred: Normal (score=0.932) | Triage: Likely Normal [0.068]
C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\Normal\images\Normal-1000.png
  → Pred: Normal (score=0.937) | Triage: Likely Normal [0.063]
C:\Users\NXTWAVE\Downloads\COVID Radiography Detection\archive\COVID-19_Radiography_Dataset\Normal\images\Normal-10000.png
  → Pred: Normal (score=0.801) | Triage: Likely Normal [0.199]
