In [1]:
import os, json, csv, shutil, pickle
from pathlib import Path
from typing import List, Dict, Tuple
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
from PIL import Image, ImageDraw, ImageFont

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.efficientnet import preprocess_input

# ----------------------------
# CONFIG (Windows paths)
# ----------------------------
# Root folder: holds the trained model and class map, and receives every
# report / image output written by this script.
OUTPUT_DIR = Path(r"C:\Users\sagni\Downloads\Docu Verify")
# Keras model file passed to load_model().
MODEL_H5   = OUTPUT_DIR / "model.h5"
# Pickled {class_name: index} dict read by load_class_indices().
CLASS_PKL  = OUTPUT_DIR / "class_indices.pkl"

# Folder (or single image file) that is scanned recursively for images.
DEMO_DIR = Path(
    r"C:\Users\sagni\Downloads\Docu Verify\Forged Handwritten Document Database\Forged Handwritten Document Database\Handwritten Forged Document Dataset 2023\Demo"
)

# Size every image is resized to before being fed to the model.
IMG_SIZE   = (256, 256)
# Number of highest-probability classes recorded per image (capped at the
# number of known classes in main()).
TOP_K      = 3
ANNOTATE   = True          # draw predicted label + confidence on images
COPY_MISTAKES = True       # copy misclassified images for quick review
# Destination folders for the annotated copies and the misclassified copies.
ANN_DIR    = OUTPUT_DIR / "annotated_demo"
MISTAKE_DIR= OUTPUT_DIR / "mistakes_demo"

# Per-image prediction tables (same rows, two formats) and the run summary.
CSV_OUT    = OUTPUT_DIR / "demo_predictions.csv"
JSON_OUT   = OUTPUT_DIR / "demo_predictions.json"
SUMMARY_OUT= OUTPUT_DIR / "demo_summary.json"

# ----------------------------
# Helpers
# ----------------------------
def ensure_paths():
    """Fail fast when a required input location is missing.

    The model file, the class-map pickle and the demo folder are checked in
    that order.

    Raises:
        FileNotFoundError: for the first required path that does not exist.
    """
    required = (
        (MODEL_H5, "Missing model"),
        (CLASS_PKL, "Missing class map"),
        (DEMO_DIR, "Demo folder not found"),
    )
    for location, problem in required:
        if location.exists():
            continue
        raise FileNotFoundError(f"{problem}: {location}")

def load_class_indices(pkl_path: Path) -> Tuple[Dict[str,int], Dict[int,str], List[str]]:
    """Load the class-name -> index map and derive its inverse lookups.

    Args:
        pkl_path: Pickle file containing a ``{class_name: index}`` dict.

    Returns:
        A tuple ``(class_indices, idx_to_class, ordered)`` where
        ``class_indices`` is the dict as stored, ``idx_to_class`` is its
        inverse, and ``ordered`` lists the class names by ascending index.

    Raises:
        ValueError: if the pickle does not hold a dict, or its indices are
            not exactly ``0 .. N-1`` with each index used once.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only point this at class maps produced by a trusted training run.
    with open(pkl_path, "rb") as f:
        class_indices: Dict[str,int] = pickle.load(f)
    if not isinstance(class_indices, dict):
        raise ValueError(
            f"Class map in {pkl_path} must be a dict, got {type(class_indices).__name__}"
        )

    idx_to_class = {index: name for name, index in class_indices.items()}
    # Inverting silently drops classes that share an index, and a gap would
    # otherwise surface as a bare KeyError below; reject both explicitly.
    if set(idx_to_class) != set(range(len(class_indices))):
        raise ValueError(
            f"Class indices in {pkl_path} must be unique and cover "
            f"0..{len(class_indices) - 1}: {class_indices}"
        )
    ordered = [idx_to_class[i] for i in range(len(idx_to_class))]
    return class_indices, idx_to_class, ordered

def list_images(path: Path) -> List[Path]:
    """Collect the image files at or below ``path``.

    Args:
        path: Either a single file or a directory to search recursively.

    Returns:
        A sorted list of files whose suffix (case-insensitive) is a known
        image extension. A single non-image file yields an empty list.
    """
    exts = {".jpg",".jpeg",".png",".bmp",".tif",".tiff",".webp"}
    if path.is_file():
        return [path] if path.suffix.lower() in exts else []
    # is_file() keeps directories that merely *look* like images
    # (e.g. a folder named "scans.png") out of the result.
    return sorted(
        p for p in path.rglob("*")
        if p.suffix.lower() in exts and p.is_file()
    )

def load_tensor(img_path: Path) -> np.ndarray:
    """Read an image from disk and turn it into a single-item model batch.

    The image is converted to RGB, resized to ``IMG_SIZE``, cast to float32
    and passed through the EfficientNet ``preprocess_input``.

    Args:
        img_path: Image file to load.

    Returns:
        The preprocessed pixels with a leading batch axis of length 1.
    """
    # The context manager releases the file handle right away instead of
    # waiting for garbage collection; convert() has already read the pixel
    # data, so the converted image stays usable after the file is closed.
    with Image.open(img_path) as src:
        rgb = src.convert("RGB").resize(IMG_SIZE)
    arr = np.array(rgb).astype(np.float32)
    arr = preprocess_input(arr)
    return np.expand_dims(arr, axis=0)  # (1,H,W,3)

def annotate(img_path: Path, text: str, out_path: Path):
    """Save a copy of an image with a text label drawn in its top-left corner.

    The label is white text on a translucent black box. Missing parent
    folders of ``out_path`` are created.

    Args:
        img_path: Source image file.
        text: Label to draw.
        out_path: Destination file; the format is chosen by Pillow from its
            suffix.
    """
    # Close the source file promptly; convert() returns an independent,
    # fully loaded image.
    with Image.open(img_path) as src:
        img = src.convert("RGB")
    # "RGBA" draw mode blends the fill's alpha into the RGB image; with the
    # default mode the alpha component of the box colour is ignored.
    draw = ImageDraw.Draw(img, "RGBA")
    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except (OSError, ImportError):
        # Font file not found/readable, or FreeType support not installed.
        font = ImageFont.load_default()
    margin = 8
    try:
        bbox = draw.textbbox((0,0), text, font=font)
        tw, th = bbox[2]-bbox[0], bbox[3]-bbox[1]
    except (AttributeError, ValueError):
        # Pillow builds without textbbox, or that reject it for the bitmap
        # fallback font: estimate from the text length and the font size.
        tw, th = int(draw.textlength(text, font=font)), 24
    bw, bh = tw + 2*margin, th + 2*margin
    draw.rectangle([(10,10),(10+bw,10+bh)], fill=(0,0,0,180))
    draw.text((10+margin,10+margin), text, font=font, fill=(255,255,255))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    img.save(out_path)

def infer_truth_from_parent(img_path: Path, known_classes: List[str]) -> str:
    r"""Derive an image's ground-truth label from its parent folder name.

    If Demo images are stored like Demo\<TrueLabel>\image.jpg, the immediate
    parent folder name is used as the ground truth.

    Args:
        img_path: Path of the image.
        known_classes: Class names the model knows.

    Returns:
        The matching entry of ``known_classes`` (exact match first, then the
        first case-insensitive match), or ``""`` when the parent folder name
        is not a known class.
    """
    parent = img_path.parent.name
    # match exact first, then case-insensitive fallback
    if parent in known_classes:
        return parent
    wanted = parent.lower()
    return next((c for c in known_classes if c.lower() == wanted), "")

# ----------------------------
# Main
# ----------------------------
def main():
    """Classify every image under ``DEMO_DIR`` and write prediction reports.

    Steps:
      1. Check the required paths, then load the model and the class map.
      2. Predict each image one at a time and keep the top-k classes.
      3. When an image's parent folder names a known class, treat it as
         ground truth and track overall / per-class top-1 accuracy.
      4. Optionally save an annotated copy of each image (``ANNOTATE``) and
         copy misclassified images into ``MISTAKE_DIR`` (``COPY_MISTAKES``).
      5. Write the per-image rows to ``CSV_OUT`` / ``JSON_OUT`` and the run
         summary to ``SUMMARY_OUT``.

    Raises:
        FileNotFoundError: if a required path is missing or no image is found.
    """
    ensure_paths()

    print("[INFO] Loading model and class map…")
    model = load_model(str(MODEL_H5))
    _, idx_to_class, ordered_classes = load_class_indices(CLASS_PKL)
    num_classes = len(ordered_classes)
    k = min(TOP_K, num_classes)  # cannot report more classes than exist
    print("[INFO] Classes:", ordered_classes)

    print(f"[INFO] Scanning Demo folder: {DEMO_DIR}")
    files = list_images(DEMO_DIR)
    if not files:
        raise FileNotFoundError(f"No images found under: {DEMO_DIR}")
    print(f"[INFO] Found {len(files)} image(s)")

    rows = []
    per_class_totals   = Counter()
    per_class_corrects = Counter()
    overall_correct = 0
    overall_total   = 0

    for i, img_path in enumerate(files, start=1):
        x = load_tensor(img_path)
        probs = model.predict(x, verbose=0)[0]  # (C,)

        # indices of the k largest probabilities, highest first
        top_idx = np.argsort(probs)[::-1][:k]
        top_classes = [idx_to_class[int(t)] for t in top_idx]
        top_scores  = [float(probs[int(t)]) for t in top_idx]
        pred_class, pred_conf = top_classes[0], top_scores[0]

        # infer ground truth from parent folder
        true_label = infer_truth_from_parent(img_path, ordered_classes)

        # correct stays None when no ground truth is available, so such
        # images are excluded from every accuracy figure.
        correct = None
        if true_label != "":
            correct = (pred_class == true_label)
            overall_total += 1
            if correct:
                overall_correct += 1
                per_class_corrects[true_label] += 1
            per_class_totals[true_label] += 1

        row = {
            "file": str(img_path),
            "true_label": true_label,
            "pred_class": pred_class,
            "confidence": round(pred_conf, 6),
            "correct": (None if correct is None else bool(correct))
        }
        for j, (c, s) in enumerate(zip(top_classes, top_scores), start=1):
            row[f"top{j}_class"] = c
            row[f"top{j}_p"] = round(s, 6)

        rows.append(row)

        # annotate
        if ANNOTATE:
            tag = f"{pred_class} ({pred_conf*100:.1f}%)"
            if true_label:
                tag += f" | GT: {true_label} | {'✓' if correct else '✗'}"
            # Mirror the image's sub-folder under ANN_DIR: the scan is
            # recursive, so same-named files from different folders would
            # otherwise overwrite each other's annotated copy.
            try:
                rel_parent = img_path.parent.relative_to(DEMO_DIR)
            except ValueError:
                # DEMO_DIR is a single file; there is no sub-folder to mirror.
                rel_parent = Path()
            out_img = ANN_DIR / rel_parent / f"{img_path.stem}_pred.png"
            try:
                annotate(img_path, tag, out_img)
            except Exception as e:
                # best effort: a failed annotation must not stop the run
                print(f"[WARN] Annotate failed for {img_path.name}: {e}")

        # copy mistakes for quick review
        if COPY_MISTAKES and correct is False and true_label:
            dst_dir = MISTAKE_DIR / true_label
            dst_dir.mkdir(parents=True, exist_ok=True)
            try:
                shutil.copy2(img_path, dst_dir / img_path.name)
            except Exception as e:
                # best effort: a failed copy must not stop the run
                print(f"[WARN] Could not copy mistake {img_path.name}: {e}")

        # console preview
        if true_label:
            print(f"[{i}/{len(files)}] {img_path.name} → pred={pred_class} ({pred_conf*100:.1f}%) | GT={true_label} | {'CORRECT' if correct else 'WRONG'}")
        else:
            print(f"[{i}/{len(files)}] {img_path.name} → pred={pred_class} ({pred_conf*100:.1f}%) | GT=UNKNOWN")

    # --- Save CSV/JSON
    df = pd.DataFrame(rows)
    df.to_csv(CSV_OUT, index=False, encoding="utf-8")
    with open(JSON_OUT, "w", encoding="utf-8") as f:
        json.dump(rows, f, indent=2)
    print(f"[INFO] Saved predictions CSV → {CSV_OUT}")
    print(f"[INFO] Saved predictions JSON → {JSON_OUT}")

    # --- Summary with accuracy (if GT available)
    summary = {
        "demo_dir": str(DEMO_DIR),
        "num_images": len(rows),
        "top_k": k,
        "annotated_dir": str(ANN_DIR) if ANNOTATE else "",
        "mistakes_dir": str(MISTAKE_DIR) if COPY_MISTAKES else "",
        # number of images assigned to each *predicted* class
        "per_class_counts": df["pred_class"].value_counts().to_dict(),
    }

    if overall_total > 0:
        overall_acc = overall_correct / overall_total
        # classes with no ground-truth images get None instead of a ratio
        per_class_acc = {c: (per_class_corrects[c] / per_class_totals[c]) if per_class_totals[c] > 0 else None
                         for c in ordered_classes}
        summary.update({
            "gt_images": overall_total,
            "overall_top1_accuracy": round(overall_acc, 6),
            "per_class_accuracy": {name: (None if acc is None else round(acc, 6)) for name, acc in per_class_acc.items()}
        })
    else:
        summary.update({"gt_images": 0, "overall_top1_accuracy": None, "per_class_accuracy": {c: None for c in ordered_classes}})

    with open(SUMMARY_OUT, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)
    print(f"[INFO] Saved summary → {SUMMARY_OUT}")

    print("\n=== DONE ===")
    if ANNOTATE:
        print(f"Annotated images → {ANN_DIR}")
    if COPY_MISTAKES:
        print(f"Misclassified copies → {MISTAKE_DIR}")

if __name__ == "__main__":
    main()


[INFO] Loading model and class map…




[INFO] Classes: ['Blur', 'Copy Paste', 'CopyPaste+Blur', 'CopyPaste+Insertion', 'CopyPaste+Noise', 'Insertion', 'Insertion+Blur', 'Insertion+Noise', 'Noise', 'Normal']
[INFO] Scanning Demo folder: C:\Users\sagni\Downloads\Docu Verify\Forged Handwritten Document Database\Forged Handwritten Document Database\Handwritten Forged Document Dataset 2023\Demo
[INFO] Found 33 image(s)
[1/33] InsNoise(10).jpg → pred=Insertion+Blur (18.3%) | GT=UNKNOWN
[2/33] InsNoise(11).jpg → pred=Insertion+Blur (18.3%) | GT=UNKNOWN
[3/33] InsNoise(12).jpg → pred=Insertion+Blur (18.6%) | GT=UNKNOWN
[4/33] InsNoise(13).jpg → pred=Insertion+Blur (17.4%) | GT=UNKNOWN
[5/33] InsNoise(16).jpg → pred=Insertion+Blur (18.0%) | GT=UNKNOWN
[6/33] InsNoise(17).jpg → pred=Insertion+Blur (15.1%) | GT=UNKNOWN
[7/33] InsNoise(19).jpg → pred=Insertion+Blur (16.1%) | GT=UNKNOWN
[8/33] InsNoise(4).jpg → pred=Insertion+Blur (14.9%) | GT=UNKNOWN
[9/33] InsNoise(5).jpg → pred=Insertion+Blur (14.6%) | GT=UNKNOWN
[10/33] InsNoise(8).