In [3]:
import os, sys, json, csv, pickle
from pathlib import Path
from typing import List, Dict, Tuple
import collections

import numpy as np
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.efficientnet import preprocess_input

# ----------------------------
# CONFIG (Windows paths)
# ----------------------------
# Folder that holds the trained artifacts and receives every output file.
OUTPUT_DIR = Path(r"C:\Users\sagni\Downloads\Docu Verify")
# Keras model file passed to load_model() in main().
MODEL_H5   = OUTPUT_DIR / "model.h5"
# Pickled {'ClassName': index} mapping read by load_class_indices().
CLASS_PKL  = OUTPUT_DIR / "class_indices.pkl"

# Your Demo folder (recursively scanned)
DEMO_DIR = Path(
    r"C:\Users\sagni\Downloads\Docu Verify\Forged Handwritten Document Database\Forged Handwritten Document Database\Handwritten Forged Document Dataset 2023\Demo"
)

# Inference options
# Size handed to PIL's resize() before the image is fed to the model.
IMG_SIZE   = (256, 256)
# Number of highest-probability classes recorded per image (capped at the class count).
TOP_K      = 3
# When True, a labelled copy of every image is written under ANN_DIR.
ANNOTATE   = True
ANN_DIR    = OUTPUT_DIR / "annotated_predictions"

# Outputs
# Per-image prediction rows (JSON and CSV), run summary, and predicted-class bar chart.
JSON_OUT   = OUTPUT_DIR / "predictions.json"
CSV_OUT    = OUTPUT_DIR / "predictions.csv"
SUMMARY_JSON = OUTPUT_DIR / "summary.json"
BAR_PNG    = OUTPUT_DIR / "class_counts.png"

# ----------------------------
# Utilities
# ----------------------------
def ensure_artifacts():
    """Verify that the model, the class map and the demo folder all exist.

    Raises:
        FileNotFoundError: for the first missing path, checked in the order
            model file, class-index pickle, demo folder.
    """
    required = (
        (MODEL_H5, "Missing model"),
        (CLASS_PKL, "Missing class map"),
        (DEMO_DIR, "Demo folder not found"),
    )
    for location, problem in required:
        if location.exists():
            continue
        raise FileNotFoundError(f"{problem}: {location}")

def load_class_indices(pkl_path: Path) -> Tuple[Dict[str,int], Dict[int,str], List[str]]:
    """Load the class-name/index mapping and derive its inverse and ordered list.

    Args:
        pkl_path: Pickle file containing a ``{'ClassName': index, ...}`` dict.

    Returns:
        A tuple ``(class_indices, idx_to_class, ordered_classes)`` where
        ``idx_to_class`` is the inverted mapping and ``ordered_classes`` lists
        the class names at positions ``0 .. len(idx_to_class) - 1``.

    Raises:
        KeyError: if the indices are not exactly ``0 .. n-1``.
    """
    # NOTE: unpickling can execute arbitrary code; only use a trusted file here.
    with open(pkl_path, "rb") as handle:
        class_indices: Dict[str,int] = pickle.load(handle)

    idx_to_class: Dict[int,str] = {}
    for class_name, class_idx in class_indices.items():
        idx_to_class[class_idx] = class_name

    # Look up every position in turn so a gap in the indices surfaces as KeyError.
    positions = range(len(idx_to_class))
    ordered_classes = list(map(idx_to_class.__getitem__, positions))
    return class_indices, idx_to_class, ordered_classes

def list_images(path: Path) -> List[Path]:
    """Collect image files from a single file path or, recursively, a folder.

    Args:
        path: An image file or a directory to scan recursively.

    Returns:
        A sorted list of paths whose suffix (case-insensitive) is a known
        image extension. A single non-image file yields an empty list.
    """
    exts = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}
    if path.is_file():
        return [path] if path.suffix.lower() in exts else []
    # Require is_file() so a directory that merely looks like an image
    # (e.g. a folder named "scans.png") is not handed to the image loader.
    return sorted(
        p for p in path.rglob("*")
        if p.suffix.lower() in exts and p.is_file()
    )

def load_tensor(img_path: Path) -> np.ndarray:
    """Load one image as a single-item float32 batch ready for the model.

    The image is converted to RGB, resized to ``IMG_SIZE``, cast to float32,
    passed through ``preprocess_input`` and given a leading batch axis.

    Args:
        img_path: Path of the image file to read.

    Returns:
        An array with one extra leading axis, i.e. (1, H, W, 3) provided
        ``preprocess_input`` keeps the array shape.
    """
    # The context manager closes the source file deterministically; without
    # it, multi-frame formats such as TIFF can keep the handle open.
    with Image.open(img_path) as source:
        resized = source.convert("RGB").resize(IMG_SIZE)
    pixels = np.array(resized, dtype=np.float32)
    pixels = preprocess_input(pixels)
    return np.expand_dims(pixels, axis=0)  # (1,H,W,3)

def annotate_image(img_path: Path, label_text: str, out_path: Path):
    """Save a copy of an image with a label box drawn in the top-left corner.

    Args:
        img_path: Source image to annotate.
        label_text: Text rendered inside the box.
        out_path: Destination file; missing parent folders are created.
    """
    # Close the source file as soon as the RGB copy exists.
    with Image.open(img_path) as source:
        img = source.convert("RGB")

    # "RGBA" drawing mode makes Pillow blend colours that carry an alpha
    # value into the RGB image; in the default mode the alpha of the box
    # fill below is ignored and the box comes out fully opaque.
    draw = ImageDraw.Draw(img, "RGBA")
    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except (OSError, ImportError):
        # Font file not found/readable, or FreeType support unavailable.
        font = ImageFont.load_default()

    margin = 8
    try:
        left, top, right, bottom = draw.textbbox((0, 0), label_text, font=font)
        tw, th = right - left, bottom - top
    except (AttributeError, ValueError):
        # textbbox is missing or rejects this font: estimate the size instead.
        tw = int(draw.textlength(label_text, font=font))
        th = 24

    bw, bh = tw + 2 * margin, th + 2 * margin
    draw.rectangle([(10, 10), (10 + bw, 10 + bh)], fill=(0, 0, 0, 180))
    draw.text((10 + margin, 10 + margin), label_text, font=font, fill=(255, 255, 255))

    out_path.parent.mkdir(parents=True, exist_ok=True)
    img.save(out_path)

def plot_counts_bar(class_counts: Dict[str,int], out_png: Path):
    """Render the per-class prediction counts as a bar chart and save it.

    Args:
        class_counts: Mapping of class name to number of images predicted
            as that class; bars follow the mapping's iteration order.
        out_png: Destination image file (written at 200 dpi).
    """
    names = list(class_counts)
    heights = list(class_counts.values())

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(names, heights)
    # Slant the class names so long labels do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_ylabel("# Images")
    ax.set_title("Predicted Class Counts")
    fig.tight_layout()
    fig.savefig(out_png, dpi=200)
    plt.close(fig)

def infer_ground_truth_from_subfolders(files: List[Path], known_classes: List[str]) -> Dict[str, str]:
    """Guess each file's true class from the name of its parent folder.

    The comparison is case-insensitive; the returned label uses the spelling
    found in ``known_classes``.

    Args:
        files: Image paths to label.
        known_classes: Class names the model knows.

    Returns:
        Mapping of each file's POSIX-style path to the matched class name,
        or to ``""`` when the parent folder is not a known class.
    """
    canonical_by_lower = dict(zip(map(str.lower, known_classes), known_classes))
    return {
        image.as_posix(): canonical_by_lower.get(image.parent.name.lower(), "")
        for image in files
    }

# ----------------------------
# Main
# ----------------------------
def main():
    """Classify every image under DEMO_DIR and write the result files.

    Steps:
      1. Check the required artifacts, then load the model and class map.
      2. Predict each image individually and record its top-k classes.
      3. When an image's parent folder is named like a known class, use it
         as ground truth for a quick top-1 accuracy figure.
      4. Optionally save an annotated copy of each image under ANN_DIR.
      5. Write predictions (JSON and CSV), a class-count bar chart and a
         summary JSON into OUTPUT_DIR.

    Raises:
        FileNotFoundError: if an artifact is missing or no images are found.
    """
    ensure_artifacts()

    print("[INFO] Loading model and classes…")
    model = load_model(str(MODEL_H5))
    _, idx_to_class, ordered_classes = load_class_indices(CLASS_PKL)
    num_classes = len(ordered_classes)
    k = min(TOP_K, num_classes)  # cannot report more classes than exist
    print("[INFO] Classes:", ordered_classes)

    print(f"[INFO] Scanning Demo folder: {DEMO_DIR}")
    files = list_images(DEMO_DIR)
    if not files:
        raise FileNotFoundError(f"No images found under: {DEMO_DIR}")
    print(f"[INFO] Found {len(files)} image(s)")

    # Optional: try to infer ground-truth from subfolder names for a quick accuracy
    inferred_gt = infer_ground_truth_from_subfolders(files, ordered_classes)

    results = []
    class_counts = collections.Counter()
    correct = total = 0

    for i, img_path in enumerate(files, start=1):
        arr = load_tensor(img_path)
        probs = model.predict(arr, verbose=0)[0]  # (C,)

        # Indices of the k largest probabilities, highest first.
        top_idx = np.argsort(probs)[::-1][:k]
        top_classes = [idx_to_class[int(t)] for t in top_idx]
        top_scores  = [float(probs[int(t)]) for t in top_idx]
        pred_class, pred_conf = top_classes[0], top_scores[0]

        class_counts[pred_class] += 1

        # Quick top-1 accuracy if we could infer GT
        gt = inferred_gt.get(img_path.as_posix(), "")
        if gt:
            total += 1
            if gt == pred_class:
                correct += 1

        row = {
            "file": str(img_path),
            "pred_class": pred_class,
            "confidence": round(pred_conf, 6),
            "topk": [{"class": c, "p": round(s,6)} for c, s in zip(top_classes, top_scores)],
            "inferred_ground_truth": gt
        }
        # also flatten top-k columns for CSV convenience
        for j, (c,s) in enumerate(zip(top_classes, top_scores), start=1):
            row[f"top{j}_class"] = c
            row[f"top{j}_p"] = round(s, 6)
        results.append(row)

        if ANNOTATE:
            try:
                label = f"{pred_class} ({pred_conf*100:.1f}%)"
                # Mirror the image's subfolder below ANN_DIR: images are
                # collected recursively, so naming the output by stem alone
                # lets same-named files from different folders overwrite
                # each other.
                rel_dir = img_path.relative_to(DEMO_DIR).parent
                out_img = ANN_DIR / rel_dir / f"{img_path.stem}_pred.png"
                annotate_image(img_path, label, out_img)
            except Exception as e:
                # Annotation is best-effort; a failure must not stop the run.
                print(f"[WARN] Annotate failed for {img_path.name}: {e}")

        print(f"[{i}/{len(files)}] {img_path.name} → {pred_class} ({pred_conf*100:.2f}%)")

    # Save JSON
    with open(JSON_OUT, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"[INFO] Saved JSON → {JSON_OUT}")

    # Save CSV (every row has the same keys because k is fixed for the run)
    fieldnames = list(results[0].keys())
    with open(CSV_OUT, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)
    print(f"[INFO] Saved CSV → {CSV_OUT}")

    # Save class counts bar chart
    plot_counts_bar(dict(class_counts), BAR_PNG)
    print(f"[INFO] Saved class count bar chart → {BAR_PNG}")

    # Save summary
    summary = {
        "demo_dir": str(DEMO_DIR),
        "num_images": len(files),
        "class_counts": dict(class_counts),
        "annotated_dir": str(ANN_DIR) if ANNOTATE else "",
    }
    # Accuracy is only reported when at least one image had an inferred label.
    if total > 0:
        summary["top1_accuracy_inferred"] = round(correct / total, 6)
        summary["num_with_inferred_gt"] = total

    with open(SUMMARY_JSON, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)
    print(f"[INFO] Saved summary → {SUMMARY_JSON}")

    if ANNOTATE:
        print(f"[INFO] Annotated images saved in → {ANN_DIR}")

    print("\n[DONE] Prediction complete.")

# Run the batch prediction only when executed as a script/notebook cell,
# not when this module is imported.
if __name__ == "__main__":
    main()


[INFO] Loading model and classes…




[INFO] Classes: ['Blur', 'Copy Paste', 'CopyPaste+Blur', 'CopyPaste+Insertion', 'CopyPaste+Noise', 'Insertion', 'Insertion+Blur', 'Insertion+Noise', 'Noise', 'Normal']
[INFO] Scanning Demo folder: C:\Users\sagni\Downloads\Docu Verify\Forged Handwritten Document Database\Forged Handwritten Document Database\Handwritten Forged Document Dataset 2023\Demo
[INFO] Found 33 image(s)
[1/33] InsNoise(10).jpg → Insertion+Blur (18.34%)
[2/33] InsNoise(11).jpg → Insertion+Blur (18.26%)
[3/33] InsNoise(12).jpg → Insertion+Blur (18.61%)
[4/33] InsNoise(13).jpg → Insertion+Blur (17.41%)
[5/33] InsNoise(16).jpg → Insertion+Blur (18.02%)
[6/33] InsNoise(17).jpg → Insertion+Blur (15.08%)
[7/33] InsNoise(19).jpg → Insertion+Blur (16.07%)
[8/33] InsNoise(4).jpg → Insertion+Blur (14.93%)
[9/33] InsNoise(5).jpg → Insertion+Blur (14.61%)
[10/33] InsNoise(8).jpg → Insertion+Blur (17.29%)
[11/33] Noise(10).jpg → Insertion+Blur (18.40%)
[12/33] Noise(11).jpg → Insertion+Blur (18.39%)
[13/33] Noise(13).jpg → Ins