# In [None]:
# ======================================================================
# Semantic Segmentation Polygon Annotation Analysis Notebook
# No image files are loaded or rendered (image-free version); only annotation metadata is analyzed
# Author: Karan Heera
# ======================================================================

import json
import os
import numpy as np
from collections import defaultdict
import pandas as pd

# -------------------------------------------------------------
# Configuration
# -------------------------------------------------------------
ANNOTATION_PATH = "../annotations/annotations.json"   # Adjust if needed
OUTPUT_DIR = "../results"
# Create the results directory up front so every later section can write into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Loading annotations from:", ANNOTATION_PATH)

# -------------------------------------------------------------
# Load COCO-style segmentation data
# -------------------------------------------------------------
# JSON is UTF-8 by specification. Pin the encoding so that non-ASCII category
# or file names decode correctly regardless of the platform's locale default.
with open(ANNOTATION_PATH, "r", encoding="utf-8") as f:
    coco = json.load(f)

# Index categories and images by their "id" field; annotations stay a plain list.
categories = {c["id"]: c for c in coco["categories"]}
images = {img["id"]: img for img in coco["images"]}
annotations = coco["annotations"]

print(f"Loaded {len(images)} images")
print(f"Loaded {len(categories)} categories")
print(f"Loaded {len(annotations)} annotations")

# -------------------------------------------------------------
# Annotation Statistics
# -------------------------------------------------------------
# Tallies keyed by category id and by image id, plus the raw list of areas.
cat_count = defaultdict(int)
image_count = defaultdict(int)
areas = []

for ann in annotations:
    cat_count[ann["category_id"]] += 1
    image_count[ann["image_id"]] += 1
    # A missing or null "area" is recorded as 0 so the numeric summaries
    # below never receive None.
    areas.append(ann.get("area") or 0)

stats_file = os.path.join(OUTPUT_DIR, "annotations_stats.txt")

# Explicit UTF-8 so non-ASCII category names can always be written.
with open(stats_file, "w", encoding="utf-8") as f:
    f.write("=== Semantic Segmentation Annotation Statistics ===\n\n")
    f.write(f"Total Images: {len(images)}\n")
    f.write(f"Total Annotations: {len(annotations)}\n\n")

    f.write("Annotations Per Category:\n")
    for cid, count in cat_count.items():
        # An annotation may reference a category id that is not declared in
        # the "categories" list; report it under a placeholder name instead
        # of aborting the whole statistics file with a KeyError.
        cat_name = categories.get(cid, {}).get("name", f"<unknown category {cid}>")
        f.write(f"- {cat_name}: {count}\n")

    f.write("\nAnnotations Per Image:\n")
    for img_id, count in image_count.items():
        f.write(f"- Image {img_id}: {count}\n")

    f.write("\nArea Statistics:\n")
    # `areas` has one entry per annotation, so the else-branch is only
    # reached when the dataset contains no annotations at all.
    if areas:
        f.write(f"- Min: {np.min(areas):.2f}\n")
        f.write(f"- Max: {np.max(areas):.2f}\n")
        f.write(f"- Mean: {np.mean(areas):.2f}\n")
    else:
        f.write("No area information available.\n")

print("Saved:", stats_file)


# -------------------------------------------------------------
# Validation (Structural & Logical)
# -------------------------------------------------------------
# Human-readable findings, written to eval_summary.txt at the end of this section.
validation_msgs = []

# 1. Missing images (no actual files loaded, only check metadata)
missing_image_names = [img for img in images.values() if not img.get("file_name")]
if missing_image_names:
    validation_msgs.append(f"Images with missing file_name fields: {len(missing_image_names)}")
else:
    validation_msgs.append("All images have file_name fields.")

# 2. Polygon validity: length, coordinates
invalid_polygons = []

for ann in annotations:
    segs = ann.get("segmentation", [])
    if not segs:
        invalid_polygons.append(f"Annotation {ann['id']} has no segmentation data.")
        continue

    # COCO may store a mask as an RLE dict ("counts"/"size") instead of a list
    # of polygons. Iterating a dict yields its keys, which would be misreported
    # as malformed polygons, so RLE masks are exempt from the polygon checks.
    if isinstance(segs, dict):
        continue

    for seg in segs:
        # Each polygon must be a flat [x1, y1, x2, y2, ...] sequence. Anything
        # else (e.g. a bare number from a non-nested list) cannot be measured
        # with len(), so report it once per annotation and move on.
        if not isinstance(seg, (list, tuple)):
            invalid_polygons.append(
                f"Annotation {ann['id']} has a malformed segmentation entry "
                "(expected a list of coordinates)."
            )
            break
        # Three points are six coordinates: the minimum for a closed polygon.
        if len(seg) < 6:
            invalid_polygons.append(f"Annotation {ann['id']} has too few points (<3).")
        if len(seg) % 2 != 0:
            invalid_polygons.append(f"Annotation {ann['id']} has odd number of coordinates.")

if invalid_polygons:
    validation_msgs.append("Invalid polygons detected:")
    validation_msgs.extend(invalid_polygons)
else:
    validation_msgs.append("All polygons valid.")

# 3. Area check
# A missing or null "area" is treated as 0 and therefore flagged.
zero_area = [ann["id"] for ann in annotations if (ann.get("area") or 0) <= 0]
if zero_area:
    validation_msgs.append("Annotations with zero or negative area: " + ", ".join(map(str, zero_area)))
else:
    validation_msgs.append("All annotation areas > 0.")

# Save validation summary
eval_file = os.path.join(OUTPUT_DIR, "eval_summary.txt")
# Explicit UTF-8 keeps the output independent of the platform locale.
with open(eval_file, "w", encoding="utf-8") as f:
    f.write("=== Dataset Validation Summary ===\n\n")
    for msg in validation_msgs:
        f.write(msg + "\n\n")

print("Saved:", eval_file)

# -------------------------------------------------------------
# Markdown Report (includes output.png graph)
# -------------------------------------------------------------
report_file = os.path.join(OUTPUT_DIR, "report.md")

# The report contains an emoji heading, so it must be written as UTF-8;
# the platform default (e.g. cp1252 on Windows) cannot encode it.
with open(report_file, "w", encoding="utf-8") as f:
    f.write("# Semantic Segmentation Annotation Report\n\n")

    f.write("## Summary\n")
    f.write(f"- Total Images: **{len(images)}**\n")
    f.write(f"- Total Annotations: **{len(annotations)}**\n")
    f.write(f"- Total Categories: **{len(categories)}**\n\n")

    f.write("## Category Distribution\n")
    f.write("```\n")
    for cid, count in cat_count.items():
        # Fall back to a placeholder when an annotation references a category
        # id that is absent from the "categories" list, rather than raising
        # KeyError and leaving a truncated report on disk.
        cat_name = categories.get(cid, {}).get("name", f"<unknown category {cid}>")
        f.write(f"{cat_name}: {count}\n")
    f.write("```\n\n")

    f.write("## Area Statistics\n")
    if areas:
        f.write(f"- Min: **{np.min(areas):.2f}**\n")
        f.write(f"- Max: **{np.max(areas):.2f}**\n")
        f.write(f"- Mean: **{np.mean(areas):.2f}**\n")
    else:
        f.write("No area information.\n")

    f.write("\n## Dataset Validation\n")
    f.write("See `eval_summary.txt` for detailed validation errors.\n\n")

    f.write("## Visualization\n")
    f.write("A summary plot has been generated from the annotation data.\n\n")
    # The heading previously held the mojibake of this emoji
    # (its UTF-8 bytes decoded as cp1252).
    f.write("### 📊 Annotation Summary Plot\n")
    f.write("Below is the combined graph showing:\n")
    f.write("- Annotation count per category\n")
    f.write("- Annotation count per image ID\n\n")
    # Relative link: the plot section saves output.png into OUTPUT_DIR,
    # the same directory as this report.
    f.write("![Annotation Summary](output.png)\n\n")

print("Saved:", report_file)




# -------------------------------------------------------------
# Generate Plots
# -------------------------------------------------------------
import matplotlib.pyplot as plt

plot_path = os.path.join(OUTPUT_DIR, "output.png")

# --- Prepare data ---
# Category ids without a matching entry in `categories` get a placeholder
# label instead of raising KeyError.
cat_names = [
    categories.get(cid, {}).get("name", f"<unknown category {cid}>")
    for cid in cat_count
]
cat_values = list(cat_count.values())

image_ids = list(image_count.keys())
image_values = list(image_count.values())
# Image ids are identifiers, not quantities. Plotting them as strings makes
# the x-axis categorical, so sparse or very large ids still get evenly spaced,
# visible bars instead of hairlines scattered along a numeric axis.
image_labels = [str(img_id) for img_id in image_ids]

# --- Create figure ---
fig, (ax_cat, ax_img) = plt.subplots(1, 2, figsize=(14, 7))

# ----------- Plot 1: Category Distribution ----------
ax_cat.barh(cat_names, cat_values, color="skyblue")
ax_cat.set_title("Annotations per Category")
ax_cat.set_xlabel("Count")

# ----------- Plot 2: Image-wise Annotation Count ----------
ax_img.bar(image_labels, image_values, color="lightgreen")
ax_img.set_title("Annotations per Image ID")
ax_img.set_xlabel("Image ID")
ax_img.set_ylabel("Count")
# Vertical tick labels keep long or numerous ids from overlapping.
ax_img.tick_params(axis="x", rotation=90)

# A single layout pass once both axes are populated.
fig.tight_layout()

fig.savefig(plot_path, dpi=200)
plt.close(fig)

print("Saved plot:", plot_path)

