In [1]:
# ======================================================================
# Semantic Segmentation MASK Annotation Analysis Notebook
# For CVAT RLE / Mask Brush Tool (COCO JSON)
# No model training, no image visualization
# Author: Karan Heera
# ======================================================================

import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict

# -------------------------------------------------------------
# Configuration
# -------------------------------------------------------------
ANNOTATION_PATH = "../annotations/annotations.json"   # Adjust path if needed
OUTPUT_DIR = "../results"
# Create the results directory up front so every later writer can assume it exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Loading annotations from:", ANNOTATION_PATH)

# -------------------------------------------------------------
# Load COCO-style data
# -------------------------------------------------------------
# JSON interchange files are UTF-8; decode explicitly instead of relying on
# the platform default encoding, which differs between operating systems and
# would garble or reject non-ASCII file/category names.
with open(ANNOTATION_PATH, "r", encoding="utf-8") as f:
    coco = json.load(f)

# Lookup tables keyed by id: category id -> name, image id -> full image record.
categories = {c["id"]: c["name"] for c in coco["categories"]}
images = {img["id"]: img for img in coco["images"]}
annotations = coco["annotations"]

print(f"Loaded {len(images)} images")
print(f"Loaded {len(categories)} categories")
print(f"Loaded {len(annotations)} annotations")

# -------------------------------------------------------------
# Helper: Decode RLE mask to area
# -------------------------------------------------------------
def _decode_compressed_counts(encoded):
    """Decode a COCO compressed-RLE ``counts`` string into a list of run lengths."""
    if isinstance(encoded, (bytes, bytearray)):
        encoded = encoded.decode("ascii")

    runs = []
    pos = 0
    total = len(encoded)
    while pos < total:
        value = 0
        shift = 0
        more = True
        while more:
            if pos >= total:
                raise ValueError("truncated compressed RLE counts string")
            # Each character carries 5 payload bits plus a continuation
            # flag (0x20), offset by 48 to stay in printable ASCII.
            chunk = ord(encoded[pos]) - 48
            value |= (chunk & 0x1F) << shift
            more = bool(chunk & 0x20)
            pos += 1
            shift += 5
            # Sign-extend when the final chunk has its top payload bit set.
            if not more and (chunk & 0x10):
                value |= -1 << shift
        # From the fourth run onwards values are stored as a difference
        # from the run two positions earlier.
        if len(runs) > 2:
            value += runs[-2]
        runs.append(value)
    return runs


def rle_area(rle):
    """
    Return the foreground pixel count (mask area) of a COCO RLE segmentation.

    COCO RLE ``counts`` are alternating run lengths that start with a run of
    background pixels: [n_background, n_foreground, n_background, ...].
    The area is therefore the sum of the odd-indexed entries.

    Parameters:
        rle: segmentation value of an annotation. Expected to be a dict with
            a "counts" entry holding either a list of ints (uncompressed RLE)
            or a str/bytes value (compressed RLE).

    Returns:
        The number of foreground pixels, or 0 when ``rle`` is not a dict
        containing "counts" (e.g. a polygon list).

    Raises:
        ValueError: if a compressed counts string ends in the middle of a value.
    """
    if not (isinstance(rle, dict) and "counts" in rle):
        return 0

    counts = rle["counts"]
    if isinstance(counts, (str, bytes, bytearray)):
        # Compressed RLE: summing a string directly would raise TypeError.
        counts = _decode_compressed_counts(counts)
    return sum(counts[1::2])  # odd indices are the foreground runs

# -------------------------------------------------------------
# Build statistics
# -------------------------------------------------------------
# Per-image and per-category annotation counters, plus the ids of annotations
# that fail the basic quality checks and the area of every annotation.
image_ann_count = defaultdict(int)
category_ann_count = defaultdict(int)
zero_area_annotations = []
invalid_annotations = []
ann_area_list = []

for ann in annotations:
    image_id = ann["image_id"]
    cat_id = ann["category_id"]
    # Use .get(): indexing a missing key would raise KeyError and abort the
    # run before the annotation could ever be reported as invalid.
    seg = ann.get("segmentation")

    image_ann_count[image_id] += 1
    category_ann_count[cat_id] += 1

    # An absent (or null) segmentation is recorded as invalid.
    if seg is None:
        invalid_annotations.append(ann["id"])

    # Determine mask area
    if isinstance(seg, dict) and "counts" in seg:
        area = rle_area(seg)
    else:
        # Polygon lists / missing masks: fall back to the exporter-provided
        # "area" field; a missing or null value counts as 0.
        area = ann.get("area") or 0

    ann_area_list.append(area)

    if area == 0:
        zero_area_annotations.append(ann["id"])

# -------------------------------------------------------------
# Save annotation statistics
# -------------------------------------------------------------
stats_path = os.path.join(OUTPUT_DIR, "annotations_stats.txt")
# Written as UTF-8 explicitly: image file names and category names come from
# the dataset and may contain characters the locale encoding cannot represent.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write("=== Annotation Statistics ===\n\n")
    f.write(f"Total Images: {len(images)}\n")
    f.write(f"Total Annotations: {len(annotations)}\n")
    f.write(f"Total Categories: {len(categories)}\n\n")

    f.write("--- Annotations per Image ---\n")
    for img_id, count in image_ann_count.items():
        # An annotation may reference an image id that is not in the image
        # table; report it with a placeholder instead of raising KeyError.
        fname = images.get(img_id, {}).get("file_name", f"<unknown image id {img_id}>")
        f.write(f"{fname}: {count}\n")

    f.write("\n--- Annotations per Category ---\n")
    for cat_id, count in category_ann_count.items():
        # Same placeholder treatment for category ids missing from the table.
        cat_name = categories.get(cat_id, f"<unknown category id {cat_id}>")
        f.write(f"{cat_name}: {count}\n")

    f.write("\n--- Zero Area Masks ---\n")
    f.write(", ".join(map(str, zero_area_annotations)) + "\n")

    f.write("\n--- Invalid Annotations (missing segmentation) ---\n")
    f.write(", ".join(map(str, invalid_annotations)) + "\n")

print("Saved:", stats_path)

# -------------------------------------------------------------
# Evaluation summary
# -------------------------------------------------------------
eval_path = os.path.join(OUTPUT_DIR, "eval_summary.txt")

# np.max raises ValueError on an empty sequence (and mean/median yield NaN
# with warnings), so a dataset without annotations reports "n/a" instead.
if ann_area_list:
    mean_area = f"{np.mean(ann_area_list):.2f}"
    median_area = f"{np.median(ann_area_list):.2f}"
    max_area = f"{np.max(ann_area_list):.2f}"
else:
    mean_area = median_area = max_area = "n/a"

with open(eval_path, "w") as f:
    f.write("=== Evaluation Summary (Basic Quality Checks) ===\n\n")
    f.write(f"Zero area annotations: {len(zero_area_annotations)}\n")
    f.write(f"Invalid annotations: {len(invalid_annotations)}\n")
    f.write(f"Mean mask area: {mean_area}\n")
    f.write(f"Median mask area: {median_area}\n")
    f.write(f"Max mask area: {max_area}\n")

print("Saved:", eval_path)

# -------------------------------------------------------------
# Class Distribution Plot
# -------------------------------------------------------------
# One horizontal bar per category id seen in the annotations. Ids that are
# missing from the category table get a placeholder label rather than
# aborting the plot with a KeyError.
labels = [
    categories.get(c, f"<unknown category id {c}>") for c in category_ann_count
]
counts = list(category_ann_count.values())

plt.figure(figsize=(12, 7))
plt.barh(labels, counts)
plt.xlabel("Annotation Count")
plt.title("Category Distribution in Semantic Segmentation Masks")
plt.tight_layout()

plot_path = os.path.join(OUTPUT_DIR, "output.png")
plt.savefig(plot_path)
# Close the figure so it is released once the image has been written.
plt.close()

print("Saved:", plot_path)

# -------------------------------------------------------------
# Markdown Report
# -------------------------------------------------------------
# The report is assembled as a list of text fragments and written in one
# call; the image link is relative, so output.png must sit next to report.md.
report_path = os.path.join(OUTPUT_DIR, "report.md")

report_lines = [
    "# Semantic Segmentation Mask Annotation Report\n",
    "Generated automatically from CVAT COCO annotations.\n\n",
    # Dataset-level totals.
    "## Dataset Summary\n",
    f"- **Images:** {len(images)}\n",
    f"- **Annotations:** {len(annotations)}\n",
    f"- **Categories:** {len(categories)}\n\n",
    # Results of the basic quality checks.
    "## Annotation Quality Checks\n",
    f"- Zero Area Masks: **{len(zero_area_annotations)}**\n",
    f"- Invalid Annotations: **{len(invalid_annotations)}**\n\n",
    # Embedded class-distribution figure.
    "## Distribution Plot\n",
    "![](output.png)\n\n",
    # Fixed notes about how the analysis was run.
    "## Notes\n",
    "- All masks analyzed are RLE (Run-Length Encoding) from CVAT Mask Brush.\n",
    "- No images were loaded or displayed.\n",
    "- No model training performed.\n",
]

with open(report_path, "w") as f:
    f.writelines(report_lines)

print("Saved:", report_path)

print("\n=== ALL DONE! ===")
print("Results saved to:", OUTPUT_DIR)


Loading annotations from: ../annotations/annotations.json
Loaded 21 images
Loaded 48 categories
Loaded 203 annotations
Saved: ../results/annotations_stats.txt
Saved: ../results/eval_summary.txt
Saved: ../results/output.png
Saved: ../results/report.md

=== ALL DONE! ===
Results saved to: ../results
