In [1]:
# ======================================================================
# Line Segmentation Polyline Annotation Analysis Notebook
# For CVAT Polyline Annotations (XML)
# Author: Karan Heera
# ======================================================================

import xml.etree.ElementTree as ET
import os
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np

# -------------------------------------------------------------
# Configuration
# -------------------------------------------------------------
# Input annotation file and the directory that receives every generated
# artifact (statistics, summary, plot, report).
ANNOTATION_PATH = "../annotations/annotations.xml"   # Adjust path if needed
OUTPUT_DIR = "../results"

# Make sure the output directory exists; an already-existing one is fine.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Loading annotations from: {ANNOTATION_PATH}")

# -------------------------------------------------------------
# Parse XML
# -------------------------------------------------------------
# `root` is the top-level element of the parsed document; every query
# further down starts from it.
tree = ET.parse(ANNOTATION_PATH)
root = tree.getroot()

# -------------------------------------------------------------
# Extract Labels
# -------------------------------------------------------------
# Take the <name> text of every <label> element found anywhere in the
# document, de-duplicate through a set, and keep the result as a sorted list.
labels = sorted(
    {label_elem.find("name").text for label_elem in root.findall(".//label")}
)
print(f"Detected {len(labels)} labels:", labels)

# -------------------------------------------------------------
# Extract Polyline Annotations
# -------------------------------------------------------------
# Accumulators filled by the scan below:
#   polyline_count_per_image - polylines per image name (only images
#                              that contain at least one polyline)
#   polyline_count_per_label - polylines per label
#   polyline_points_count    - point count of each polyline that has points
#   invalid_annotations      - "image:label" entries for polylines whose
#                              `points` attribute is missing or blank
polyline_count_per_image = defaultdict(int)
polyline_count_per_label = defaultdict(int)
polyline_points_count = []
invalid_annotations = []

for image in root.findall("image"):
    image_name = image.get("name")

    for poly in image.findall("polyline"):
        label = poly.get("label")
        points = poly.get("points")

        # Every polyline is counted, whether or not its points are usable.
        polyline_count_per_image[image_name] += 1
        polyline_count_per_label[label] += 1

        stripped_points = points.strip() if points is not None else ""
        if not stripped_points:
            invalid_annotations.append(f"{image_name}:{label}")
            continue

        # Points are ";"-separated, so N separators mean N + 1 points.
        polyline_points_count.append(stripped_points.count(";") + 1)

# -------------------------------------------------------------
# Save Annotation Statistics
# -------------------------------------------------------------
# Writes a plain-text summary: totals, per-image counts, per-label counts
# and the list of polylines that had no points.
stats_path = os.path.join(OUTPUT_DIR, "annotations_stats.txt")

# Image names and labels are copied verbatim from the XML and may contain
# non-ASCII characters. An explicit UTF-8 encoding keeps the write from
# failing (or producing a locale-dependent file) on platforms whose default
# text encoding is not UTF-8.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write("=== Polyline Annotation Statistics ===\n\n")
    f.write(f"Total Images: {len(root.findall('image'))}\n")
    f.write(f"Total Annotations: {sum(polyline_count_per_label.values())}\n")
    f.write(f"Total Labels: {len(labels)}\n\n")

    f.write("--- Annotations per Image ---\n")
    for img, count in polyline_count_per_image.items():
        f.write(f"{img}: {count}\n")

    f.write("\n--- Annotations per Label ---\n")
    for label, count in polyline_count_per_label.items():
        f.write(f"{label}: {count}\n")

    f.write("\n--- Invalid Annotations (missing points) ---\n")
    # With no invalid annotations this writes just an empty line.
    f.write(", ".join(invalid_annotations) + "\n")

print("Saved:", stats_path)

# -------------------------------------------------------------
# Evaluation Summary
# -------------------------------------------------------------
# Writes the number of invalid annotations and the mean / median / max
# number of points per polyline.
eval_path = os.path.join(OUTPUT_DIR, "eval_summary.txt")
with open(eval_path, "w", encoding="utf-8") as f:
    f.write("=== Evaluation Summary (Polyline Quality Checks) ===\n\n")
    f.write(f"Invalid annotations: {len(invalid_annotations)}\n")

    if polyline_points_count:
        f.write(f"Mean points per polyline: {np.mean(polyline_points_count):.2f}\n")
        f.write(f"Median points per polyline: {np.median(polyline_points_count):.2f}\n")
        f.write(f"Max points per polyline: {np.max(polyline_points_count):.2f}\n")
    else:
        # No polyline had usable points. np.max raises ValueError on an
        # empty sequence and np.mean / np.median return nan with a warning,
        # so report the statistics as unavailable instead of crashing with
        # a half-written file.
        f.write("Mean points per polyline: n/a\n")
        f.write("Median points per polyline: n/a\n")
        f.write("Max points per polyline: n/a\n")

print("Saved:", eval_path)

# -------------------------------------------------------------
# Distribution Plot
# -------------------------------------------------------------
# Horizontal bar chart of annotation counts, one bar per label, saved as PNG.
labels_list = list(polyline_count_per_label.keys())
counts_list = list(polyline_count_per_label.values())
plot_path = os.path.join(OUTPUT_DIR, "category_distribution.png")

# Same figure as the pyplot state-machine calls would build, expressed
# through the explicit Figure/Axes objects.
fig, ax = plt.subplots(figsize=(12, 7))
ax.barh(labels_list, counts_list, color="skyblue")
ax.set_xlabel("Annotation Count")
ax.set_title("Polyline Annotation Distribution per Label")
fig.tight_layout()

fig.savefig(plot_path)
plt.close(fig)  # release the figure once it is on disk
print("Saved:", plot_path)

# -------------------------------------------------------------
# Markdown Report
# -------------------------------------------------------------
# Assembles the report as a list of text fragments and writes them in one
# call. The report references the distribution plot by its file name.
report_path = os.path.join(OUTPUT_DIR, "report.md")
with open(report_path, "w") as f:
    report_lines = [
        "# Line Segmentation Polyline Annotation Report\n",
        "Generated automatically from CVAT polyline XML annotations.\n\n",
        # Dataset totals
        "## Dataset Summary\n",
        f"- **Images:** {len(root.findall('image'))}\n",
        f"- **Annotations:** {sum(polyline_count_per_label.values())}\n",
        f"- **Labels:** {len(labels)}\n\n",
        # Quality checks
        "## Annotation Quality Checks\n",
        f"- Invalid annotations (missing points): **{len(invalid_annotations)}**\n\n",
        # Embedded plot
        "## Distribution Plot\n",
        "![](category_distribution.png)\n\n",
        # Fixed closing notes
        "## Notes\n",
        "- All annotations analyzed are CVAT polyline annotations.\n",
        "- No images were loaded or displayed.\n",
        "- No model training performed.\n",
    ]
    f.writelines(report_lines)

print("Saved:", report_path)

print("\n=== ALL DONE! ===")
print(f"Results saved to: {OUTPUT_DIR}")


Loading annotations from: ../annotations/annotations.xml
Detected 12 labels: ['Crack_line', 'Curb_line', 'Electricity_line', 'Lane_center_dashed', 'Lane_center_solid', 'Lane_left_boundary', 'Lane_right_boundary', 'Rail_track_line', 'River_centerline', 'Telephone_line', 'Tree_Stem', 'Tree_branches']
Saved: ../results/annotations_stats.txt
Saved: ../results/eval_summary.txt
Saved: ../results/category_distribution.png
Saved: ../results/report.md

=== ALL DONE! ===
Results saved to: ../results
