In [None]:
# ======================================================================
# Facial Landmarks 21-Keypoints Annotation Analysis Notebook
# For CVAT XML Point Annotations (21 keypoints per face)
# Author: Karan Heera
# ======================================================================

import xml.etree.ElementTree as ET
import os
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np

# -------------------------------------------------------------
# Configuration
# -------------------------------------------------------------
# Relative path of the CVAT XML export to analyse.
ANNOTATION_XML = "../annotations/annotations.xml"  # Adjust path if needed
# Directory that receives the files written further down (stats, summary, plot, report).
OUTPUT_DIR = "../results"
# Create the output directory up front; exist_ok=True makes re-runs harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Loading annotations from:", ANNOTATION_XML)

# -------------------------------------------------------------
# Parse XML
# -------------------------------------------------------------
# Reads the whole document into memory; raises if the file is missing
# or is not well-formed XML.
tree = ET.parse(ANNOTATION_XML)
# Root element; the extraction loop looks up its direct <image> children.
root = tree.getroot()

# -------------------------------------------------------------
# Helper: Extract keypoints per image
# -------------------------------------------------------------
# One record per <image>:
#   {"image": name, "num_points": number of valid points, "points": [(label, x, y), ...]}
images_data = []
# Number of valid points seen per label, summed over all images.
label_count = defaultdict(int)
# (image name, label) for every point that is missing, malformed or
# outside the image bounds.
invalid_annotations = []

for image in root.findall("image"):
    img_name = image.get("name")
    width = int(image.get("width"))
    height = int(image.get("height"))
    points_list = image.findall("points")

    image_points = []
    for pt in points_list:
        label = pt.get("label")
        coords = pt.get("points")
        if not coords:
            # Missing or empty "points" attribute.
            invalid_annotations.append((img_name, label))
            continue

        # A CVAT <points> element may hold several points serialised as
        # "x1,y1;x2,y2;...". Handle every pair on its own instead of
        # assuming a single "x,y", which would raise on unpacking.
        for pair in coords.split(";"):
            try:
                x_str, y_str = pair.split(",")
                x = float(x_str)
                y = float(y_str)
            except ValueError:
                # Wrong number of components or a non-numeric value:
                # record it as invalid rather than aborting the analysis.
                invalid_annotations.append((img_name, label))
                continue

            # Bounds are inclusive on both ends. The chained comparison
            # is False for NaN, so NaN coordinates are rejected too.
            if 0 <= x <= width and 0 <= y <= height:
                image_points.append((label, x, y))
                label_count[label] += 1
            else:
                invalid_annotations.append((img_name, label))

    images_data.append({"image": img_name, "num_points": len(image_points), "points": image_points})

# -------------------------------------------------------------
# Basic Statistics
# -------------------------------------------------------------
total_images = len(images_data)

# Build the per-image lookup and the overall point total in a single
# pass over the parsed records.
annotations_per_image = {}
total_annotations = 0
for record in images_data:
    annotations_per_image[record["image"]] = record["num_points"]
    total_annotations += record["num_points"]

# Number of distinct labels that have at least one valid point.
total_labels = len(label_count)

print(f"Total Images: {total_images}")
print(f"Total Annotations (points): {total_annotations}")
print(f"Total Labels: {total_labels}")

# Plain-dict snapshot of the per-label counters.
annotations_per_label = dict(label_count.items())

# -------------------------------------------------------------
# Save Statistics
# -------------------------------------------------------------
stats_path = os.path.join(OUTPUT_DIR, "annotations_stats.txt")
# Image names and labels come straight from the XML and may contain
# non-ASCII characters. Writing with an explicit UTF-8 encoding keeps the
# export from failing (or varying) with the platform's default encoding.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write("=== Facial Landmarks 21-Keypoints Annotation Statistics ===\n\n")
    f.write(f"Total Images: {total_images}\n")
    f.write(f"Total Annotations (points): {total_annotations}\n")
    f.write(f"Total Labels: {total_labels}\n\n")

    # One "<image>: <valid point count>" line per image.
    f.write("--- Annotations per Image ---\n")
    f.writelines(
        f"{img}: {count}\n" for img, count in annotations_per_image.items()
    )

    # One "<label>: <valid point count>" line per label.
    f.write("\n--- Annotations per Label ---\n")
    f.writelines(
        f"{label}: {count}\n" for label, count in annotations_per_label.items()
    )

    # One "<image> -> <label>" line per rejected point.
    f.write("\n--- Invalid Annotations (missing or out-of-bound points) ---\n")
    f.writelines(f"{img} -> {label}\n" for img, label in invalid_annotations)

print("Saved statistics to:", stats_path)

# -------------------------------------------------------------
# Evaluation Summary
# -------------------------------------------------------------
# Valid-point count of every image, in parse order.
all_points_count = [d["num_points"] for d in images_data]

if all_points_count:
    mean_text = f"{np.mean(all_points_count):.2f}"
    median_text = f"{np.median(all_points_count):.2f}"
    max_text = f"{np.max(all_points_count):.2f}"
else:
    # With no images, np.max raises ValueError on the empty sequence and
    # np.mean / np.median yield NaN with a RuntimeWarning. Report the
    # statistics as unavailable instead of crashing.
    mean_text = median_text = max_text = "n/a"

eval_path = os.path.join(OUTPUT_DIR, "eval_summary.txt")
with open(eval_path, "w") as f:
    f.write("=== Evaluation Summary (Point Quality Checks) ===\n\n")
    f.write(f"Invalid Annotations (missing/out-of-bound): {len(invalid_annotations)}\n")
    f.write(f"Mean points per image: {mean_text}\n")
    f.write(f"Median points per image: {median_text}\n")
    f.write(f"Max points per image: {max_text}\n")

print("Saved evaluation summary to:", eval_path)

# -------------------------------------------------------------
# Distribution Plot
# -------------------------------------------------------------
# Labels in the dictionary's insertion order, with the count looked up
# for each one so both lists stay aligned.
labels = list(annotations_per_label)
counts = [annotations_per_label[name] for name in labels]
plot_path = os.path.join(OUTPUT_DIR, "category_distribution_21kp.png")

# One bar per label; labels are rotated so long names stay readable.
plt.figure(figsize=(14, 6))
plt.bar(labels, counts, color="skyblue")
plt.xticks(rotation=90)
plt.ylabel("Number of Keypoints")
plt.title("Facial Landmarks Distribution (21 Keypoints)")
plt.tight_layout()
plt.savefig(plot_path)
# Close the figure after saving.
plt.close()
print("Saved distribution plot to:", plot_path)

# -------------------------------------------------------------
# Markdown Report
# -------------------------------------------------------------
report_path = os.path.join(OUTPUT_DIR, "report.md")
with open(report_path, "w") as f:
    # Assemble the report as a list of lines and write it in one call.
    report_lines = [
        "# Facial Landmarks 21-Keypoints Annotation Report\n",
        "Generated automatically from CVAT XML point annotations.\n\n",
        # Headline counts.
        "## Dataset Summary\n",
        f"- **Images:** {total_images}\n",
        f"- **Annotations:** {total_annotations}\n",
        f"- **Labels:** {total_labels}\n\n",
        # Number of rejected points.
        "## Annotation Quality Checks\n",
        f"- Invalid Annotations (missing/out-of-bound points): **{len(invalid_annotations)}**\n\n",
        # The image link is relative: the plot is saved to the same directory.
        "## Distribution Plot\n",
        "![](category_distribution_21kp.png)\n\n",
        "## Notes\n",
        "- All annotations analyzed are CVAT point annotations (21 facial keypoints per face).\n",
        "- No images were loaded or displayed.\n",
        "- No model training performed.\n",
    ]
    f.writelines(report_lines)

print("Saved report to:", report_path)

print("\n=== ALL DONE! ===")
print("Results saved to:", OUTPUT_DIR)


Loading annotations from: ../annotations/annotations.xml
Total Images: 5
Total Annotations (points): 105
Total Labels: 21
Saved statistics to: ../results/annotations_stats.txt
Saved evaluation summary to: ../results/eval_summary.txt
Saved distribution plot to: ../results/category_distribution_21kp.png
Saved report to: ../results/report.md

=== ALL DONE! ===
Results saved to: ../results
