In [1]:
# Dataset selector: the annotation file and the image folder are both derived from it.
dataset_name = "controlled-conditions"
# Combined COCO annotation file for the selected dataset.
coco_json_path = f"../../annotations/{dataset_name}/info/{dataset_name}_all.json"
# Directory prefix for image files (trailing slash is required by the string joins that use it).
images_dir = f"/mnt/data0/martez/{dataset_name}/dataset/images/"

In [2]:
import json 
def validate_coco_annotations(coco_json_path):
    """Sanity-check the annotations of a COCO-style JSON file and print a report.

    Every entry of the top-level ``"annotations"`` list is checked for:

    * ``bbox`` being a 4-item list/tuple of numbers with positive width and height,
    * ``area``, when present, being a positive number,
    * the presence of a ``category_id`` key.

    Each problem is printed as it is found. An annotation with a bad ``bbox`` is
    reported for that only (the remaining checks are skipped for it). The final
    summary line gives the number of annotations that had at least one problem.

    Args:
        coco_json_path: Path of the JSON file to read.

    Returns:
        None. The result is reported on stdout only.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Be explicit about the encoding instead of depending on the locale default.
    with open(coco_json_path, encoding="utf-8") as f:
        data = json.load(f)

    print("🔎 Starting annotation validation...")

    bad_annotations = 0
    for ann in data.get("annotations", []):
        bbox = ann.get("bbox", None)
        area = ann.get("area", None)

        # Check the container type before len(): a scalar bbox would make len() raise.
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            print(f"❌ Bad bbox: {bbox}")
            bad_annotations += 1
            continue

        if not all(isinstance(val, (int, float)) for val in bbox):
            print(f"❌ Non-numeric bbox: {bbox}")
            bad_annotations += 1
            continue

        # bbox is read as [x, y, width, height]; only the size is validated here.
        w, h = bbox[2], bbox[3]
        if w <= 0 or h <= 0:
            print(f"❌ Zero or negative width/height in bbox: {bbox}")
            bad_annotations += 1
            continue

        has_problem = False

        # The type test must come first: comparing a non-numeric area with 0
        # would raise TypeError instead of reporting the annotation.
        if area is not None and (not isinstance(area, (int, float)) or area <= 0):
            print(f"⚠️ Suspicious area: {area}")
            has_problem = True

        if "category_id" not in ann:
            print(f"❌ Missing category_id in annotation: {ann}")
            has_problem = True

        # Count the annotation once, even when several checks failed for it.
        if has_problem:
            bad_annotations += 1

    print(f"\n✅ Validation complete. Found {bad_annotations} problematic annotations.")

In [4]:
# Validate the two OSR field annotation files.
# (To check the dataset configured at the top instead: validate_coco_annotations(coco_json_path))
for osr_json_path in (
    "../../annotations/osr-fields/gh/GH_OSR.json",
    "../../annotations/osr-fields/lg/LG_OSR.json",
):
    validate_coco_annotations(osr_json_path)

🔎 Starting annotation validation...

✅ Validation complete. Found 0 problematic annotations.
🔎 Starting annotation validation...

✅ Validation complete. Found 0 problematic annotations.


In [None]:
from pycocotools.coco import COCO

# Load the COCO dataset
try:
    coco = COCO("../../annotations/osr-fields/gh/GH_OSR.json")
except Exception as e:
    print("Error loading COCO dataset:", e)
    # Stop here: carrying on would either fail below with a NameError or,
    # worse, silently use a `coco` object left in the kernel by an earlier run.
    raise
else:
    print("COCO dataset loaded successfully!")

# Get basic dataset stats
print("Number of images:", len(coco.imgs))
print("Number of annotations:", len(coco.anns))
print("Number of categories:", len(coco.cats))

# List all category names
categories = coco.loadCats(coco.getCatIds())
category_names = [cat["name"] for cat in categories]
print("Categories:", category_names)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
COCO dataset loaded successfully!
Number of images: 6113
Number of annotations: 3955
Number of categories: 6
Categories: ['aranae', 'carabidae', 'diptera-hymenoptera', 'isopoda', 'myriapoda', 'staphylinidae']


In [7]:
# Split the image ids by whether at least one annotation refers to them.
all_image_ids = set(coco.imgs)
annotated_image_ids = {annotation["image_id"] for annotation in coco.anns.values()}
unannotated_image_ids = all_image_ids.difference(annotated_image_ids)

print(f"Total images: {len(all_image_ids)}")
print(f"Images with annotations: {len(annotated_image_ids)}")
print(f"Images with NO annotations: {len(unannotated_image_ids)}")

Total images: 6113
Images with annotations: 3596
Images with NO annotations: 2517


In [8]:
# Collect every annotation whose box has a non-positive width or height.
# The single-item inner loop unpacks the box, so a bbox that does not have
# exactly four entries still raises, as a plain tuple assignment would.
invalid_bboxes = [
    annotation
    for annotation in coco.anns.values()
    for (_left, _top, box_w, box_h) in [annotation["bbox"]]
    if box_w <= 0 or box_h <= 0
]

print(f"Invalid bounding boxes found: {len(invalid_bboxes)}")
if len(invalid_bboxes) > 0:
    print("Example of an invalid bbox:", invalid_bboxes[0])

Invalid bounding boxes found: 0


In [9]:
from collections import defaultdict

# How many times each (image, category, bbox) signature has been seen so far.
dup_check = defaultdict(int)
# Every occurrence of a signature after its first one.
duplicates = []
for annotation in coco.anns.values():
    signature = (
        annotation["image_id"],
        annotation["category_id"],
        tuple(annotation["bbox"]),  # lists are unhashable; a tuple can be a dict key
    )
    dup_check[signature] += 1
    if dup_check[signature] >= 2:
        duplicates.append(annotation)

print(f"Duplicate annotations found: {len(duplicates)}")
if len(duplicates) > 0:
    print("Example of a duplicate annotation:", duplicates[0])

Duplicate annotations found: 0


In [39]:
import cv2
# Try to decode every image listed in the COCO index and report the ones that fail.
# NOTE(review): `images_dir` is built from `dataset_name`, while `coco` is loaded from a
# separately hard-coded annotation path — confirm both refer to the same dataset.
for img_id, img_info in coco.imgs.items():
    img_path = "{}{}".format(images_dir, img_info["file_name"])
    img = cv2.imread(img_path)
    if img is None:
        # This cell treats a None result as an unreadable or missing file.
        print(f"Corrupt or missing image: {img_path}")
        continue
    if img.shape[:2] == (0, 0):
        print(f"Invalid image dimensions: {img_path} -> {img.shape}")

KeyboardInterrupt: 