In [2]:
from pycocotools.coco import COCO

# Location of the COCO-format annotation JSON to inspect
annotation_file = './voc_to_coco.json'

# Build the COCO helper; constructing it loads the annotations and creates the index
coco = COCO(annotation_file=annotation_file)


loading annotations into memory...
Done (t=0.27s)
creating index...
index created!


In [3]:
# Inspect the dataset-level "info" field (falls back to an empty dict when the key is absent)
info = coco.dataset.get('info', {})
print("Dataset Info:")
for key in info:
    value = info[key]
    print(f"{key}: {value}")

Dataset Info:
description: VOC to COCO conversion
version: 1.0
year: 2025
date_created: 2025-05-23


In [4]:
# Collect the ID of every image in the dataset
img_ids = coco.getImgIds()
print(f"Total number of images: {len(img_ids)}")

# Resolve the IDs to their full image records
images = coco.loadImgs(img_ids)

# Preview only the first 3 records to keep the output short
print("\nImage Information (first 3 images):")
for img in images[:3]:
    print(
        f"Image ID: {img['id']}, File Name: {img['file_name']}, "
        f"Width: {img['width']}, Height: {img['height']}"
    )

Total number of images: 17125

Image Information (first 3 images):
Image ID: 1, File Name: 2011_001429.jpg, Width: 500, Height: 375
Image ID: 2, File Name: 2010_001241.jpg, Width: 375, Height: 500
Image ID: 3, File Name: 2008_002666.jpg, Width: 332, Height: 500


In [5]:
# Collect the ID of every category in the dataset
cat_ids = coco.getCatIds()
print(f"Total number of categories: {len(cat_ids)}")

# Resolve the IDs to their full category records
categories = coco.loadCats(cat_ids)

# List every category; 'N/A' is shown when a record has no supercategory key
print("\nCategory Information:")
for cat in categories:
    print(
        f"Category ID: {cat['id']}, Name: {cat['name']}, "
        f"Supercategory: {cat.get('supercategory', 'N/A')}"
    )

Total number of categories: 20

Category Information:
Category ID: 1, Name: aeroplane, Supercategory: none
Category ID: 2, Name: bicycle, Supercategory: none
Category ID: 3, Name: bird, Supercategory: none
Category ID: 4, Name: boat, Supercategory: none
Category ID: 5, Name: bottle, Supercategory: none
Category ID: 6, Name: bus, Supercategory: none
Category ID: 7, Name: car, Supercategory: none
Category ID: 8, Name: cat, Supercategory: none
Category ID: 9, Name: chair, Supercategory: none
Category ID: 10, Name: cow, Supercategory: none
Category ID: 11, Name: diningtable, Supercategory: none
Category ID: 12, Name: dog, Supercategory: none
Category ID: 13, Name: horse, Supercategory: none
Category ID: 14, Name: motorbike, Supercategory: none
Category ID: 15, Name: person, Supercategory: none
Category ID: 16, Name: pottedplant, Supercategory: none
Category ID: 17, Name: sheep, Supercategory: none
Category ID: 18, Name: sofa, Supercategory: none
Category ID: 19, Name: train, Supercategory: none
Category ID: 20, Name: tvmonitor, Supercategory: none

In [6]:
# Collect the ID of every annotation in the dataset
ann_ids = coco.getAnnIds()
print(f"Total number of annotations: {len(ann_ids)}")

# Resolve the IDs to their full annotation records
annotations = coco.loadAnns(ann_ids)

# Count annotations per category. Every known category starts at 0 so that
# categories without annotations are still reported. Using .get() keeps the
# tally going for an annotation whose category_id is not in cat_ids instead
# of aborting the whole cell with a KeyError.
cat_anns = {cat_id: 0 for cat_id in cat_ids}
for ann in annotations:
    cat_anns[ann['category_id']] = cat_anns.get(ann['category_id'], 0) + 1

# Resolve all category names with one loadCats call rather than one call per
# category inside the printing loop.
cat_names = {cat['id']: cat['name'] for cat in coco.loadCats(cat_ids)}

# Report the annotation count of each category
print("\nAnnotations per Category:")
for cat_id, count in cat_anns.items():
    # 'UNKNOWN' marks a category_id used by annotations but absent from cat_ids
    cat_name = cat_names.get(cat_id, 'UNKNOWN')
    print(f"Category: {cat_name} (ID: {cat_id}), Annotations: {count}")

# Maximum number of characters of the segmentation shown per annotation
SEG_PREVIEW_CHARS = 80

# Show the details of the first 3 annotations only
print("\nAnnotation Details (first 3 annotations):")
for ann in annotations[:3]:
    # Truncate the *text* of the segmentation instead of slicing the raw value:
    # slicing raises on a dict-valued (RLE) segmentation and, on a list of
    # polygons, keeps whole polygons rather than shortening the output.
    seg_text = str(ann.get('segmentation', 'N/A'))
    if len(seg_text) > SEG_PREVIEW_CHARS:
        seg_text = seg_text[:SEG_PREVIEW_CHARS] + "..."
    print(f"Annotation ID: {ann['id']}, Image ID: {ann['image_id']}, "
          f"Category ID: {ann['category_id']}, "
          f"BBox: {ann['bbox']}, Area: {ann['area']}, "
          f"Segmentation: {seg_text}")

Total number of annotations: 40138

Annotations per Category:
Category: aeroplane (ID: 1), Annotations: 1002
Category: bicycle (ID: 2), Annotations: 837
Category: bird (ID: 3), Annotations: 1271
Category: boat (ID: 4), Annotations: 1059
Category: bottle (ID: 5), Annotations: 1561
Category: bus (ID: 6), Annotations: 685
Category: car (ID: 7), Annotations: 2492
Category: cat (ID: 8), Annotations: 1277
Category: chair (ID: 9), Annotations: 3056
Category: cow (ID: 10), Annotations: 771
Category: diningtable (ID: 11), Annotations: 800
Category: dog (ID: 12), Annotations: 1598
Category: horse (ID: 13), Annotations: 803
Category: motorbike (ID: 14), Annotations: 801
Category: person (ID: 15), Annotations: 17401
Category: pottedplant (ID: 16), Annotations: 1202
Category: sheep (ID: 17), Annotations: 1084
Category: sofa (ID: 18), Annotations: 841
Category: train (ID: 19), Annotations: 704
Category: tvmonitor (ID: 20), Annotations: 893

Annotation Details (first 3 annotations):
Annotation ID: 1,

In [7]:
# Count the annotations attached to each image
print("\nAnnotations per Image:")
for img_id in img_ids[:5]:  # preview only the first 5 images
    # Use a dedicated name here: rebinding `ann_ids` would silently replace the
    # notebook-level list of all annotation IDs with those of the last image.
    img_ann_ids = coco.getAnnIds(imgIds=[img_id])
    img = coco.loadImgs([img_id])[0]
    print(f"Image: {img['file_name']}, Annotations: {len(img_ann_ids)}")


Annotations per Image:
Image: 2011_001429.jpg, Annotations: 1
Image: 2010_001241.jpg, Annotations: 3
Image: 2008_002666.jpg, Annotations: 2
Image: 2008_000558.jpg, Annotations: 1
Image: 2008_001719.jpg, Annotations: 1
