In [10]:
import os
import json
import numpy as np
from PIL import Image
from pycocotools.coco import COCO

# Custom class names in label order. Background is intentionally not listed;
# it implicitly keeps ID 0.
categories = ["grass", "herb", "litter", "soil", "stone", "wood", "woodchip"]
# Class name -> 1-based label ID (ID 0 is left for unlabelled pixels).
category_id_mapping = {
    name: label for label, name in enumerate(categories, start=1)
}

# Label ID -> RGB colour used when rendering a mask for visual inspection.
# There is no entry for ID 0, so unlabelled pixels are rendered black.
category_colors = {
    1: (128, 0, 0),       # grass    - dark red
    2: (0, 128, 0),       # herb     - dark green
    3: (128, 128, 0),     # litter   - olive
    4: (0, 0, 128),       # soil     - dark blue
    5: (128, 0, 128),     # stone    - purple
    6: (0, 128, 128),     # wood     - teal
    7: (128, 128, 128),   # woodchip - grey
}

def apply_color_map(seg_mask, category_colors):
    """Render a 2-D label mask as an RGB image.

    Args:
        seg_mask: 2-D array of label IDs (unpacking ``shape`` into two
            values fails for any other rank).
        category_colors: Mapping of label ID -> ``(R, G, B)`` colour.

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)``. Pixels whose
        label has no entry in ``category_colors`` stay black ``(0, 0, 0)``.
    """
    rows, cols = seg_mask.shape
    rgb = np.zeros((rows, cols, 3), dtype=np.uint8)

    # Paint one class at a time through a boolean selection of its pixels.
    for label in category_colors:
        selected = seg_mask == label
        rgb[selected] = category_colors[label]

    return rgb

def coco_to_voc_segmentation(coco_annotation_file, coco_image_dir, output_dir, category_id_mapping, category_colors):
    """Convert a COCO-annotated image set into a Pascal VOC style layout.

    For every image listed in the annotation file this writes three files
    under ``output_dir``:

    * ``JPEGImages/<file_name>`` - the source image re-saved as RGB.
    * ``SegmentationClass/<stem>.png`` - single-channel ``uint8`` mask whose
      pixel values are the label IDs from ``category_id_mapping``.
    * ``SegmentationClassVisualization/<file_name>`` - the mask rendered in
      colour via ``apply_color_map``.

    Pixels not covered by any mapped annotation stay 0. Where annotations
    overlap, the one processed later overwrites the earlier one.

    Args:
        coco_annotation_file: Path to the COCO annotation JSON file.
        coco_image_dir: Directory that the images' ``file_name`` entries are
            relative to.
        output_dir: Root directory of the generated dataset (created if
            missing).
        category_id_mapping: Mapping of COCO category *name* -> label ID to
            write into the mask. Annotations whose category name is not in
            this mapping are skipped.
        category_colors: Mapping of label ID -> ``(R, G, B)`` used for the
            visualisation image.
    """
    jpeg_dir = os.path.join(output_dir, "JPEGImages")
    mask_dir = os.path.join(output_dir, "SegmentationClass")
    vis_dir = os.path.join(output_dir, "SegmentationClassVisualization")
    for directory in (output_dir, jpeg_dir, mask_dir, vis_dir):
        os.makedirs(directory, exist_ok=True)

    # Load the COCO annotation index.
    coco = COCO(coco_annotation_file)

    for img_id in coco.getImgIds():
        img_info = coco.loadImgs(img_id)[0]
        file_name = img_info['file_name']
        img_file = os.path.join(coco_image_dir, file_name)

        # Re-save the image as RGB. The context manager closes the source
        # file handle; convert() returns an independent, fully loaded image.
        with Image.open(img_file) as source_img:
            img = source_img.convert("RGB")
        img.save(os.path.join(jpeg_dir, file_name))

        # Label mask, initialised to 0 so unannotated pixels remain 0.
        seg_mask = np.zeros((img_info['height'], img_info['width']), dtype=np.uint8)

        # Burn every annotation of this image into the mask.
        ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
        for ann in coco.loadAnns(ann_ids):
            category_name = coco.loadCats(ann['category_id'])[0]['name']
            category_id = category_id_mapping.get(category_name)
            if category_id is None:
                # Category is not part of the target label set.
                continue
            seg_mask[coco.annToMask(ann) == 1] = category_id

        # Report which label IDs ended up in the mask.
        unique_values = np.unique(seg_mask)
        print(f"Image {img_info['file_name']} contains category IDs: {unique_values}")

        # The label mask must always be a lossless PNG. Swap the real
        # extension (whatever it is: .jpg, .jpeg, .JPG, ...) instead of doing
        # a substring replace of '.jpg', which left other extensions
        # unchanged and could rewrite '.jpg' in the middle of a name.
        mask_name = os.path.splitext(file_name)[0] + '.png'
        Image.fromarray(seg_mask).save(os.path.join(mask_dir, mask_name))

        # Colour rendering of the mask for visual inspection; it keeps the
        # source file name, as before.
        color_mask = apply_color_map(seg_mask, category_colors)
        Image.fromarray(color_mask).save(os.path.join(vis_dir, file_name))

        print(f"Processed {img_info['file_name']}")

if __name__ == "__main__":
    # Example invocation with the author's local paths.
    # COCO annotation file:
    coco_annotation_file = "/Users/lu/awesome-semantic-segmentation-pytorch/scripts/coco/annotations.json"
    # Directory holding the COCO images:
    coco_image_dir = "/Users/lu/awesome-semantic-segmentation-pytorch/scripts/coco"
    # Destination directory for the VOC-style dataset:
    output_dir = "/Users/lu/awesome-semantic-segmentation-pytorch/scripts/voc"

    # Convert the COCO dataset into the Pascal VOC segmentation layout.
    coco_to_voc_segmentation(
        coco_annotation_file=coco_annotation_file,
        coco_image_dir=coco_image_dir,
        output_dir=output_dir,
        category_id_mapping=category_id_mapping,
        category_colors=category_colors,
    )

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Image Soil10.jpg contains category IDs: [0 4]
Processed Soil10.jpg
Image Stone3.jpg contains category IDs: [0 5]
Processed Stone3.jpg
Image Herb9.jpg contains category IDs: [0 1 2 5 6]
Processed Herb9.jpg
Image Grass20.jpg contains category IDs: [0 2 6]
Processed Grass20.jpg
Image Herb17.jpg contains category IDs: [0 2]
Processed Herb17.jpg
Image Woodchip10.jpg contains category IDs: [0 7]
Processed Woodchip10.jpg
Image Grass16.jpg contains category IDs: [0 1]
Processed Grass16.jpg
Image Litter9.jpg contains category IDs: [0 3]
Processed Litter9.jpg
Image Soil26.jpg contains category IDs: [0 4]
Processed Soil26.jpg
Image Stone12.jpg contains category IDs: [0 2 5 7]
Processed Stone12.jpg
Image Herb5.jpg contains category IDs: [0 1 2 5 6]
Processed Herb5.jpg
Image Soil30.jpg contains category IDs: [0 4]
Processed Soil30.jpg
Image Herb4.jpg contains category IDs: [0 2 4 7]
Processed Herb4.jpg
Image Soil31.j