In [1]:
import xml.etree.ElementTree as ET
import pathlib
import json
import shutil
import os 
from PIL import Image

import numpy as np 
import cv2 
import pycocotools.mask as coco_mask

from detectron2.structures import BoxMode

# Copy the segmentation images to the SegImages directory

In [2]:
# The 20 PASCAL VOC object category names.
# Order matters: prepare_annotation_data uses a name's position in this list
# (CLASSES.index) as its category id, so do not sort or reorder it.
CLASSES = [
    "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog",
    "horse", "motorbike", "person", "pottedplant",
    "sheep", "sofa", "train", "tvmonitor",
]

In [3]:
# Dataset locations, all relative to the notebook's working directory.
voc12_root_path = './datasets/VOC2012'
# Shared devkit prefix, so the two paths below cannot drift apart.
voc12_devkit_path = f'{voc12_root_path}/VOCdevkit/VOC2012'
image_root_path = f'{voc12_devkit_path}/JPEGImages'
filelist_path = f'{voc12_devkit_path}/ImageSets/Segmentation/trainval.txt'

# Destination directory for the copied segmentation-split images.
seg_root_path = os.path.join(voc12_root_path, 'SegImages')
# exist_ok=True already makes this a no-op when the directory exists,
# so no separate os.path.exists() check is needed.
os.makedirs(seg_root_path, exist_ok=True)

In [4]:
# Copy every image named in the segmentation file list into seg_root_path.
# The list file holds one image id per line (without extension); reading it
# inside a `with` block guarantees the handle is closed, and split() ignores
# blank lines and surrounding whitespace instead of stopping at the first one.
with open(filelist_path, 'r') as f:
    file_list = [f"{image_id}.jpg" for image_id in f.read().split()]

for filename in file_list:
    file_path = os.path.join(image_root_path, filename)
    # copy2 also preserves file metadata (e.g. timestamps) where possible.
    shutil.copy2(file_path, seg_root_path)

# Convert VOC to COCO format

In [5]:
def get_img_size(ann_file):
    """Return the image size recorded in a VOC-style XML annotation file.

    Args:
        ann_file: Path (``str`` or ``pathlib.Path``) to the XML annotation.

    Returns:
        A ``(width, height)`` tuple of ints read from the ``<size>`` element.

    Raises:
        AttributeError: If ``<size>``, ``<width>`` or ``<height>`` is missing.
        ValueError: If the width or height text is not an integer.
    """
    # Open in binary mode so the XML parser honours the document's own
    # encoding declaration, and close the handle as soon as parsing is done
    # (the previous version leaked one open file per call).
    with open(ann_file, 'rb') as ann_fh:
        root = ET.parse(ann_fh).getroot()

    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    return width, height

In [6]:
def prepare_annotation_data(ann_file, seg_ann_file, class_agnostic=False):
    """Build per-object annotation dicts for one VOC image.

    Each kept ``<object>`` in the XML yields a dict with a box, an RLE
    instance mask, and a category id.

    Args:
        ann_file: Path to the VOC XML annotation of the image.
        seg_ann_file: Path to the instance-segmentation image for the same
            image. Assumes the k-th ``<object>`` in the XML (1-based) is
            painted with pixel value k -- TODO confirm against the dataset.
        class_agnostic: If True, every object gets ``category_id`` 0;
            otherwise the id is the class name's index in ``CLASSES``.

    Returns:
        A list of dicts with keys ``iscrowd``, ``bbox`` (``[xmin, ymin,
        xmax, ymax]`` floats with xmin/ymin shifted by -1), ``segmentation``
        (pycocotools RLE whose ``counts`` is a ``str`` so it can be written
        as JSON), ``category_id`` and ``bbox_mode``. Objects whose class is
        not in ``CLASSES`` or that are flagged ``difficult`` are skipped.
    """
    # Close the XML handle once parsed (the previous version leaked it).
    with open(ann_file, 'rb') as ann_fh:
        root = ET.parse(ann_fh).getroot()

    # Materialise the pixels while the image file is open, then release it.
    with Image.open(seg_ann_file) as seg_pil:
        seg_img = np.array(seg_pil)

    annotations = []
    # idx counts every <object>, including skipped ones, so that it stays
    # aligned with the instance value used in the segmentation image.
    for idx, obj in enumerate(root.iter('object'), start=1):
        difficult = int(obj.find('difficult').text)

        cls = obj.find('name').text
        if cls not in CLASSES or difficult == 1:
            continue

        cls_id = 0 if class_agnostic else CLASSES.index(cls)

        bbox = obj.find("bndbox")
        bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
        # Original annotations are integers in the range [1, W or H]
        # Assuming they mean 1-based pixel indices (inclusive),
        # a box with annotation (xmin=1, xmax=W) covers the whole image.
        # In coordinate space this is represented by (xmin=0, xmax=W)
        bbox[0] -= 1.0
        bbox[1] -= 1.0

        # Binary mask of this instance, encoded as COCO run-length encoding.
        # pycocotools requires a Fortran-ordered uint8 array.
        bimask = np.where(seg_img == idx, 1, 0).astype(np.uint8)
        seg = coco_mask.encode(np.asfortranarray(bimask))
        # encode() returns bytes for 'counts'; JSON needs text.
        seg['counts'] = seg['counts'].decode('utf8')

        annotations.append({
            "iscrowd": 0,  # difficult objects are dropped above, never marked crowd
            "bbox": bbox,
            "segmentation": seg,
            "category_id": cls_id,
            "bbox_mode": BoxMode.XYXY_ABS})
    return annotations

In [7]:
# Take the image ids from ImageSets/Segmentation rather than ImageSets/Main.

voc07_dir = './datasets/VOC2007/VOCdevkit/VOC2007'
voc12_dir = './datasets/VOC2012/VOCdevkit/VOC2012'
voc07_segobj_dir = f'{voc07_dir}/SegmentationObject'
voc12_segobj_dir = './datasets/VOC2012/VOCdevkit/VOC2012/SegmentationObject'

year2dir = {"2007": voc07_dir, "2012": voc12_dir}
# Both years must point at the instance-mask directory; the 2007 entry
# previously pointed at the dataset root, so no mask PNG would be found there.
year2segdir = {"2007": voc07_segobj_dir, "2012": voc12_segobj_dir}

# (year, split) pairs to convert.
sets = [('2012', 'trainval')]

# When True every object gets category id 0 (class-agnostic detection).
is_CAD = True

CAD_name = "_CAD" if is_CAD else ""

for year, image_set in sets:
    year_dir = pathlib.Path(year2dir[year])
    # read_text() opens and closes the split file for us.
    split_file = year_dir / "ImageSets" / "Segmentation" / f"{image_set}.txt"
    image_ids = split_file.read_text().strip().split()
    print(f"==> Year: {year}, ImageSet: {image_set}, Number of images: {len(image_ids)}")

    data = []
    for image_id in image_ids:
        full_img_path = year_dir / "JPEGImages" / f"{image_id}.jpg"
        full_ann_path = year_dir / "Annotations" / f"{image_id}.xml"
        full_seg_ann_path = pathlib.Path(year2segdir[year]) / f"{image_id}.png"

        width, height = get_img_size(full_ann_path)
        assert full_img_path.is_file(), f"missing image: {full_img_path}"
        data.append({
            "file_name": str(full_img_path),
            "image_id": image_id,
            "height": height, "width": width,
            "annotations": prepare_annotation_data(full_ann_path, full_seg_ann_path, is_CAD),
        })

    # The output is the bare list of per-image records; no dataset/meta_data
    # wrapper is written.
    json_data = data

    dst_file = f'./voc_objects_{year}_{image_set}{CAD_name}_coco_style.json'
    print(f"Saving the coco-style voc data at {dst_file}")
    with open(dst_file, 'w') as outfile:
        json.dump(json_data, outfile)

==> Year: 2012, ImageSet: trainval, Number of images: 2913
Saving the coco-style voc data at ./voc_objects_2012_trainval_CAD_coco_style.json
