In [1]:
import os
import json
import cv2
import numpy as np
from PIL import Image  # Using PIL instead of tifffile
from skimage.measure import label, regionprops
from pycocotools import mask as mask_utils
import matplotlib.pyplot as plt

In [None]:
# Inria Aerial Image Labeling training split on the Kaggle input mount:
# ground-truth building masks and the aerial images they belong to.
masks_path = "/kaggle/input/inria-aerial-image-labeling-dataset/AerialImageDataset/train/gt"
images_path = "/kaggle/input/inria-aerial-image-labeling-dataset/AerialImageDataset/train/images"

In [None]:
# os.listdir returns entries in arbitrary order; sort so the listing is
# identical on every run.
images = sorted(os.listdir(images_path))

In [None]:
# os.listdir returns entries in arbitrary order. The position of each mask in
# this list becomes its COCO image id, so sort to keep ids reproducible.
masks = sorted(os.listdir(masks_path))

In [None]:
# Directory that receives the annotation file. os.makedirs returns None, so
# keep the path itself in `output`; exist_ok=True lets this cell be re-run
# without raising FileExistsError.
output = '/kaggle/working/output/annotations'
os.makedirs(output, exist_ok=True)

In [None]:
# Destination of the annotation file.
output_path = "/kaggle/working/output/annotations/annotation.json"

# Start with an empty JSON object on disk.
data = dict()

with open(output_path, mode="w") as json_file:
    json_file.write(json.dumps(data))

print(f"JSON file saved at: {output_path}")

# Skeleton of the COCO dataset: image records and annotation records are
# appended during conversion; "building" is the only category.
coco_output = {
    "images": [],
    "annotations": [],
    "categories": [{"id": 1, "name": "building", "supercategory": "building"}]
}

# Running annotation id. Start at 1 rather than 0: pycocotools' COCOeval stores
# the matched ground-truth id and tests it for truthiness, so an annotation
# with id 0 would be counted as "no match".
annotation_id = 1

# Convert every ground-truth mask into one COCO "images" entry plus one
# annotation per connected building region.
for image_id, mask in enumerate(masks):
    # Images and masks share file names. The image pixels are not used for the
    # annotations; opening the file still fails fast if the paired image is missing.
    image_path = os.path.join(images_path, mask)
    with Image.open(image_path) as image_file:
        image = np.array(image_file)

    mask_name = mask.split('.')[0]
    mask_path = os.path.join(masks_path, mask)
    with Image.open(mask_path) as mask_img:
        mask_np = np.array(mask_img)

    # Connected-component labelling: 0 stays background, every other value
    # identifies one building instance.
    label_mask = label(mask_np)

    name = os.path.basename(mask_path)
    coco_output['images'].append({
        "id": image_id,
        'width': label_mask.shape[1],
        'height': label_mask.shape[0],
        'file_name': name
    })

    # A single pass over the label image yields every instance with its bounding
    # box and cropped mask. This avoids building a full-image mask per instance.
    regions = regionprops(label_mask)

    print(mask_name)
    # Count the regions instead of reading the last label id, so a mask without
    # any building does not raise IndexError.
    print("number of label: ", len(regions))

    for region in regions:
        # skimage reports the bbox as (min_row, min_col, max_row, max_col), i.e.
        # y before x, with exclusive maxima.
        min_row, min_col, max_row, max_col = region.bbox

        # Crop of this instance only, padded by one background pixel so contours
        # along the crop edge are traced regardless of how findContours treats
        # the image border.
        building_crop = np.pad(region.image.astype(np.uint8), 1, mode='constant')

        # cv2.RETR_EXTERNAL: retrieves only the extreme outer contours, ignores contours inside the object.
        # cv2.CHAIN_APPROX_SIMPLE: compresses horizontal, vertical, and diagonal segments and leaves only their end points.
        # offset shifts the crop-local points back to full-image (x, y)
        # coordinates; the -1 undoes the padding.
        contours, _ = cv2.findContours(
            building_crop,
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE,
            offset=(int(min_col) - 1, int(min_row) - 1),
        )

        segmentation = []
        for contour in contours:
            polygon = contour.flatten().tolist()  # [x0, y0, x1, y1, ...] as plain ints
            if len(polygon) >= 6:  # a valid polygon needs at least three points
                segmentation.append(polygon)

        if not segmentation:
            continue

        # COCO bbox is [x, y, width, height]; x comes from columns, y from rows.
        bbox = [
            int(min_col),
            int(min_row),
            int(max_col - min_col),
            int(max_row - min_row),
        ]

        # Pixel count of the instance mask.
        area = float(np.count_nonzero(region.image))

        # Add annotation
        coco_output["annotations"].append({
            "id": annotation_id,
            "image_id": image_id,
            "category_id": 1,  # 'building'
            "segmentation": segmentation,
            "area": area,
            "bbox": bbox,
            "iscrowd": 0
        })

        annotation_id += 1

# Persist the assembled COCO dictionary as indented JSON at output_path.
with open(output_path, "w") as f:
    json.dump(coco_output, f, indent=4)

# Report the exact file that was written.
print(f"COCO annotations saved to {output_path}")