In [2]:
import os
import json
from pathlib import Path
from PIL import Image
from tqdm import tqdm


def convert_yolo_to_coco(data_folder, splits=("train", "val", "test"), class_names=("leaf",)):
    """Convert nested YOLO annotations to COCO format and save in a coco folder.

    Expected layout under ``data_folder``::

        images/<split>/**/<name>.jpg|.jpeg|.png
        labels/<split>/**/<name>.txt

    For every split one ``<split>_coco_annotations.json`` file is written to
    ``<data_folder>/coco`` (the folder is created if it does not exist).
    Images that have no matching label file are left out of the export.

    Each non-blank label line is ``<class> <numbers...>`` with coordinates
    normalised to the image size:

    * 4 numbers  -> detection box ``cx cy w h`` (exported with an empty
      ``segmentation`` list);
    * 6 or more numbers (even count) -> polygon ``x1 y1 x2 y2 ...``.

    Args:
        data_folder: Root folder of the YOLO dataset.
        splits: Names of the split sub-folders to convert.
        class_names: Category names ordered by YOLO class index; YOLO class
            ``i`` becomes COCO category id ``i + 1``.

    Raises:
        ValueError: If a label line has a coordinate count that is neither a
            box (4) nor a polygon (even, >= 6), or contains non-numeric tokens.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(splits, str):
        splits = (splits,)
    if isinstance(class_names, str):
        class_names = (class_names,)

    # NOTE: fixed literal carried over from the original export, not the run time.
    export_timestamp = "2025-01-26T12:08:48+00:00"
    image_extensions = (".jpg", ".jpeg", ".png")

    def get_image_info(image_path, image_id):
        """Retrieve image information for COCO format."""
        with Image.open(image_path) as img:
            width, height = img.size
        return {
            "id": image_id,
            "file_name": os.path.relpath(image_path, start=data_folder),  # Relative path
            "width": width,
            "height": height,
            "date_captured": export_timestamp,
        }

    def polygon_area(xs, ys):
        """Return the area enclosed by a polygon (shoelace formula)."""
        count = len(xs)
        doubled = 0.0
        for i in range(count):
            j = (i + 1) % count  # wrap around to close the polygon
            doubled += xs[i] * ys[j] - xs[j] * ys[i]
        return abs(doubled) / 2.0

    def get_annotations(label_path, image_id, annotation_id, width, height):
        """Retrieve annotation information for COCO format.

        Returns the list of annotation dicts for one label file together with
        the next free annotation id.
        """
        annotations = []
        with open(label_path, "r", encoding="utf-8") as f:
            for line_number, line in enumerate(f, start=1):
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no annotation.
                    continue

                category_id = int(parts[0]) + 1  # YOLO classes are zero-indexed
                coords = [float(value) for value in parts[1:]]

                if len(coords) == 4:
                    # Detection format: normalised centre + size.
                    cx, cy, box_w, box_h = coords
                    bbox_width = box_w * width
                    bbox_height = box_h * height
                    x_min = cx * width - bbox_width / 2
                    y_min = cy * height - bbox_height / 2
                    segmentation = []
                    area = bbox_width * bbox_height
                elif len(coords) >= 6 and len(coords) % 2 == 0:
                    # Segmentation format: normalised polygon vertices.
                    xs = [x * width for x in coords[0::2]]
                    ys = [y * height for y in coords[1::2]]
                    x_min = min(xs)
                    y_min = min(ys)
                    bbox_width = max(xs) - x_min
                    bbox_height = max(ys) - y_min
                    segmentation = [[value for point in zip(xs, ys) for value in point]]
                    # COCO "area" describes the region itself, not its bounding box.
                    area = polygon_area(xs, ys)
                else:
                    raise ValueError(
                        f"{label_path}:{line_number}: expected 4 box values or an even "
                        f"number (>= 6) of polygon coordinates, got {len(coords)}"
                    )

                annotations.append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": category_id,
                    "bbox": [x_min, y_min, bbox_width, bbox_height],
                    "area": area,
                    "segmentation": segmentation,
                    "iscrowd": 0,
                })
                annotation_id += 1
        return annotations, annotation_id

    def process_split(split_name, coco_output_dir):
        """Process train, val, or test split."""
        image_root = os.path.join(data_folder, "images", split_name)
        label_root = os.path.join(data_folder, "labels", split_name)
        output_file = os.path.join(coco_output_dir, f"{split_name}_coco_annotations.json")

        images = []
        annotations = []
        categories = [
            {"id": index, "name": name, "supercategory": "none"}
            for index, name in enumerate(class_names, start=1)
        ]
        image_id = 1
        annotation_id = 1

        # Walk through all subfolders
        all_image_files = []
        for root, _, files in os.walk(image_root):
            for image_file in files:
                if image_file.lower().endswith(image_extensions):
                    all_image_files.append(os.path.join(root, image_file))
        # os.walk order depends on the filesystem; sort so ids are reproducible.
        all_image_files.sort()

        for image_path in tqdm(all_image_files, desc=f"Processing {split_name} images"):
            label_path = os.path.join(
                label_root, os.path.relpath(os.path.dirname(image_path), image_root),
                os.path.splitext(os.path.basename(image_path))[0] + ".txt"
            )

            if not os.path.exists(label_path):
                continue

            # Add image info
            image_info = get_image_info(image_path, image_id)
            images.append(image_info)

            # Add annotation info
            img_annotations, annotation_id = get_annotations(
                label_path, image_id, annotation_id, image_info["width"], image_info["height"]
            )
            annotations.extend(img_annotations)
            image_id += 1

        # Annotations pointing at an undeclared category make the file inconsistent;
        # report it instead of failing so the export still completes.
        declared_ids = {category["id"] for category in categories}
        undeclared_ids = sorted({ann["category_id"] for ann in annotations} - declared_ids)
        if undeclared_ids:
            print(
                f"Warning: {split_name} annotations use category ids {undeclared_ids} "
                f"that are not covered by class_names"
            )

        # Save COCO JSON
        coco_data = {
            "info": {
                "year": "2025",
                "version": "1",
                "description": "Exported from nested YOLO to COCO script",
                "contributor": "",
                "url": "",
                "date_created": export_timestamp,
            },
            "licenses": [
                {
                    "id": 1,
                    "url": "https://creativecommons.org/licenses/by/4.0/",
                    "name": "CC BY 4.0"
                }
            ],
            "categories": categories,
            "images": images,
            "annotations": annotations,
        }
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(coco_data, f, indent=4)
        print(f"Saved COCO annotations for {split_name} at {output_file}")

    # Create the coco directory
    coco_output_dir = os.path.join(data_folder, "coco")
    os.makedirs(coco_output_dir, exist_ok=True)

    # Process splits
    for split in splits:
        process_split(split, coco_output_dir)

In [3]:
# Root of the YOLO dataset: it must contain images/<split>/ and labels/<split>/
# sub-folders. An empty string makes those paths resolve relative to the
# current working directory.
yolo_dataset_path = ""

In [4]:
# Run the conversion; one JSON file per split is written under <dataset>/coco.
convert_yolo_to_coco(data_folder=yolo_dataset_path)

Processing train images: 100%|████████████████████████████████| 11544/11544 [00:12<00:00, 932.32it/s]


Saved COCO annotations for train at /home/rsaric/Desktop/leaf_cv/data/yolo_train/coco/train_coco_annotations.json


Processing val images: 100%|███████████████████████████████████| 3669/3669 [00:03<00:00, 1043.58it/s]


Saved COCO annotations for val at /home/rsaric/Desktop/leaf_cv/data/yolo_train/coco/val_coco_annotations.json


Processing test images: 0it [00:00, ?it/s]

Saved COCO annotations for test at /home/rsaric/Desktop/leaf_cv/data/yolo_train/coco/test_coco_annotations.json



