In [7]:
import agml
import os
import json
import shutil
import random
from tqdm import tqdm

In [10]:
def coco_to_yolo(coco_json_path, images_dir, output_dir, seed=None,
                 train_frac=0.6, val_frac=0.15):
    """Convert a COCO detection dataset to YOLO layout with a random split.

    Reads the COCO JSON, shuffles the image records, splits them into
    train/val/test, then for every image copies the image file to
    ``<output_dir>/<split>/images/`` and writes one YOLO label file to
    ``<output_dir>/<split>/labels/``. Each label line has the form
    ``class x_center y_center width height`` with coordinates normalized by
    the image ``width``/``height`` stored in the COCO record.

    Args:
        coco_json_path: Path to the COCO JSON file; it must contain the
            ``images``, ``annotations`` and ``categories`` keys.
        images_dir: Directory that ``file_name`` entries are relative to.
        output_dir: Root directory for the generated split folders.
        seed: Optional seed for the shuffle. ``None`` (default) shuffles with
            the module-level ``random`` generator, so the split is only
            reproducible if the caller seeded it beforehand.
        train_frac: Fraction of images assigned to the train split.
        val_frac: Fraction of images assigned to the val split. Whatever is
            left after train and val goes to the test split.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
        ZeroDivisionError: If an annotated image has ``width`` or ``height``
            equal to 0.

    Notes:
        * Files already present in ``output_dir`` are never removed. Running
          again with a different shuffle can therefore leave the same image
          in more than one split; use a fixed ``seed`` or a fresh directory.
        * Images missing from ``images_dir`` are not copied, but their label
          file is still written; the number of such images is reported.
        * Bounding boxes are not clipped to the image bounds.
    """
    # Small tolerance so fraction pairs such as 0.85 + 0.15 are not rejected
    # because of floating-point rounding.
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1 + 1e-9:
        raise ValueError(
            "train_frac and val_frac must be non-negative and sum to at most 1 "
            f"(got train_frac={train_frac}, val_frac={val_frac})"
        )

    # Load COCO annotations
    with open(coco_json_path, 'r') as f:
        coco_data = json.load(f)

    # Copy the list so the shuffle does not reorder the parsed JSON in place.
    images = list(coco_data["images"])
    annotations = coco_data["annotations"]
    categories = coco_data["categories"]

    # Mapping category_id to YOLO class index (position in the category list).
    single_class = len(categories) == 1
    cat_id_to_yolo_id = {cat["id"]: idx for idx, cat in enumerate(categories)}

    # Organizing annotations by image_id. Annotations that point at an image
    # which is not listed in "images" cannot be written anywhere, so they are
    # counted and skipped instead of aborting the whole conversion.
    img_annotations = {img["id"]: [] for img in images}
    orphaned = 0
    for ann in annotations:
        bucket = img_annotations.get(ann["image_id"])
        if bucket is None:
            orphaned += 1
            continue
        bucket.append(ann)
    if orphaned:
        print(f"Warning: skipped {orphaned} annotation(s) whose image_id is not listed in 'images'.")

    # Shuffle images for random split
    if seed is None:
        random.shuffle(images)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(images)

    # Define split sizes
    total = len(images)
    train_size = int(train_frac * total)
    val_size = int(val_frac * total)

    # Define dataset splits
    splits = {
        "train": images[:train_size],
        "val": images[train_size:train_size + val_size],
        "test": images[train_size + val_size:],
    }

    # Create output directories
    for split in splits:
        os.makedirs(os.path.join(output_dir, split, "images"), exist_ok=True)
        os.makedirs(os.path.join(output_dir, split, "labels"), exist_ok=True)

    # Convert annotations and copy images
    missing_images = 0
    for split, img_list in splits.items():
        for img in tqdm(img_list, desc=f"Processing {split}"):
            img_id = img["id"]
            file_name = img["file_name"]
            img_path = os.path.join(images_dir, file_name)
            output_img_path = os.path.join(output_dir, split, "images", file_name)
            # splitext swaps only the real extension, whatever its case or
            # type (.JPG, .jpeg, .bmp, ...), and never touches the stem.
            label_path = os.path.join(
                output_dir, split, "labels", os.path.splitext(file_name)[0] + ".txt"
            )

            # file_name may contain sub-directories; make sure they exist.
            os.makedirs(os.path.dirname(output_img_path), exist_ok=True)
            os.makedirs(os.path.dirname(label_path), exist_ok=True)

            # Copy image to appropriate split folder
            if os.path.exists(img_path):
                shutil.copy(img_path, output_img_path)
            else:
                missing_images += 1

            # Create YOLO label file (left empty for images without boxes)
            with open(label_path, "w") as label_file:
                for ann in img_annotations.get(img_id, []):
                    x, y, w, h = ann["bbox"]
                    img_w, img_h = img["width"], img["height"]

                    # Convert to YOLO format (normalized x_center, y_center, width, height)
                    x_center = (x + w / 2) / img_w
                    y_center = (y + h / 2) / img_h
                    w /= img_w
                    h /= img_h

                    # For a single-class dataset, always assign class 0.
                    if single_class:
                        yolo_class = 0
                    else:
                        category_id = ann["category_id"]
                        if category_id not in cat_id_to_yolo_id:
                            print(f"Warning: category id {category_id} not found in mapping. Skipping annotation.")
                            continue
                        yolo_class = cat_id_to_yolo_id[category_id]

                    label_file.write(f"{yolo_class} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")

    if missing_images:
        print(f"Warning: {missing_images} image file(s) listed in the annotations were not found in {images_dir}.")

    print("Dataset conversion and splitting completed!")


# Source Domain (Synthetic Grape)

In [4]:
# Fetch the synthetic-day grape dataset into ../datasets. The loader is built
# here only for that side effect; it is not stored, so its repr is the cell output.
agml.data.AgMLDataLoader('grape_detection_syntheticday', dataset_path='../datasets/grape_detection_syntheticday')

Downloading grape_detection_syntheticday (size = 48.6 MB): 48635904it [00:01, 41610906.69it/s]                              ape_detection_syntheticday.


[AgML Download]: Extracting files for grape_detection_syntheticday... Done!

You have just downloaded [1mgrape_detection_syntheticday[0m.

This dataset has [1mno license[0m.

When using this dataset, please cite the following:

@ARTICLE{10.3389/fpls.2019.01185,
  
AUTHOR={Bailey, Brian N.},   
	 
TITLE={Helios: A Scalable 3D Plant and Environmental Biophysical Modeling Framework},      
	
JOURNAL={Frontiers in Plant Science},      
	
VOLUME={10},      
	
YEAR={2019},      
	  
URL={https://www.frontiersin.org/article/10.3389/fpls.2019.01185},       
	
DOI={10.3389/fpls.2019.01185},      
	
ISSN={1664-462X},   
   
ABSTRACT={This article presents an overview of Helios, a new three-dimensional (3D) plant and environmental modeling framework. Helios is a model coupling framework designed to provide maximum flexibility in integrating and running arbitrary 3D environmental system models. Users interact with Helios through a well-documented open-source C++ API. Version 1.0 comes with mod

<AgMLDataLoader: (dataset=grape_detection_syntheticday, task=object_detection, images=448) at 0xa6f80c55420>

In [11]:
# coco_to_yolo shuffles with the module-level `random` generator, so seed it
# first: the train/val/test split is then identical on every fresh run.
random.seed(42)
coco_to_yolo(
    coco_json_path="../datasets/grape_detection_syntheticday/annotations.json", 
    images_dir="../datasets/grape_detection_syntheticday/images",
    output_dir="../datasets/grape_detection_syntheticday/reformatted"
)

Processing train:   0%|          | 0/268 [00:00<?, ?it/s]

Processing train: 100%|██████████| 268/268 [00:01<00:00, 136.11it/s]
Processing val: 100%|██████████| 67/67 [00:00<00:00, 130.62it/s]
Processing test: 100%|██████████| 113/113 [00:00<00:00, 141.08it/s]

Dataset conversion and splitting completed!





# Target Domain (Real Grape)

In [12]:
# Fetch the real (California, daytime) grape dataset into ../datasets. The loader
# is built here only for that side effect; it is not stored, so its repr is the cell output.
agml.data.AgMLDataLoader('grape_detection_californiaday', dataset_path='../datasets/grape_detection_californiaday')

Downloading grape_detection_californiaday (size = 359.6 MB): 359653376it [00:07, 45528804.61it/s]                               _detection_californiaday.


[AgML Download]: Extracting files for grape_detection_californiaday... Done!

You have just downloaded [1mgrape_detection_californiaday[0m.

This dataset has [1mno license[0m.

When using this dataset, please cite the following:

@misc{GrapeDay,
  author    = {Plant AI and Biophysics Lab},
  title     = {Grape Detection 2019 Day},
  year      = {2019},
  url       = {https://github.com/plant-ai-biophysics-lab/AgML} 
 

You can find additional information about this dataset at:


This message will [1mnot[0m be automatically shown
again. To view this message again, in an AgMLDataLoader
run `loader.info.citation_summary()`. Otherwise, you
can use `agml.data.source(<name>).citation_summary().`

You can find your dataset at /group/jmearlesgrp/scratch/eranario/AGILE/datasets/grape_detection_californiaday.


<AgMLDataLoader: (dataset=grape_detection_californiaday, task=object_detection, images=126) at 0xa6f7b55b790>

In [13]:
# coco_to_yolo shuffles with the module-level `random` generator, so seed it
# first: the train/val/test split is then identical on every fresh run.
random.seed(42)
coco_to_yolo(
    coco_json_path="../datasets/grape_detection_californiaday/annotations.json", 
    images_dir="../datasets/grape_detection_californiaday/images",
    output_dir="../datasets/grape_detection_californiaday/reformatted"
)

Processing train: 100%|██████████| 75/75 [00:01<00:00, 68.58it/s]
Processing val: 100%|██████████| 18/18 [00:00<00:00, 104.10it/s]
Processing test: 100%|██████████| 33/33 [00:00<00:00, 97.89it/s] 

Dataset conversion and splitting completed!



