In [None]:
# Dataset builder: assemble a YOLO detection dataset from the renders folder
import os
import shutil
import random
from pathlib import Path

def build_yolo_dataset_from_renders(
    renders_dir,
    output_dir,
    val_split=0.1,
    test_split=0.1,
    image_exts=(".png", ".jpg", ".jpeg"),
    seed=None,
):
    """Assemble a YOLO-style detection dataset from per-class render folders.

    Every sub-directory of ``renders_dir`` is treated as one class. Inside it,
    images are read from ``image/`` and label files from ``mask_det/``; an
    image is used only when a ``.txt`` file with the same stem exists. The
    collected (image, label) pairs are shuffled, split into train/val/test,
    copied to ``output_dir/images/<split>`` and ``output_dir/labels/<split>``
    under zero-padded sequential names, and a ``data.yaml`` listing the
    classes is written to ``output_dir``.

    Label files are copied verbatim. The class indices printed and written to
    ``data.yaml`` come from the alphabetically sorted folder names; this
    function does not check them against the ids stored inside the label
    files (NOTE(review): presumably they agree -- verify against the tool
    that produced ``mask_det/``).

    Files already present in ``output_dir`` are not removed; files with the
    same name are overwritten.

    Args:
        renders_dir: Folder containing one sub-folder per class.
        output_dir: Destination folder; created if missing.
        val_split: Fraction of all pairs that goes to the validation split.
        test_split: Fraction of all pairs that goes to the test split.
        image_exts: Lower-case file suffixes accepted as images.
        seed: Optional seed for the shuffle. When given, a private
            ``random.Random(seed)`` is used so the split is reproducible.
            When ``None`` the module-level ``random`` state is used, exactly
            as before this parameter existed.

    Raises:
        ValueError: If a split fraction is negative or the two fractions sum
            to more than 1.
        OSError: If ``renders_dir`` cannot be listed (e.g. it does not exist)
            or a file cannot be copied.
    """
    # Validate before touching the filesystem so a bad call leaves no debris.
    # The small tolerance absorbs float rounding in sums such as 0.9 + 0.1.
    if val_split < 0 or test_split < 0 or val_split + test_split > 1 + 1e-9:
        raise ValueError(
            "val_split and test_split must be non-negative and sum to at most 1 "
            f"(got val_split={val_split}, test_split={test_split})"
        )

    renders_dir = Path(renders_dir)
    output_dir = Path(output_dir)

    # Listing the classes first means a missing renders_dir fails before any
    # output directory has been created.
    class_names = sorted(d.name for d in renders_dir.iterdir() if d.is_dir())
    class_map = {name: idx for idx, name in enumerate(class_names)}

    split_dirs = {
        split: (output_dir / "images" / split, output_dir / "labels" / split)
        for split in ("train", "val", "test")
    }
    for img_dir, lbl_dir in split_dirs.values():
        img_dir.mkdir(parents=True, exist_ok=True)
        lbl_dir.mkdir(parents=True, exist_ok=True)

    print("Найденные классы спутников:")
    for k, v in class_map.items():
        print(f"  {v}: {k}")

    all_pairs = []
    for sat_name in class_names:
        image_folder = renders_dir / sat_name / "image"
        label_folder = renders_dir / sat_name / "mask_det"
        if not image_folder.is_dir() or not label_folder.is_dir():
            print(f"Пропущен класс {sat_name} — нет image/ или mask_det/")
            continue

        # is_file() keeps out directories that merely look like images; they
        # would otherwise make shutil.copyfile fail halfway through the copy.
        image_files = sorted(
            f for f in image_folder.iterdir()
            if f.is_file() and f.suffix.lower() in image_exts
        )
        for img_file in image_files:
            label_file = label_folder / (img_file.stem + ".txt")
            if label_file.is_file():
                all_pairs.append((img_file, label_file))

    print(f"\nВсего валидных пар изображение+боксы: {len(all_pairs)}")

    if seed is None:
        random.shuffle(all_pairs)
    else:
        random.Random(seed).shuffle(all_pairs)

    val_size = int(len(all_pairs) * val_split)
    test_size = int(len(all_pairs) * test_split)

    # Validation and test are cut from the front; train receives the rest.
    val_pairs = all_pairs[:val_size]
    test_pairs = all_pairs[val_size:val_size + test_size]
    train_pairs = all_pairs[val_size + test_size:]

    def copy_pairs(pairs, img_dest, lbl_dest, start_idx=0):
        """Copy pairs under sequential names; return the next unused index."""
        counter = start_idx
        for img_src, lbl_src in pairs:
            new_name = f"{counter:06d}"
            shutil.copyfile(img_src, img_dest / (new_name + img_src.suffix.lower()))
            shutil.copyfile(lbl_src, lbl_dest / (new_name + ".txt"))
            counter += 1
        return counter

    # Numbering continues across splits, so a file name is unique in the
    # whole dataset, not only inside its split.
    next_idx = copy_pairs(train_pairs, *split_dirs["train"], start_idx=0)
    next_idx = copy_pairs(val_pairs, *split_dirs["val"], start_idx=next_idx)
    copy_pairs(test_pairs, *split_dirs["test"], start_idx=next_idx)

    yaml_path = output_dir / "data.yaml"
    # Explicit UTF-8: the platform default (e.g. a Windows code page) can
    # mis-encode or reject non-ASCII class names and paths.
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(f"path: {output_dir.as_posix()}\n")
        f.write("train: images/train\n")
        f.write("val: images/val\n")
        f.write("test: images/test\n")
        f.write("\nnames:\n")
        for class_idx, name in enumerate(class_names):
            f.write(f"  {class_idx}: {name}\n")

    print(f"\nДатасет готов в: {output_dir}")
    print(f"data.yaml сохранён: {yaml_path}")

In [4]:
# Run the builder: 10% of the pairs go to validation, 10% to test, the rest to train.
# NOTE(review): both paths are absolute and machine-specific -- adjust before running elsewhere.
RENDERS_DIR = r"D:/Paper_1/renders"
DATASET_DIR = r"D:/Paper_1/dataset_detect"

build_yolo_dataset_from_renders(
    renders_dir=RENDERS_DIR,
    output_dir=DATASET_DIR,
    val_split=0.1,
    test_split=0.1,
)


🛰 Найденные классы спутников:
  0: acrimsat
  1: aqua
  2: aquaris
  3: hubble
  4: icesat2
  5: starlink

📸 Всего валидных пар изображение+боксы: 8302

✅ Датасет готов в: D:\Paper_1\dataset_detect
📄 data.yaml сохранён: D:\Paper_1\dataset_detect\data.yaml
