In [None]:
import os
import shutil
import random
import yaml
from pathlib import Path
from tqdm import tqdm

def prepare_yolo_segmentation_dataset(input_dir, output_dir, val_split=0.1, test_split=0.1,
                                      seed=None, class_names=None):
    """Split image/label pairs into train/val/test folders and write ``data.yaml``.

    Images are read from ``<input_dir>/image`` and labels from
    ``<input_dir>/mask_seg``. An image is kept only when a ``.txt`` file with
    the same stem exists in the label folder. Pairs are shuffled, split, and
    copied to ``<output_dir>/<split>/images`` and ``<output_dir>/<split>/labels``.

    Files already present in ``output_dir`` are never removed. Re-running with a
    different shuffle into the same folder can therefore leave one image in
    several splits; use a fixed ``seed`` or an empty ``output_dir``.

    Args:
        input_dir: Folder containing the ``image`` and ``mask_seg`` sub-folders.
        output_dir: Destination folder; created if missing.
        val_split: Fraction of pairs assigned to the validation split.
        test_split: Fraction of pairs assigned to the test split.
        seed: Optional seed for a reproducible shuffle. ``None`` (default) uses
            the global ``random`` state, as before.
        class_names: Optional sequence of class names written to ``data.yaml``.
            Defaults to ``["base", "solar_panel", "payload"]``.

    Raises:
        ValueError: If a split fraction is outside ``[0, 1]`` or the two
            fractions sum to more than 1.
        AssertionError: If the ``image`` or ``mask_seg`` folder is missing.
    """
    # Reject fractions that would give a negative train size and overlapping slices.
    if not (0 <= val_split <= 1 and 0 <= test_split <= 1):
        raise ValueError("val_split and test_split must each be within [0, 1]")
    if val_split + test_split > 1 + 1e-9:
        raise ValueError("val_split + test_split must not exceed 1")

    image_dir = Path(input_dir) / "image"
    mask_dir = Path(input_dir) / "mask_seg"

    assert image_dir.exists() and mask_dir.exists(), "Папки 'image' и 'mask_seg' не найдены!"

    if class_names is None:
        names = ["base", "solar_panel", "payload"]
    else:
        names = list(class_names)

    # Sort the listing: directory order is filesystem-dependent, and a seeded
    # shuffle is only reproducible when it starts from a fixed order.
    pairs = []
    for img_path in sorted(image_dir.iterdir()):
        if not img_path.name.lower().endswith((".png", ".jpg", ".jpeg")):
            continue
        mask_path = mask_dir / f"{img_path.stem}.txt"
        if mask_path.exists():
            pairs.append((img_path, mask_path))

    print(f"Найдено пар: {len(pairs)}")

    if seed is None:
        # Keep honouring any random.seed(...) the caller set globally.
        random.shuffle(pairs)
    else:
        # A private generator leaves the global random state untouched.
        random.Random(seed).shuffle(pairs)

    n_total = len(pairs)
    n_val = int(n_total * val_split)
    n_test = int(n_total * test_split)
    n_train = n_total - n_val - n_test  # train absorbs the rounding remainder

    splits = {
        "train": pairs[:n_train],
        "val": pairs[n_train:n_train + n_val],
        "test": pairs[n_train + n_val:]
    }

    out_root = Path(output_dir)
    for split in splits:
        for sub in ["images", "labels"]:
            (out_root / split / sub).mkdir(parents=True, exist_ok=True)

    for split, items in splits.items():
        for img_path, txt_path in tqdm(items, desc=f"📦 Копируем {split}"):
            shutil.copy(img_path, out_root / split / "images" / img_path.name)
            shutil.copy(txt_path, out_root / split / "labels" / txt_path.name)

    # as_posix() gives forward slashes on Windows without altering POSIX paths
    # that contain a literal backslash.
    data_yaml = {
        "train": (out_root / "train" / "images").as_posix(),
        "val": (out_root / "val" / "images").as_posix(),
        "test": (out_root / "test" / "images").as_posix(),
        "nc": len(names),
        "names": names
    }

    yaml_path = out_root / "data.yaml"
    with open(yaml_path, "w", encoding="utf-8") as f:
        yaml.dump(data_yaml, f)

    print(f"\nДатасет готов. YAML сохранён в: {yaml_path}")


In [None]:
# Source renders (expects 'image' and 'mask_seg' sub-folders) and dataset destination.
input_dir = r"D:\Paper_1\renders\icesat2"
output_dir = r"D:\Paper_1\segmentation\datasets\icesat2"

# Fix the global RNG so the shuffle, and therefore the train/val/test
# partition, is the same on every Restart & Run All.
# NOTE: files from earlier runs in output_dir are not deleted; clear the
# folder first if it was filled by a differently shuffled run.
random.seed(42)

prepare_yolo_segmentation_dataset(input_dir, output_dir)


🔢 Найдено пар: 1400


📦 Копируем train: 100%|██████████| 1120/1120 [00:24<00:00, 46.13it/s]
📦 Копируем val: 100%|██████████| 140/140 [00:02<00:00, 51.16it/s]
📦 Копируем test: 100%|██████████| 140/140 [00:02<00:00, 51.64it/s]


✅ Датасет готов. YAML сохранён в: D:\Paper_1\segmentation\datasets\icesat2\data.yaml



