In [18]:

import json
from pathlib import Path

# from piglegcv.debug_stitch_split import labels

# ====== CONFIGURATION ======
# Colab layout, kept for reference:
# coco_json = Path("/content/pilsen-toys/annotations/instances_default.json")
# images_root = Path("/content/pilsen-toys/images")  # where the images live
# output_dir = Path("/content/pilsen-toys/labels")

# Root of the locally downloaded dataset; "~" is expanded once here, so every
# path derived from it is already absolute.
dataset_root = Path("~/Downloads/pigleg_maybe_viktora_single_image_detector").expanduser()
coco_json = dataset_root.joinpath("annotations/instances_default.json")
images_root = dataset_root.joinpath("images")
labels_root = dataset_root.joinpath("labels")

# Fail fast if the annotation file is missing, before any cell starts writing.
assert coco_json.exists(), f"COCO JSON soubor nenalezen: {coco_json}"

In [14]:


# Convert COCO JSON annotations to Ultralytics (YOLO) label files.
#
# The cell is safe to re-run: all lines for a label file are collected in memory
# and the file is then (over)written once. Appending per annotation would
# duplicate every box each time the cell is executed again.

labels_root.mkdir(parents=True, exist_ok=True)

# ====== LOAD ======
# Read the JSON as UTF-8 explicitly instead of relying on the platform's
# default text encoding (e.g. cp1252 on Windows).
with coco_json.open(encoding="utf-8") as f:
    coco = json.load(f)

# Map image_id -> image record (file_name, width, height).
image_map = {img["id"]: img for img in coco["images"]}

# ====== CONVERT ======
lines_by_label_file = {}  # label file path -> list of YOLO lines for that image
image_exists = {}         # image path -> bool; each image is probed and reported once
count = 0
for ann in coco["annotations"]:
    img_info = image_map[ann["image_id"]]
    img_path = Path(img_info["file_name"])

    # A relative file_name is resolved against the images folder.
    if not img_path.is_absolute():
        img_path = images_root / img_path

    if img_path not in image_exists:
        image_exists[img_path] = img_path.exists()
        if not image_exists[img_path]:
            print(f"⚠️ Obrázek nenalezen: {img_path}")
    if not image_exists[img_path]:
        continue

    # COCO bbox: [x, y, w, h] with (x, y) the top-left corner, in pixels.
    x, y, w, h = ann["bbox"]
    img_w, img_h = img_info["width"], img_info["height"]

    # YOLO format: x_center, y_center, width, height, all relative to image size.
    x_center = (x + w / 2) / img_w
    y_center = (y + h / 2) / img_h
    w_rel = w / img_w
    h_rel = h / img_h

    # NOTE(review): assumes category ids are contiguous and start at 1; YOLO
    # expects 0-based class indices. Confirm against coco["categories"].
    class_id = ann["category_id"] - 1

    line = f"{class_id} {x_center:.6f} {y_center:.6f} {w_rel:.6f} {h_rel:.6f}\n"

    # The label file mirrors the image's location under images_root, with a
    # .txt extension, so nested folders are preserved under labels_root.
    rel_img_parent_path = img_path.parent.relative_to(images_root)
    label_file = labels_root / rel_img_parent_path / (img_path.stem + ".txt")
    lines_by_label_file.setdefault(label_file, []).append(line)

    count += 1

# ====== WRITE ======
# One write per image; "w" replaces any file left over from a previous run.
for label_file, lines in lines_by_label_file.items():
    label_file.parent.mkdir(parents=True, exist_ok=True)
    with label_file.open("w") as f:
        f.writelines(lines)

print(f"✅ Hotovo! Uloženo {count} anotací do složky {labels_root}")

✅ Hotovo! Uloženo 5690 anotací do složky C:\Users\Jirik\Downloads\pigleg_maybe_viktora_single_image_detector\labels


In [16]:
# Convert PNG to JPG

from pathlib import Path
from PIL import Image
import os
def pngs_to_jpgs(images_root: Path, delete_png: bool = False):
    """Convert every PNG file under ``images_root`` (recursively) to a JPEG.

    Each ``name.png`` (suffix matched case-insensitively) is written as
    ``name.jpg`` in the same folder with JPEG quality 90; an existing file of
    that name is overwritten. JPEG has no alpha channel, so images carrying
    transparency are composited onto a white background first.

    Args:
        images_root: Folder that is searched recursively for PNG files.
        delete_png: When True, each source PNG is removed after its JPEG has
            been saved.

    Returns:
        None. The number of converted files is printed.
    """
    converted_count = 0

    # Take a snapshot of the PNG files up front: the loop creates new files and
    # may delete the sources. Directories that merely end in ".png" are skipped.
    png_paths = [
        p for p in images_root.rglob("*")
        if p.suffix.lower() == ".png" and p.is_file()
    ]

    for png_path in png_paths:
        jpg_path = png_path.with_suffix(".jpg")

        with Image.open(png_path) as im:
            # Transparency is either a real alpha band (RGBA/LA/PA) or a tRNS
            # chunk, which Pillow exposes as im.info["transparency"] (typical
            # for palette PNGs). Both are flattened the same way.
            has_transparency = (
                im.mode in ("RGBA", "LA", "PA") or "transparency" in im.info
            )
            if has_transparency:
                rgba = im.convert("RGBA")
                background = Image.new("RGB", rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.getchannel("A"))
                rgb_im = background
            else:
                rgb_im = im.convert("RGB")

            rgb_im.save(jpg_path, format="JPEG", quality=90)
            converted_count += 1

        # Only reached after a successful save, so a failed conversion never
        # costs the original file.
        if delete_png:
            os.remove(png_path)

    print(f"✅ Převedeno {converted_count} PNG souborů na JPG.")


# Destructive step: with delete_png=True each source PNG is removed once its JPG has been written.
pngs_to_jpgs(images_root, delete_png=True)

✅ Převedeno 2287 PNG souborů na JPG.


In [19]:
# Remove image files without labels.
# Destructive: an image is deleted when its label file (same relative path
# under labels_root, .txt extension) is missing or empty.
removed = 0

image_suffixes = {".jpg", ".jpeg", ".png"}

# Snapshot the candidates before deleting anything, so the directory tree is
# not modified while it is still being walked. "*.*" can also match
# directories, hence the is_file() guard.
candidates = [
    p for p in images_root.rglob("*.*")
    if p.suffix.lower() in image_suffixes and p.is_file()
]

for img_path in candidates:
    label_path = (labels_root / img_path.relative_to(images_root)).with_suffix(".txt")

    if not label_path.exists():
        img_path.unlink()
        removed += 1
        continue

    # A label file that exists but is empty carries no annotations either.
    if label_path.stat().st_size == 0:
        img_path.unlink()
        label_path.unlink()  # drop the empty label as well
        removed += 1

print(f"✅ Odstraněno {removed} obrázků bez anotací.")


✅ Odstraněno 1156 obrázků bez anotací.


# Train, val, test split

In [20]:

import json, random, shutil
from pathlib import Path

# ===== CONFIGURATION =====
# images_root and labels_root come from the configuration cell of this notebook.
# images_root = Path("/content/pilsen-toys/images")
# labels_root = Path("/content/pilsen-toys/labels")  # folder with the .txt files
output_root = Path("~/Downloads/suran_sid/").expanduser()
# NOTE(review): index i here must name the class written as `category_id - 1`
# in the label files — confirm this list matches the dataset's categories.
names = ["Dog", "Sun", "Zebra", "Flute"]  # adjust to your classes
output_root.mkdir(parents=True, exist_ok=True)

# Share of the data for train/val/test; test receives whatever remains after
# train and val have been rounded down.
split_ratios = {"train": 0.7, "val": 0.2, "test": 0.1}
random.seed(42)

# ===== COLLECT FILES =====
# Search recursively and match the suffix case-insensitively, like the label
# conversion and cleanup cells do; a flat glob("*.jpg") would silently skip
# images in subfolders. Sorting first makes the seeded shuffle reproducible,
# because directory listing order depends on the filesystem.
image_suffixes = {".jpg", ".jpeg"}
image_files = sorted(
    p for p in images_root.rglob("*")
    if p.suffix.lower() in image_suffixes and p.is_file()
)
random.shuffle(image_files)

n_total = len(image_files)
n_train = int(n_total * split_ratios["train"])
n_val = int(n_total * split_ratios["val"])

splits = {
    "train": image_files[:n_train],
    "val": image_files[n_train:n_train + n_val],
    "test": image_files[n_train + n_val:],
}

print(f"Dataset má {n_total} obrázků → train: {len(splits['train'])}, val: {len(splits['val'])}, test: {len(splits['test'])}")

# ===== COPY =====
for split, files in splits.items():
    img_out = output_root / "images" / split
    lbl_out = output_root / "labels" / split
    img_out.mkdir(parents=True, exist_ok=True)
    lbl_out.mkdir(parents=True, exist_ok=True)

    for img_path in files:
        # Keep the path relative to images_root so that files with the same
        # name in different subfolders do not overwrite each other. For a flat
        # dataset this is just the file name.
        rel_path = img_path.relative_to(images_root)

        img_dst = img_out / rel_path
        img_dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(img_path, img_dst)

        # Copy the matching label, if there is one, to the mirrored location.
        lbl_path = (labels_root / rel_path).with_suffix(".txt")
        if lbl_path.exists():
            lbl_dst = (lbl_out / rel_path).with_suffix(".txt")
            lbl_dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(lbl_path, lbl_dst)

print("✅ Dataset rozdělen a zkopírován.")

# ===== YAML =====
yaml_path = output_root / "dataset.yaml"

# Explicit UTF-8 so non-ASCII paths or class names do not depend on the
# platform's default encoding.
with open(yaml_path, "w", encoding="utf-8") as f:
    f.write(f"train: {output_root / 'images/train'}\n")
    f.write(f"val: {output_root / 'images/val'}\n")
    f.write(f"test: {output_root / 'images/test'}\n\n")
    f.write("names:\n")
    for i, name in enumerate(names):
        f.write(f"  {i}: {name}\n")

print(f"✅ YAML uložen do {yaml_path}")

Dataset má 1286 obrázků → train: 900, val: 257, test: 129
✅ Dataset rozdělen a zkopírován.
✅ YAML uložen do C:\Users\Jirik\Downloads\suran_sid\dataset.yaml
