In [None]:
import shutil
from pathlib import Path
import cv2
import os

In [None]:
# Root folder of the source "capsule" dataset (relative to the notebook's working directory).
# The cells below read `train/good`, `test/<class>` and `ground_truth/<defect>` beneath it.
# NOTE(review): presumably the MVTec AD capsule category, judging by the name — confirm.
mvtec_path = Path("../data/capsule")  

In [None]:

# Rebuild the YOLO dataset skeleton from scratch so that re-running this cell
# always starts from an empty `capsule_yolo/` tree.
out = Path("capsule_yolo")
if out.exists():
    # Deliberately NOT ignore_errors=True: if the old tree cannot be removed
    # (e.g. a file is locked), fail here with the real cause instead of a
    # confusing FileExistsError from the mkdir calls below.
    shutil.rmtree(out)

images = out / "images"
labels = out / "labels"
for split in ["train", "val"]:
    (images / split).mkdir(parents=True)
    (labels / split).mkdir(parents=True)

print("Creating dataset from:", mvtec_path)

# Defect-free capsules: `train/good` feeds the train split, `test/good` the val split.
good_sources = [
    ("train", mvtec_path / "train" / "good"),
    ("val", mvtec_path / "test" / "good"),
]
for split, src_dir in good_sources:
    # sorted(): glob order is filesystem-dependent, and the output name embeds
    # the enumeration index, so sort to make the mapping reproducible.
    for i, p in enumerate(sorted(src_dir.glob("*.png"))):
        dst = images / split / f"good_{split}_{i:04d}.png"
        shutil.copy(p, dst)
        # An empty label file marks the image as containing no objects.
        (labels / split / dst.with_suffix(".txt").name).touch()



Creating dataset from: ..\data\capsule
Train images : 219
Val images   : 132


In [None]:
# Class index written on every defect polygon row.
# NOTE(review): index 1 is kept from the original cell; it requires the dataset
# YAML to declare at least two classes — confirm against the training config.
DEFECT_CLASS_ID = 1
# Contours whose area (in pixels) is below this are treated as mask noise and skipped.
MIN_CONTOUR_AREA = 30
# Normalised polygon covering the whole image, used when no readable mask exists.
# A polygon (not a `cx cy w h` box row) keeps every label file in the same
# segmentation format; mixing box rows into a polygon dataset is not valid.
FULL_IMAGE_POLYGON = "0.0 0.0 1.0 0.0 1.0 1.0 0.0 1.0"

# Copy every defective test image into the val split and derive its polygon
# label(s) from the matching ground-truth mask.
for defect in ["crack", "faulty_imprint", "poke", "scratch", "squeeze"]:
    defect_path = mvtec_path / "test" / defect
    if not defect_path.exists():
        continue
    for img_path in defect_path.glob("*.png"):
        # Prefix with the defect name: file names repeat across defect folders.
        new_name = f"{defect}_{img_path.name}"
        shutil.copy(img_path, images / "val" / new_name)

        mask_path = mvtec_path / "ground_truth" / defect / (img_path.stem + "_mask.png")
        label_file = labels / "val" / Path(new_name).with_suffix(".txt")

        # cv2.imread returns None (it does not raise) when the file is missing
        # or cannot be decoded, so both cases share the fallback below.
        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE) if mask_path.exists() else None

        if mask is None:
            lines = [f"{DEFECT_CLASS_ID} {FULL_IMAGE_POLYGON}"]
        else:
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            h, w = mask.shape
            lines = []
            for cnt in contours:
                if cv2.contourArea(cnt) < MIN_CONTOUR_AREA:
                    continue
                # Contour points are (x, y) pixel pairs; normalise x by the
                # image width and y by the image height.
                seg = []
                for x, y in cnt.reshape(-1, 2).tolist():
                    seg.append(x / w)
                    seg.append(y / h)
                lines.append(f"{DEFECT_CLASS_ID} " + " ".join(f"{v:.6f}" for v in seg))

        # If every contour was filtered out, leave the file empty (same as the
        # good-image labels) rather than writing a lone blank line.
        with open(label_file, "w") as f:
            f.write("\n".join(lines) + "\n" if lines else "")


