In [1]:
# Install the third-party packages imported in the next cell (cv2 is provided by opencv-python).
pip install opencv-python tqdm





In [3]:
import os
import cv2
from tqdm import tqdm

# Root folder of the source dataset; process_split() reads images and their
# same-named .txt label files from <INPUT_ROOT>/<split>.
INPUT_ROOT = "S:/Summer25/MineDataset/Annotation_Done/All_together"            # your YOLO dataset
# Root folder the crops are written under (<OUTPUT_ROOT>/<split>/<class>/).
# NOTE(review): this is currently the same path as INPUT_ROOT, so the per-class
# folders are created inside the YOLO split directories - confirm this is intended.
OUTPUT_ROOT = "S:/Summer25/MineDataset/Annotation_Done/All_together"       # classification dataset
# Number of class folders ("0" .. str(NUM_CLASSES - 1)) pre-created for each split.
NUM_CLASSES = 13

# File extensions (compared in lower case) that mark a directory entry as an image.
IMG_EXTS = [".jpg", ".png", ".jpeg"]

def yolo_to_bbox(yolo_line, img_w, img_h):
    """Turn one YOLO label line into a class id plus a pixel-space corner box.

    Args:
        yolo_line: Text holding exactly five whitespace-separated numbers:
            class, x-centre, y-centre, width, height. The last four are
            multiplied by the image size, i.e. treated as fractions of it.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        Tuple ``(cls, x1, y1, x2, y2)`` of ints. Corner coordinates are
        truncated toward zero by ``int()`` and are NOT clamped to the image,
        so they may be negative or exceed the image size.

    Raises:
        ValueError: If the line does not contain exactly five numeric fields.
    """
    tokens = yolo_line.split()
    class_val, cx, cy, bw, bh = map(float, tokens)
    class_id = int(class_val)

    # Scale the normalised centre and size up to pixel units.
    cx, cy = cx * img_w, cy * img_h
    bw, bh = bw * img_w, bh * img_h

    # Centre +/- half the extent gives the two opposite corners.
    half_w, half_h = bw / 2, bh / 2
    left, top = int(cx - half_w), int(cy - half_h)
    right, bottom = int(cx + half_w), int(cy + half_h)

    return class_id, left, top, right, bottom

def process_split(split):
    """Crop every labelled object of one dataset split into per-class folders.

    For each image in ``INPUT_ROOT/<split>`` that has a same-named ``.txt``
    label file next to it, every labelled box is cut out of the image,
    resized to 32x32 and saved as
    ``OUTPUT_ROOT/<split>/<class>/<split>_<n>.jpg``.

    Skipped without error: images that have no label file, images
    ``cv2.imread`` cannot decode, blank label lines, and boxes that are empty
    after clamping to the image. Boxes whose class id lies outside
    ``range(NUM_CLASSES)`` and crops that ``cv2.imwrite`` fails to store are
    not counted as crops; if any occurred, their number is printed after the
    summary line.

    Args:
        split: Name of the split sub-directory, e.g. ``"train"`` or ``"val"``.

    Raises:
        ValueError: If a non-blank label line cannot be parsed. The message
            names the label file and the 1-based line number.
    """
    input_dir = os.path.join(INPUT_ROOT, split)
    output_dir = os.path.join(OUTPUT_ROOT, split)

    os.makedirs(output_dir, exist_ok=True)

    # Create class folders
    for c in range(NUM_CLASSES):
        os.makedirs(os.path.join(output_dir, str(c)), exist_ok=True)

    # Sorted because os.listdir() returns entries in arbitrary order; without
    # it the <split>_<n>.jpg numbering would differ from run to run.
    images = sorted(
        f for f in os.listdir(input_dir)
        if os.path.splitext(f)[1].lower() in IMG_EXTS
    )

    counter = 0  # crops actually written to disk
    skipped = 0  # boxes dropped: class id out of range or failed write

    for img_name in tqdm(images):

        img_path = os.path.join(input_dir, img_name)
        label_path = os.path.join(
            input_dir,
            os.path.splitext(img_name)[0] + ".txt"
        )

        if not os.path.exists(label_path):
            continue

        image = cv2.imread(img_path)
        if image is None:
            # Unreadable / corrupt image file.
            continue

        h, w = image.shape[:2]

        with open(label_path, "r") as f:
            lines = f.readlines()

        for line_no, line in enumerate(lines, start=1):

            # A blank or whitespace-only line would otherwise abort the whole
            # run with an unpacking error inside yolo_to_bbox.
            if not line.strip():
                continue

            try:
                cls, x1, y1, x2, y2 = yolo_to_bbox(line, w, h)
            except ValueError as exc:
                raise ValueError(
                    f"{label_path}, line {line_no}: "
                    f"malformed YOLO label {line.strip()!r}"
                ) from exc

            # Only folders 0..NUM_CLASSES-1 exist; writing elsewhere would
            # fail silently, so reject such boxes explicitly.
            if not 0 <= cls < NUM_CLASSES:
                skipped += 1
                continue

            # clamp bbox
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(w, x2)
            y2 = min(h, y2)

            crop = image[y1:y2, x1:x2]

            if crop.size == 0:
                continue

            # resize to MNIST-style size
            crop = cv2.resize(crop, (32, 32))

            save_path = os.path.join(
                output_dir,
                str(cls),
                f"{split}_{counter}.jpg"
            )

            # cv2.imwrite reports failure through its return value, so count
            # a crop only when the file was really stored.
            if not cv2.imwrite(save_path, crop):
                skipped += 1
                continue
            counter += 1

    print(f"{split} done. Total crops:", counter)
    if skipped:
        print(
            f"{split}: skipped {skipped} boxes "
            f"(class id outside 0..{NUM_CLASSES - 1} or write failure)"
        )

# Convert both dataset splits: training first, then validation.
for split_name in ("train", "val"):
    process_split(split_name)


100%|██████████| 2518/2518 [02:11<00:00, 19.18it/s]


train done. Total crops: 2774


100%|██████████| 633/633 [00:32<00:00, 19.53it/s]

val done. Total crops: 702



