In [5]:
import os
import random
import shutil
import cv2
import numpy as np

def augment_image(img):
    """Apply up to three random perturbations to a BGR image and return it.

    Each step fires independently with probability 0.7, in this order:

    1. grayscale: convert to gray and back to 3-channel BGR;
    2. blur: Gaussian blur with a 3x3 or 5x5 kernel;
    3. brightness: scale the HSV value channel by a factor drawn uniformly
       from [0.7, 1.3], clipped to [0, 255].

    Randomness comes from the module-level ``random`` generator. When no
    step fires, the object passed in is returned as-is.
    """
    apply_probability = 0.7

    # Step 1: drop colour information but keep the 3-channel layout.
    if random.random() < apply_probability:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    # Step 2: soften the image with a small square Gaussian kernel.
    if random.random() < apply_probability:
        kernel = random.choice([3, 5])
        img = cv2.GaussianBlur(img, (kernel, kernel), 0)

    # Step 3: brighten or darken through the V channel of HSV.
    if random.random() < apply_probability:
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        gain = random.uniform(0.7, 1.3)
        # Clip before the values are stored back into the HSV array.
        hsv[..., 2] = np.clip(hsv[..., 2] * gain, 0, 255)
        img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    return img

def _unique_stem(stem, taken):
    """Return ``stem``, or ``stem_<n>`` when it would clash with a used name.

    ``taken`` is a set of lower-cased stems already handed out; it is updated
    in place. Both the stem and its ``_aug`` twin are reserved so an
    augmented file can never overwrite another image's files.
    """
    candidate = stem
    suffix = 1
    while candidate.lower() in taken or f"{candidate}_aug".lower() in taken:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    taken.add(candidate.lower())
    taken.add(f"{candidate}_aug".lower())
    return candidate


def _write_full_frame_label(label_path, class_id):
    """Write a one-line label file: ``class_id`` with a box covering the whole image."""
    with open(label_path, 'w') as f:
        f.write(f"{class_id} 0.5 0.5 1.0 1.0\n")


def prepare_yolo_dataset(open_folder, do_not_open_folder, output_base, split_ratio=0.8,
                         seed=None, clean=False):
    """Build a train/val image + label tree from two single-class folders.

    Images (``.jpg``, ``.jpeg``, ``.png``) from ``open_folder`` get class 0 and
    images from ``do_not_open_folder`` get class 1. They are shuffled, split,
    and copied to ``<output_base>/images/{train,val}`` with a matching label
    file in ``<output_base>/labels/{train,val}``. Every training image that
    ``cv2.imread`` can read also gets one augmented ``<stem>_aug.jpg`` copy.

    Args:
        open_folder: Directory with class-0 images.
        do_not_open_folder: Directory with class-1 images.
        output_base: Root directory of the generated dataset.
        split_ratio: Fraction of images placed in the training split.
        seed: If given, the train/val shuffle uses a private
            ``random.Random(seed)`` so the split is reproducible. The
            augmentation still draws from the global ``random`` generator.
        clean: If true, delete ``<output_base>/images`` and
            ``<output_base>/labels`` first, so files from an earlier run
            cannot leak between train and val.

    Raises:
        FileNotFoundError: If either input folder does not exist
            (raised by ``os.listdir``).
    """
    images_root = os.path.join(output_base, "images")
    labels_root = os.path.join(output_base, "labels")

    if clean:
        # A re-run reshuffles the split; stale files would otherwise put the
        # same image in both train and val.
        for root in (images_root, labels_root):
            shutil.rmtree(root, ignore_errors=True)

    # Make folders
    images_train = os.path.join(images_root, "train")
    images_val = os.path.join(images_root, "val")
    labels_train = os.path.join(labels_root, "train")
    labels_val = os.path.join(labels_root, "val")

    for folder in [images_train, images_val, labels_train, labels_val]:
        os.makedirs(folder, exist_ok=True)

    # Collect (source path, output stem, original extension, class id).
    # Stems are assigned here, in sorted order and before shuffling, so the
    # output names do not depend on the shuffle.
    all_data = []
    taken_stems = set()
    for folder, label in [(open_folder, 0), (do_not_open_folder, 1)]:
        for file_name in sorted(os.listdir(folder)):
            if not file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            stem, ext = os.path.splitext(file_name)
            all_data.append(
                (os.path.join(folder, file_name), _unique_stem(stem, taken_stems), ext, label)
            )

    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(all_data)
    split_idx = int(len(all_data) * split_ratio)
    train_data = all_data[:split_idx]
    val_data = all_data[split_idx:]

    for dataset, img_folder, label_folder, augment in [
        (train_data, images_train, labels_train, True),
        (val_data, images_val, labels_val, False)
    ]:
        for img_path, stem, ext, class_id in dataset:
            # Copy the original byte-for-byte, keeping its real extension so
            # a PNG is never stored under a ".jpg" name.
            shutil.copy(img_path, os.path.join(img_folder, stem + ext))
            _write_full_frame_label(os.path.join(label_folder, stem + ".txt"), class_id)

            # Augmented image (only in training)
            if not augment:
                continue
            img = cv2.imread(img_path)
            if img is None:
                # Unreadable file: keep the copied original, skip augmentation.
                continue
            aug_stem = stem + "_aug"
            # cv2.imwrite re-encodes, so ".jpg" is the true format here.
            cv2.imwrite(os.path.join(img_folder, aug_stem + ".jpg"), augment_image(img))
            _write_full_frame_label(os.path.join(label_folder, aug_stem + ".txt"), class_id)

    print(f"✅ Dataset prepared with {len(train_data)} train (plus augmented) and {len(val_data)} val images.")

In [6]:
# One source folder per class; the generated dataset goes under output_base.
output_base = "datasets/catflap"
open_folder = "data/open"
do_not_open_folder = "data/do_not_open"

prepare_yolo_dataset(
    open_folder=open_folder,
    do_not_open_folder=do_not_open_folder,
    output_base=output_base,
)

✅ Dataset prepared with 63 train (plus augmented) and 16 val images.


In [None]:
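# Optional cleanup: uncomment to delete the generated "images" and "labels"
# directories under output_base.
# shutil.rmtree(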
#     os.path.join(
#         output_base,
#         "images"
#     )
# )

# shutil.rmtree(
#     os.path.join(
#         output_base,
#         "labels"
#     )
# )