In [1]:
import os
import random
import shutil
import cv2
import numpy as np

def augment_image(img):
    """Return a randomly perturbed version of ``img``.

    Three independent perturbations are each applied with probability 0.7,
    always in this order:

    1. Desaturation: convert to grayscale and back to three channels.
    2. Gaussian blur with a square kernel of size 3 or 5 (chosen at random).
    3. Brightness change: scale the HSV value channel by a random factor
       drawn from [0.7, 1.3], clipped to the range 0..255.

    Args:
        img: Image array that the OpenCV conversions treat as 3-channel BGR.
            NOTE(review): the 0..255 clip suggests 8-bit data is expected —
            confirm against callers.

    Returns:
        The perturbed image. When no perturbation is selected the very same
        object that was passed in is returned.
    """
    result = img

    # Step 1: drop the colour information but keep a 3-channel layout.
    apply_gray = random.random() < 0.7
    if apply_gray:
        gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
        result = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

    # Step 2: soften the image; sigma 0 lets OpenCV derive it from the kernel size.
    apply_blur = random.random() < 0.7
    if apply_blur:
        kernel = random.choice([3, 5])
        result = cv2.GaussianBlur(result, (kernel, kernel), 0)

    # Step 3: brighten or darken by rescaling only the V channel in HSV space.
    apply_brightness = random.random() < 0.7
    if apply_brightness:
        hsv_pixels = cv2.cvtColor(result, cv2.COLOR_BGR2HSV)
        gain = random.uniform(0.7, 1.3)
        hsv_pixels[:, :, 2] = np.clip(hsv_pixels[:, :, 2] * gain, 0, 255)
        result = cv2.cvtColor(hsv_pixels, cv2.COLOR_HSV2BGR)

    return result

def prepare_classification_dataset(open_folder, do_not_open_folder, output_base, split_ratio=0.8, seed=None):
    """Build a train/val folder tree for a two-class image classifier.

    Images (``.jpg``, ``.jpeg``, ``.png``; case-insensitive) are collected from
    the two source folders, shuffled together, and split into a training and a
    validation part. Every image is written as ``<name>.jpg`` into
    ``<output_base>/{train,val}/{open,do_not_open}``. Each training image that
    OpenCV can decode additionally gets one randomly augmented sibling named
    ``<name>_aug.jpg``.

    Args:
        open_folder: Directory holding the images labelled "open".
        do_not_open_folder: Directory holding the images labelled "do_not_open".
        output_base: Root directory of the generated dataset; created if missing.
        split_ratio: Fraction of all images placed in the training part
            (the remainder goes to validation). Must lie in [0, 1].
        seed: Optional seed. When given, the global ``random`` generator is
            seeded so that both the split and the augmentations are
            repeatable. When ``None`` the current global state is used.

    Raises:
        ValueError: If ``split_ratio`` is outside [0, 1].
        OSError: If a source folder cannot be listed or a file cannot be copied.

    Notes:
        * The split is over the pooled images and is not stratified per class.
        * Existing files under ``output_base`` are never removed. Re-running
          with a different shuffle can therefore leave the same image in both
          train and val; use a fixed ``seed`` or start from an empty folder.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    if seed is not None:
        random.seed(seed)

    image_exts = ('.jpg', '.jpeg', '.png')
    jpeg_exts = ('.jpg', '.jpeg')

    # Collect the inputs first so that a bad source path fails before any
    # output directory is created.
    all_data = []
    for folder, label in [(open_folder, "open"), (do_not_open_folder, "do_not_open")]:
        # os.listdir order is arbitrary; sorting makes a seeded shuffle reproducible.
        for file_name in sorted(os.listdir(folder)):
            if file_name.lower().endswith(image_exts):
                all_data.append((os.path.join(folder, file_name), label))

    # Create output directories
    train_open = os.path.join(output_base, "train/open")
    train_do_not = os.path.join(output_base, "train/do_not_open")
    val_open = os.path.join(output_base, "val/open")
    val_do_not = os.path.join(output_base, "val/do_not_open")

    for folder in [train_open, train_do_not, val_open, val_do_not]:
        os.makedirs(folder, exist_ok=True)

    # Shuffle and split
    random.shuffle(all_data)
    split_idx = int(len(all_data) * split_ratio)
    train_data = all_data[:split_idx]
    val_data = all_data[split_idx:]

    for dataset, folder_map, augment in [
        (train_data, {"open": train_open, "do_not_open": train_do_not}, True),
        (val_data, {"open": val_open, "do_not_open": val_do_not}, False)
    ]:
        for img_path, label in dataset:
            base_name, ext = os.path.splitext(os.path.basename(img_path))
            dst_path = os.path.join(folder_map[label], base_name + ".jpg")

            # A non-JPEG source must be re-encoded; a byte copy would produce a
            # PNG file carrying a .jpg extension.
            needs_transcode = ext.lower() not in jpeg_exts

            # Decode at most once, and only when the pixels are actually needed.
            img = cv2.imread(img_path) if (augment or needs_transcode) else None

            if needs_transcode and img is not None:
                cv2.imwrite(dst_path, img)
            else:
                # JPEG sources are copied verbatim (no recompression). Files
                # OpenCV cannot decode are still copied as a best effort.
                shutil.copy(img_path, dst_path)

            if augment and img is not None:
                aug_img = augment_image(img)
                aug_name = base_name + "_aug.jpg"
                aug_path = os.path.join(folder_map[label], aug_name)
                cv2.imwrite(aug_path, aug_img)

    print(f"✅ Classification dataset prepared with {len(train_data)} train and {len(val_data)} val images (plus augmentations).")

In [None]:
# Source folders (one per class) and the dataset destination. The paths are
# relative, so they resolve against the kernel's current working directory.
open_folder = "../../data/open"
do_not_open_folder = "../../data/do_not_open"
output_base = "../../datasets/catflap"

# Build the train/val folder tree with the default split ratio.
prepare_classification_dataset(
    open_folder=open_folder,
    do_not_open_folder=do_not_open_folder,
    output_base=output_base,
)

✅ Classification dataset prepared with 63 train and 16 val images (plus augmentations).


In [None]:
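# Disabled cleanup (uncomment to run): deletes the "images" and "labels"
# subfolders of output_base.
# NOTE(review): prepare_classification_dataset only writes train/ and val/,
# so these look like leftovers from a different dataset layout — confirm
# before re-enabling, since rmtree is irreversible.
# shutil.rmtree(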
#     os.path.join(
#         output_base,
#         "images"
#     )
# )

# shutil.rmtree(
#     os.path.join(
#         output_base,
#         "labels"
#     )
# )