In [26]:
import os
import random
import shutil

# Dataset layout: one sub-folder per class label inside each split directory.
classes = [
    "human",
    "cat",
    "dog",
]

# Number of images to keep for every class in each split.
train_samples = 5_000
test_samples = 1_000

# Source splits (full dataset) and the root where the reduced copy is written.
original_train_dir = r"D:\sdp\ll-first\dataset\training"
original_test_dir = r"D:\sdp\ll-first\dataset\test"
output_dir = r"D:\sdp\redd"  # reduced dataset lives under "redd"

In [27]:

# Function to get all images, including inside subfolders
def get_all_images(folder):
    """Return the paths of all image files found under ``folder``.

    The directory tree is walked recursively with ``os.walk``. A file counts
    as an image when its name ends with ``.png``, ``.jpg`` or ``.jpeg``; the
    comparison is case-insensitive, so ``PHOTO.JPG`` is matched as well.

    Args:
        folder: Root directory to search.

    Returns:
        A sorted list of paths (each is ``os.path.join(root, filename)``).
        The list is empty when no image is found; ``os.walk`` yields nothing
        for a path that does not exist, so a missing folder also gives ``[]``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_list = []
    for root, _, files in os.walk(folder):  # Recursively check subfolders
        for file in files:
            # Lower-case the name before matching: an exact-case check would
            # silently skip files such as "IMG_001.JPG".
            if file.lower().endswith(image_extensions):
                image_list.append(os.path.join(root, file))
    # os.walk order depends on the filesystem; sort for a stable result.
    image_list.sort()
    return image_list

# Helper: pick a destination file name that has not been used yet in this run
def _unique_dest_name(filename, used_names):
    """Return ``filename``, or a ``_1``/``_2``/... suffixed variant if taken.

    ``used_names`` is a set of ``os.path.normcase``-normalised names; the
    chosen name is added to it before returning.
    """
    stem, ext = os.path.splitext(filename)
    candidate = filename
    counter = 1
    while os.path.normcase(candidate) in used_names:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    used_names.add(os.path.normcase(candidate))
    return candidate


# Function to reduce dataset
def reduce_dataset(src_folder, dest_folder, num_samples, seed=None):
    """Copy a random sample of images per class into ``dest_folder``.

    For every name in the module-level ``classes`` list, images are collected
    from ``src_folder/<class>`` via ``get_all_images`` and up to
    ``num_samples`` of them are copied into ``dest_folder/<class>``.

    Sources are gathered recursively, so two selected images from different
    subfolders can share a base name. Such collisions get a numeric suffix
    (``name_1.jpg``) instead of overwriting each other, which keeps the number
    of files written equal to the reported count. Files already present in
    the destination from an earlier run are still overwritten when a name
    matches.

    Args:
        src_folder: Directory containing one sub-folder per class.
        dest_folder: Directory to create/populate with the reduced dataset.
        num_samples: Maximum number of images to copy per class.
        seed: Optional seed. When given, a dedicated ``random.Random(seed)``
            is used and the candidate list is sorted first, so the same
            files are chosen on every run over the same source tree. When
            ``None`` (default), the global ``random`` module state is used.

    Returns:
        None. Progress and warnings are printed.
    """
    os.makedirs(dest_folder, exist_ok=True)

    # Either a private, seeded generator or the shared module-level one.
    rng = random.Random(seed) if seed is not None else random

    for cls in classes:
        src_cls_folder = os.path.join(src_folder, cls)
        dest_cls_folder = os.path.join(dest_folder, cls)
        os.makedirs(dest_cls_folder, exist_ok=True)

        # Get all images, including from subfolders
        images = get_all_images(src_cls_folder)
        if seed is not None:
            # A fixed order is needed for the seed to select the same files.
            images = sorted(images)

        # Use min to avoid errors if not enough images
        sample_size = min(len(images), num_samples)

        if len(images) < num_samples:
            print(f"⚠ Warning: {cls} has only {len(images)} images, reducing sample size.")

        # Select random images
        selected_images = rng.sample(images, sample_size)

        # Copy selected images to destination folder, disambiguating any
        # base names that repeat across source subfolders.
        used_names = set()
        for img in selected_images:
            dest_name = _unique_dest_name(os.path.basename(img), used_names)
            shutil.copy(img, os.path.join(dest_cls_folder, dest_name))

        print(f"✔ {cls}: {sample_size} images copied to {dest_cls_folder}")

# Run the reduction once per split: (label shown in the log, source directory,
# output sub-folder name, images to keep per class).
for split_label, split_source, split_folder, per_class_count in (
    ("training", original_train_dir, "train", train_samples),
    ("testing", original_test_dir, "test", test_samples),
):
    print(f"Processing {split_label} dataset...")
    reduce_dataset(split_source, os.path.join(output_dir, split_folder), per_class_count)

print("\n✅ Dataset reduction completed successfully!")


Processing training dataset...
✔ human: 5000 images copied to D:\sdp\redd\train\human
✔ cat: 5000 images copied to D:\sdp\redd\train\cat
✔ dog: 5000 images copied to D:\sdp\redd\train\dog
Processing testing dataset...
✔ human: 1000 images copied to D:\sdp\redd\test\human
✔ cat: 1000 images copied to D:\sdp\redd\test\cat
✔ dog: 1000 images copied to D:\sdp\redd\test\dog

✅ Dataset reduction completed successfully!
