In [6]:
import os
import shutil
import random
from collections import defaultdict
from sklearn.model_selection import train_test_split

# Set paths
# Build the source path with os.path.join instead of the literal "archive\images":
# "\i" is an invalid escape sequence (Python emits a warning for it) and a
# hard-coded backslash only works as a separator on Windows.
image_folder = os.path.join("archive", "images")  # Folder containing all images
output_folder = "dataset_split"  # Where train/test images will be saved

# Create train/test directories
train_dir = os.path.join(output_folder, "train")
test_dir = os.path.join(output_folder, "test")
# exist_ok=True makes re-running the cell safe when the folders already exist.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Step 1: Group images by class
# Maps the integer class label to the list of PNG filenames carrying that label.
class_images = defaultdict(list)

# os.listdir() returns entries in arbitrary order, so iterate over a sorted
# listing: any seeded split performed on these lists then produces the same
# result on every machine and filesystem.
for filename in sorted(os.listdir(image_folder)):
    if filename.endswith(".png"):
        # The text before the first '_' is the class label; int() raises
        # ValueError if that prefix is not a number.
        class_label = int(filename.split("_")[0])
        class_images[class_label].append(filename)

# Step 2: Split each class into train & test
train_ratio = 0.8  # fraction of every class that goes to the training set

for label, filenames in class_images.items():
    # Each class is split on its own with a fixed seed, so every class
    # contributes to both the train and the test set.
    train_files, test_files = train_test_split(
        filenames, train_size=train_ratio, random_state=42
    )

    label_dirname = str(label)  # folder names must be strings

    # Handle both splits with the same code path: (destination root, files).
    for split_root, split_files in ((train_dir, train_files), (test_dir, test_files)):
        destination = os.path.join(split_root, label_dirname)
        os.makedirs(destination, exist_ok=True)

        # Images are copied, not moved: the source folder is left untouched.
        for name in split_files:
            shutil.copy(os.path.join(image_folder, name), os.path.join(destination, name))

print("Dataset split completed!")

  image_folder = "archive\images"  # Folder containing all images


Dataset split completed!
