# In [1]:
import os
import shutil
import random

# Where the raw per-class image folders live, and where the split copy goes.
source_dir = "breast_ultrasound_images"
target_dir = "breast_cancer_dataset"

# Only these class folders are processed; 'normal' is left out for now.
classes = ["benign", "malignant"]

# Fraction of each class assigned to the train / val / test splits.
train_split, val_split, test_split = 0.7, 0.15, 0.15

# Ensure every <target_dir>/<split>/<class> folder exists before copying.
for split, cls in [(s, c) for s in ("train", "val", "test") for c in classes]:
    os.makedirs(os.path.join(target_dir, split, cls), exist_ok=True)

# Function to split and copy
def split_and_copy(
    class_name,
    seed=None,
    src_root=None,
    dst_root=None,
    ratios=None,
):
    """Shuffle one class's files and copy them into train/val/test folders.

    Files are read from ``<src_root>/<class_name>`` and copied to
    ``<dst_root>/<split>/<class_name>``. After shuffling, the first
    ``int(train_ratio * n)`` files go to ``train``, the next
    ``int(val_ratio * n)`` go to ``val`` and all remaining files go to
    ``test``.

    Args:
        class_name: Name of the class sub-folder to split.
        seed: Optional seed for a private random generator. With a seed,
            the same folder contents always produce the same split. When
            ``None`` the shared ``random`` module state is used.
        src_root: Folder containing the per-class folders. Defaults to the
            module-level ``source_dir``.
        dst_root: Root folder of the split tree. Defaults to the
            module-level ``target_dir``.
        ratios: Optional ``(train_ratio, val_ratio)`` pair. Defaults to the
            module-level ``(train_split, val_split)``.

    Raises:
        OSError: If the source folder cannot be listed or a copy fails.

    Note:
        Files already present in the destination are not removed, so
        re-running with a different shuffle can leave the same file in more
        than one split. Use a fixed ``seed`` or start from an empty target.
    """
    if src_root is None:
        src_root = source_dir
    if dst_root is None:
        dst_root = target_dir
    if ratios is None:
        ratios = (train_split, val_split)
    train_ratio, val_ratio = ratios

    src_folder = os.path.join(src_root, class_name)

    # os.listdir returns entries in arbitrary order, so sort first to make a
    # seeded shuffle reproducible. Skip anything that is not a regular file
    # (e.g. sub-directories), which shutil.copy cannot handle.
    images = sorted(
        name
        for name in os.listdir(src_folder)
        if os.path.isfile(os.path.join(src_folder, name))
    )

    if seed is None:
        random.shuffle(images)
    else:
        random.Random(seed).shuffle(images)

    n_total = len(images)
    n_train = int(train_ratio * n_total)
    n_val = int(val_ratio * n_total)

    assignments = (
        ("train", images[:n_train]),
        ("val", images[n_train:n_train + n_val]),
        # The test split takes the remainder, so no file is dropped by the
        # int() truncation above.
        ("test", images[n_train + n_val:]),
    )

    for split, file_list in assignments:
        dst_folder = os.path.join(dst_root, split, class_name)
        # Create the folder here so the function also works on its own.
        os.makedirs(dst_folder, exist_ok=True)
        for fname in file_list:
            shutil.copy(
                os.path.join(src_folder, fname),
                os.path.join(dst_folder, fname),
            )

# Split and copy every configured class, then report completion.
for cls in classes:
    split_and_copy(class_name=cls)

print("✅ Dataset reorganized successfully!")


# Output: ✅ Dataset reorganized successfully!
