In [1]:
import os
import random
import shutil
from PIL import Image
from tqdm import tqdm  # Import tqdm for progress bar

In [9]:
# Source dataset: one sub-folder per class, each holding that class's images.
root_dir = "RawDataset"

# Destination roots; a per-class sub-folder is created under each of them.
train_path = "Datasets/train"
val_path = "Datasets/val"

# Fraction of each class that goes to the training set; the remainder
# (1 - train_ratio) becomes the validation set.
train_ratio = 0.8  # 80% for training

# (width, height) every image is resized to before being saved.
target_size = (224, 224)  # Adjust as needed


In [10]:

# Fixed seed so that re-running this cell reproduces exactly the same
# train/validation split. With an unseeded shuffle every re-run picks a
# different split while the files written by earlier runs stay on disk, so the
# same image can end up in both sets (train/val leakage).
# NOTE: outputs left behind by earlier *unseeded* runs are not removed here;
# clear the destination folders once before relying on the split.
SPLIT_SEED = 42


def _resize_and_save(image_names, src_dir, dst_dir, desc):
    """Resize every image in ``image_names`` and write it to ``dst_dir``.

    Args:
        image_names: File names (not paths) of images located in ``src_dir``.
        src_dir: Folder the images are read from.
        dst_dir: Existing folder the resized copies are written to, under the
            same file names.
        desc: Label shown on the tqdm progress bar.

    Raises:
        Whatever ``Image.open`` / ``Image.save`` raise for unreadable or
        unwritable files; errors are not swallowed.
    """
    for image_name in tqdm(image_names, desc=desc, unit="image"):
        # The context manager closes the underlying file handle even if
        # resizing or saving fails, so handles do not pile up over
        # thousands of images.
        with Image.open(os.path.join(src_dir, image_name)) as source_img:
            # Convert RGBA to RGB if necessary (drops the alpha channel).
            if source_img.mode == 'RGBA':
                resized = source_img.convert('RGB')
            else:
                resized = source_img
            resized = resized.resize(target_size)
            resized.save(os.path.join(dst_dir, image_name))


# Create destination folders
os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)

# Process each class folder (sorted: os.listdir order is arbitrary)
for class_folder in sorted(os.listdir(root_dir)):
    class_folder_path = os.path.join(root_dir, class_folder)

    # Skip stray files in the dataset root (e.g. .DS_Store, Thumbs.db);
    # only directories are class folders.
    if not os.path.isdir(class_folder_path):
        continue

    train_class_path = os.path.join(train_path, class_folder)
    val_class_path = os.path.join(val_path, class_folder)

    # Create class folders if they don't exist
    os.makedirs(train_class_path, exist_ok=True)
    os.makedirs(val_class_path, exist_ok=True)

    # Collect regular files only (nested folders cannot be opened as images)
    # in sorted order, then shuffle with a per-class seeded generator so the
    # result does not depend on directory listing order or on how many
    # classes were processed before this one.
    images = sorted(
        name for name in os.listdir(class_folder_path)
        if os.path.isfile(os.path.join(class_folder_path, name))
    )
    random.Random(SPLIT_SEED).shuffle(images)

    # Split into training and validation sets
    split_idx = int(len(images) * train_ratio)
    train_images = images[:split_idx]
    val_images = images[split_idx:]

    _resize_and_save(
        train_images, class_folder_path, train_class_path,
        desc=f"Training images - {class_folder}",
    )
    _resize_and_save(
        val_images, class_folder_path, val_class_path,
        desc=f"Validation images - {class_folder}",
    )

print("Dataset split and resized successfully!")

Training images - Normal: 100%|████████████████████████████████████████████████| 4000/4000 [00:29<00:00, 136.87image/s]
Validation images - Normal: 100%|██████████████████████████████████████████████| 1000/1000 [00:06<00:00, 146.47image/s]
Training images - Tumor: 100%|█████████████████████████████████████████████████| 4000/4000 [00:29<00:00, 136.21image/s]
Validation images - Tumor: 100%|███████████████████████████████████████████████| 1000/1000 [00:07<00:00, 139.14image/s]

Dataset split and resized successfully!



