In [1]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split

In [17]:
def segregate_dataset(input_dir, output_dir, test_size=0.2, val_size=0.1, random_seed=42):
    """Copy every class's images into train / validation / test folders.

    ``input_dir`` is scanned for class sub-directories; each class directory
    that contains an ``images`` folder has its image files split with
    ``train_test_split`` and copied (the originals stay in place) to
    ``output_dir/<split>/<class_name>/``, where ``<split>`` is ``train``,
    ``validation`` or ``test``. Only the contents of ``images`` are copied;
    mask files are not.

    Args:
        input_dir: Directory holding one sub-directory per class.
        output_dir: Destination root; split/class folders are created as needed.
        test_size: Fraction of each class's images placed in the test split.
        val_size: Fraction of each class's images (relative to the whole
            class, not to what remains after the test split) placed in the
            validation split.
        random_seed: Passed as ``random_state`` to both ``train_test_split``
            calls.

    Raises:
        ValueError: If ``test_size`` / ``val_size`` are not fractions strictly
            between 0 and 1 whose sum is below 1. ``train_test_split`` may
            also raise it when a class has too few images to split.
    """
    if not (0 < test_size < 1 and 0 < val_size < 1 and test_size + val_size < 1):
        raise ValueError(
            "test_size and val_size must each be in (0, 1) and sum to less than 1; "
            f"got test_size={test_size!r}, val_size={val_size!r}"
        )

    # The second split runs on what is left after the test split, so rescale
    # val_size to keep it a fraction of the whole class.
    val_fraction = val_size / (1 - test_size)

    # os.listdir returns entries in arbitrary order; sorting keeps the run
    # deterministic for a given random_seed.
    for class_name in sorted(os.listdir(input_dir)):
        if class_name == '.ipynb_checkpoints':
            continue
        images_dir = os.path.join(input_dir, class_name, 'images')
        # Skip plain files and directories that do not follow the
        # <class>/images layout instead of failing on them.
        if not os.path.isdir(images_dir):
            continue

        # Regular files only: nested folders (e.g. notebook checkpoints)
        # cannot be handed to shutil.copy.
        images = sorted(
            entry for entry in os.listdir(images_dir)
            if os.path.isfile(os.path.join(images_dir, entry))
        )

        # Split images into train, validation, and test sets
        train_images, test_images = train_test_split(
            images, test_size=test_size, random_state=random_seed)
        train_images, val_images = train_test_split(
            train_images, test_size=val_fraction, random_state=random_seed)

        splits = {
            'train': train_images,
            'validation': val_images,
            'test': test_images,
        }

        # Create the split directory for this class and copy its images into it
        for split_name, split_images in splits.items():
            split_dir = os.path.join(output_dir, split_name, class_name)
            os.makedirs(split_dir, exist_ok=True)
            for image in split_images:
                shutil.copy(os.path.join(images_dir, image), os.path.join(split_dir, image))

In [18]:
# Example usage: split the radiography dataset 70 / 10 / 20 into
# train / validation / test folders under `covid_data`.
input_directory = 'COVID-19_Radiography_Dataset'
output_directory = 'covid_data'
segregate_dataset(
    input_dir=input_directory,
    output_dir=output_directory,
    test_size=0.2,
    val_size=0.1,
    random_seed=42,
)