# In [51]:
import os
import random
import shutil

# Directory that holds one subdirectory per class.
# Built with os.path.join rather than a hard-coded 'src\dataset\...' literal:
# the backslash form relies on invalid escape sequences (\d, \p, \P), which
# newer Python versions warn about, and it only forms a real path on Windows.
main_dir = os.path.join('src', 'dataset', 'plantvillage', 'PlantVillage')

# Directory that receives the train/, test/ and val/ trees.
output_dir = os.path.join('src', 'dataset')

# Fraction of each class assigned to the train, test, and validation sets.
train_size = 0.7
test_size = 0.2
val_size = 0.1

# The validation set is whatever remains after the train and test slices are
# taken, so val_size is only honoured when the three fractions sum to 1.
# Compare with a tolerance because 0.7 + 0.2 + 0.1 is not exactly 1.0 in
# floating point.
if abs(train_size + test_size + val_size - 1.0) > 1e-9:
    raise ValueError(
        'train_size, test_size and val_size must sum to 1, got '
        f'{train_size + test_size + val_size}'
    )

# Create the output directories if they don't already exist
# (makedirs also creates output_dir itself as a missing parent).
for split_name in ('train', 'test', 'val'):
    os.makedirs(os.path.join(output_dir, split_name), exist_ok=True)

# Split every class directory under main_dir into train/test/val copies
# under output_dir, using the train_size and test_size fractions; the
# validation set receives whatever is left over.

# Use a dedicated, fixed-seed generator so that re-running the script
# reproduces the same split. Copies from earlier runs are never removed, so
# a different shuffle on each run would leave the same image in more than
# one split (train/test leakage).
split_seed = 42
split_rng = random.Random(split_seed)

# os.listdir returns entries in arbitrary order; sorting makes the seeded
# shuffle independent of filesystem ordering.
for class_dir in sorted(os.listdir(main_dir)):
    class_path = os.path.join(main_dir, class_dir)
    if not os.path.isdir(class_path):
        # Stray files next to the class directories are not classes.
        continue

    # Keep regular files only: shutil.copyfile raises on directories, so a
    # nested folder inside a class directory would otherwise abort the run.
    image_filenames = sorted(
        name
        for name in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, name))
    )
    split_rng.shuffle(image_filenames)

    # int() truncates, so any rounding remainder ends up in the validation
    # slice, which simply takes everything after the test slice.
    num_images = len(image_filenames)
    train_end = int(num_images * train_size)
    test_end = train_end + int(num_images * test_size)

    splits = {
        'train': image_filenames[:train_end],
        'test': image_filenames[train_end:test_end],
        'val': image_filenames[test_end:],
    }

    for split_name, filenames in splits.items():
        # Create the subdirectory for the current class in this split.
        split_dir = os.path.join(output_dir, split_name, class_dir)
        os.makedirs(split_dir, exist_ok=True)

        # Copy (not move) so the original dataset stays intact.
        for filename in filenames:
            shutil.copyfile(
                os.path.join(class_path, filename),
                os.path.join(split_dir, filename),
            )

print('Image splitting complete.')

# Output: Image splitting complete.
