# Train-Test-Validation Splitting

In [1]:
import os
import shutil
import random

# Source dataset root and destination root for the split copies.
# NOTE: these are machine-specific absolute paths; adjust before running elsewhere.
input_dir = r"D:\Virtual Environments\Pattern Recognition\processed dataset"
output_dir = r"D:\Virtual Environments\Pattern Recognition\split dataset"

# Fraction of every class that goes to each split.
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# The test split is populated with whatever remains once the train and
# validation shares have been taken, so test_ratio is only honoured when the
# three ratios add up to 1. Fail early instead of silently producing a split
# that does not match the configuration.
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError(
        "train_ratio + val_ratio + test_ratio must equal 1.0, got "
        f"{train_ratio + val_ratio + test_ratio}"
    )

# Create the three top-level split folders; exist_ok makes re-runs harmless
# and avoids the check-then-create race of a separate os.path.exists() test.
for split in ['train', 'validation', 'test']:
    split_path = os.path.join(output_dir, split)
    os.makedirs(split_path, exist_ok=True)

# Fixed seed for the per-class shuffle. With an unseeded shuffle every run
# yields a different partition, so re-running into the same output folder could
# place one image in several splits (train/test leakage).
# NOTE: if output_dir already holds files from an earlier, unseeded run, clear
# it first -- stale copies from that run are not removed here.
split_seed = 42

# Iterate through each class folder in the input directory.
for class_name in os.listdir(input_dir):
    class_folder = os.path.join(input_dir, class_name)
    if not os.path.isdir(class_folder):
        continue  # ignore stray files that sit next to the class folders

    # os.listdir() returns names in arbitrary order, so sort before shuffling;
    # together with the fixed seed this makes the partition depend only on the
    # set of file names in the class folder.
    images = sorted(
        f for f in os.listdir(class_folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))
    )
    # A fresh generator per class keeps one class's split independent of how
    # many other classes exist or the order in which they are visited.
    random.Random(split_seed).shuffle(images)

    num_images = len(images)
    train_count = int(train_ratio * num_images)
    val_count = int(val_ratio * num_images)
    # The test split takes the remainder so that no image is dropped by the
    # int() truncation above.
    test_count = num_images - train_count - val_count

    split_images = (
        ('train', images[:train_count]),
        ('validation', images[train_count:train_count + val_count]),
        ('test', images[train_count + val_count:]),
    )

    # Create the class sub-directory inside each split folder and copy the
    # images assigned to that split (copy2 also preserves file metadata).
    for split, split_files in split_images:
        split_class_dir = os.path.join(output_dir, split, class_name)
        os.makedirs(split_class_dir, exist_ok=True)
        for img in split_files:
            src = os.path.join(class_folder, img)
            dst = os.path.join(split_class_dir, img)
            shutil.copy2(src, dst)

    print(f"Class '{class_name}': {train_count} train, {val_count} validation, {test_count} test images copied.")


Class 'Tomato___Bacterial_spot': 1488 train, 319 validation, 320 test images copied.
Class 'Tomato___Early_blight': 700 train, 150 validation, 150 test images copied.
Class 'Tomato___healthy': 1113 train, 238 validation, 240 test images copied.
Class 'Tomato___Late_blight': 1336 train, 286 validation, 287 test images copied.
Class 'Tomato___Leaf_Mold': 666 train, 142 validation, 144 test images copied.
Class 'Tomato___Septoria_leaf_spot': 1239 train, 265 validation, 267 test images copied.
Class 'Tomato___Spider_mites Two-spotted_spider_mite': 1173 train, 251 validation, 252 test images copied.
Class 'Tomato___Target_Spot': 982 train, 210 validation, 212 test images copied.
Class 'Tomato___Tomato_mosaic_virus': 261 train, 55 validation, 57 test images copied.
Class 'Tomato___Tomato_Yellow_Leaf_Curl_Virus': 3749 train, 803 validation, 805 test images copied.
