In [1]:
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split
from PIL import Image

# File extensions that are treated as images when the class folders are scanned.
accepted_suffix = (
    '.jpg', '.png', '.jpeg',
    '.JPG', '.PNG',
    '.bmp', '.gif',
)

# Fraction of each class's images that is passed as the training share.
split_ratio = 0.8

# Locations, all relative to the working directory. The input folder is
# scanned for one sub-folder per class; the output folder receives a
# 'train' and a 'val' sub-tree.
root_path = Path('./')
input_dir = 'all_images'
output_dir = 'training'

original_data_path = root_path / input_dir
train_data_path, val_data_path = (
    root_path / output_dir / split_name for split_name in ('train', 'val')
)

# Create both split roots up front; folders that already exist are kept.
for split_root in (train_data_path, val_data_path):
    split_root.mkdir(parents=True, exist_ok=True)

# Split every class folder under ``original_data_path`` into train/val copies.
#
# Class folders and file names are visited in sorted order: ``iterdir()``
# yields entries in arbitrary order, and without a stable input order the
# fixed ``random_state`` below would not produce a reproducible split.
for class_path in sorted(original_data_path.iterdir()):
    if not class_path.is_dir():
        continue
    class_name = class_path.name

    # Collect the names of readable images whose PIL mode is 'RGB'. Files in
    # any other mode are left out, as are files PIL cannot open.
    images = []
    for filename in sorted(class_path.iterdir()):
        # Case-insensitive match, so '.JPEG', '.Png', '.BMP', ... are
        # accepted just like the variants listed in ``accepted_suffix``.
        if filename.suffix.lower() not in accepted_suffix:
            continue
        try:
            with Image.open(filename) as img:
                if img.mode == 'RGB':
                    images.append(filename.name)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f'Error occured. {filename}: {e}')

    if not images:
        print(f'Skipping {class_name}: no usable RGB images found.')
        continue

    if len(images) < 2:
        # train_test_split raises ValueError when one side of the split
        # would be empty, which is always the case for a single sample.
        print(f'Only one usable image in {class_name}; keeping it in train.')
        train_images, val_images = images, []
    else:
        train_images, val_images = train_test_split(
            images,
            train_size=split_ratio,
            random_state=42,
        )

    # Copy each subset into <split root>/<class name>/, creating the class
    # folder on demand.
    for split_base, split_images in (
        (train_data_path, train_images),
        (val_data_path, val_images),
    ):
        split_class_path = split_base / class_name
        split_class_path.mkdir(parents=True, exist_ok=True)
        for image_name in split_images:
            shutil.copyfile(class_path / image_name,
                            split_class_path / image_name)