## Import libraries

In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split
from tqdm import tqdm

## Define paths and constants

In [2]:
# Root of the original training images; every sub-directory found here is
# treated as one class when the dataset is split.
SOURCE_DIR = "data/fer2013plus/train"
# Destination root; "train/<class>" and "val/<class>" folders are created inside it.
OUTPUT_DIR = "data/fer2013plus_new"
# Fraction of each class kept for training; the remainder (1 - TRAIN_RATIO)
# is passed to train_test_split as the validation share.
TRAIN_RATIO = 0.9
# Seed handed to train_test_split so the shuffled split is reproducible.
RANDOM_SEED = 42
# Suffixes accepted as images. File names are lower-cased before the check,
# so the match is case-insensitive; keep these entries lower-case.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

## Create output directories and split each class into train and validation sets

In [3]:
# Split every class directory under SOURCE_DIR into train/val subsets and copy
# the images to OUTPUT_DIR/<split>/<class>/. The split is done per class, so
# each class keeps roughly the same train/val proportion.

# Create the top-level split directories up front so they exist even when
# SOURCE_DIR contains no usable class directories.
for split in ("train", "val"):
    os.makedirs(os.path.join(OUTPUT_DIR, split), exist_ok=True)

# Sorted so the processing order does not depend on the file system's
# directory ordering.
categories = sorted(os.listdir(SOURCE_DIR))

for category in tqdm(categories, desc="Splitting dataset"):
    category_path = os.path.join(SOURCE_DIR, category)

    # Ignore stray files sitting next to the class directories.
    if not os.path.isdir(category_path):
        continue

    # Case-insensitive extension match; sorted so the seeded shuffle below
    # always receives the same input order.
    images = sorted(
        f for f in os.listdir(category_path)
        if f.lower().endswith(IMAGE_EXTENSIONS))

    if len(images) < 2:
        # train_test_split raises ValueError when one side of the split would
        # be empty, which is always the case with fewer than two images.
        # Keep what is there for training instead of aborting the whole run
        # with a half-written OUTPUT_DIR. tqdm.write keeps the progress bar intact.
        tqdm.write(
            f"Warning: category '{category}' has {len(images)} image(s); "
            "copying to train only, validation set left empty.")
        train_files, val_files = images, []
    else:
        train_files, val_files = train_test_split(
            images,
            test_size=1 - TRAIN_RATIO,
            random_state=RANDOM_SEED,
            shuffle=True)

    for split_name, files in [("train", train_files), ("val", val_files)]:
        split_dir = os.path.join(OUTPUT_DIR, split_name, category)
        os.makedirs(split_dir, exist_ok=True)

        # Images left behind by an earlier run with a different ratio or seed
        # would end up in both train and val. Nothing is deleted here; stop
        # and let the user clear the output explicitly.
        expected = set(files)
        stale = sorted(
            f for f in os.listdir(split_dir)
            if f.lower().endswith(IMAGE_EXTENSIONS) and f not in expected)
        if stale:
            raise RuntimeError(
                f"{split_dir!r} already contains {len(stale)} image(s) that are "
                f"not part of the current split (e.g. {stale[0]!r}). Delete "
                f"{OUTPUT_DIR!r} and re-run to avoid train/val leakage.")

        for file in files:
            shutil.copy(
                os.path.join(category_path, file),
                os.path.join(split_dir, file))

print("Dataset split completed successfully.")

Splitting dataset: 100%|██████████████████████████| 9/9 [00:08<00:00,  1.01it/s]

Dataset split completed successfully.



