In [None]:
import os
import random
import shutil
from pathlib import Path

# 1. Configuration
random.seed(42)  # for reproducibility
src_dirs = [Path("all_data")]
emotions = ["angry", "disgust", "fear", "happy", "neutral", "sad", "ahegao", "surprise"]
dest_root = Path("data_dir")
# Only the "train" fraction is used to compute the cut; "val" receives the remainder.
splits = {"train": 0.8, "val": 0.2}

# 2. Create directory structure
for split in splits:
    for emo in emotions:
        (dest_root / split / emo).mkdir(parents=True, exist_ok=True)


# 3. Gather, split, copy & rename
def gather_images(emotion, source_dirs):
    """Collect the files of one emotion class across all source folders.

    Args:
        emotion: Name of the class sub-folder to look for in each source dir.
        source_dirs: Iterable of directories that may contain ``<emotion>/``.

    Returns:
        A sorted list of ``Path`` objects. Sorting matters: directory listing
        order is filesystem-dependent, so shuffling an unsorted list with a
        fixed seed would still give a different split on another machine.
    """
    found = []
    for d in source_dirs:
        emo_folder = Path(d) / emotion
        if emo_folder.is_dir():
            # "*.*" also matches directories whose name contains a dot;
            # copying those would fail, so keep regular files only.
            found += [p for p in emo_folder.glob("*.*") if p.is_file()]
    return sorted(found)


def copy_and_rename(file_list, split_name, emotion=None, root=None):
    """Copy files into ``<root>/<split_name>/<emotion>/`` with sequential names.

    Files are renamed to ``<emotion>_<NNNN><ext>`` where ``NNNN`` is the
    1-based position in ``file_list`` and ``ext`` is the lower-cased suffix.

    Args:
        file_list: Paths to copy, in the order that determines their index.
        split_name: Name of the split sub-folder (e.g. "train" or "val").
        emotion: Class name; defaults to the module-level loop variable ``emo``
            so existing two-argument calls keep working.
        root: Destination root; defaults to the module-level ``dest_root``.
    """
    emotion = emo if emotion is None else emotion
    target_dir = (dest_root if root is None else Path(root)) / split_name / emotion
    target_dir.mkdir(parents=True, exist_ok=True)
    for idx, src_path in enumerate(file_list, start=1):
        ext = src_path.suffix.lower()
        new_name = f"{emotion}_{idx:04d}{ext}"
        shutil.copy2(src_path, target_dir / new_name)


for emo in emotions:
    # collect all images for this emotion across source folders
    all_imgs = gather_images(emo, src_dirs)
    random.shuffle(all_imgs)

    # compute split index
    n_total = len(all_imgs)
    n_train = int(splits["train"] * n_total)

    # do the splits
    copy_and_rename(all_imgs[:n_train], "train", emo)
    copy_and_rename(all_imgs[n_train:], "val", emo)

print("Done! Your data_dir/ train/ and val/ folders are ready.")

Done! Your data_dir/ train/ and val/ folders are ready.


In [None]:
import os
import random
import shutil
from pathlib import Path

# 1. Configuration
random.seed(42)  # for reproducibility
src_dirs = [Path("all_data")]
emotions = ["angry", "disgust", "fear", "happy", "neutral", "sad", "ahegao", "surprise"]
dest_root = Path("all_data")

# 2. Create directory structure
for emo in emotions:
    (dest_root / emo).mkdir(parents=True, exist_ok=True)


# 3. Gather & copy
def copy_and_rename(file_list, emotion=None, root=None):
    """Copy files into ``<root>/<emotion>/``, keeping their original names.

    Despite the name, no renaming happens here: each file lands in the
    destination folder under its own base name.

    Args:
        file_list: Paths of the files to copy.
        emotion: Class name; defaults to the module-level loop variable ``emo``
            so existing one-argument calls keep working.
        root: Destination root; defaults to the module-level ``dest_root``.

    Returns:
        The number of files actually copied.
    """
    emotion = emo if emotion is None else emotion
    target_dir = (dest_root if root is None else Path(root)) / emotion
    target_dir.mkdir(parents=True, exist_ok=True)

    copied = 0
    for src_path in file_list:
        dst_path = target_dir / src_path.name
        # When a source folder is also the destination (as with the default
        # configuration above), copying a file onto itself makes shutil raise
        # SameFileError; such files are already in place, so skip them.
        if dst_path.exists() and dst_path.samefile(src_path):
            continue
        shutil.copy2(src_path, dst_path)
        copied += 1
    return copied


for emo in emotions:
    # collect all images for this emotion across source folders
    all_imgs = []
    for d in src_dirs:
        emo_folder = d / emo
        if emo_folder.is_dir():
            # "*.*" also matches dotted directory names; keep regular files only
            all_imgs += [p for p in emo_folder.glob("*.*") if p.is_file()]

    # copy everything that is not already in the destination folder
    copy_and_rename(all_imgs, emo)

In [None]:
import os
import random
import shutil
from pathlib import Path

# NOTE: this cell repeats the 80/20 split cell at the top of the notebook.

# 1. Configuration
random.seed(42)  # for reproducibility
src_dirs = [Path("all_data")]
emotions = ["angry", "disgust", "fear", "happy", "neutral", "sad", "ahegao", "surprise"]
dest_root = Path("data_dir")
# Only the "train" fraction drives the cut; whatever is left goes to "val".
splits = {"train": 0.8, "val": 0.2}

# 2. Create directory structure
for split in splits:
    for emo in emotions:
        (dest_root / split / emo).mkdir(parents=True, exist_ok=True)


# 3. Gather, split, copy & rename
def gather_images(emotion, source_dirs):
    """Return the sorted files found in ``<source_dir>/<emotion>/`` folders.

    Args:
        emotion: Class sub-folder name to look up in every source directory.
        source_dirs: Iterable of candidate parent directories; ones without
            an ``<emotion>`` sub-folder are ignored.

    Returns:
        A list of ``Path`` objects in sorted order, so that a seeded shuffle
        of the result does not depend on filesystem listing order.
    """
    found = []
    for d in source_dirs:
        emo_folder = Path(d) / emotion
        if not emo_folder.is_dir():
            continue
        # The "*.*" pattern can match dotted directory names as well;
        # only regular files can be copied.
        found.extend(p for p in emo_folder.glob("*.*") if p.is_file())
    found.sort()
    return found


def copy_and_rename(file_list, split_name, emotion=None, root=None):
    """Copy files to ``<root>/<split_name>/<emotion>/<emotion>_<NNNN><ext>``.

    ``NNNN`` is the 1-based, zero-padded position of the file in
    ``file_list``; ``ext`` is the source suffix in lower case.

    Args:
        file_list: Paths to copy, in index order.
        split_name: Split sub-folder name (e.g. "train" or "val").
        emotion: Class name; falls back to the module-level loop variable
            ``emo`` so two-argument calls behave as before.
        root: Destination root; falls back to the module-level ``dest_root``.
    """
    emotion = emo if emotion is None else emotion
    target_dir = (dest_root if root is None else Path(root)) / split_name / emotion
    target_dir.mkdir(parents=True, exist_ok=True)
    for idx, src_path in enumerate(file_list, start=1):
        new_name = f"{emotion}_{idx:04d}{src_path.suffix.lower()}"
        shutil.copy2(src_path, target_dir / new_name)


for emo in emotions:
    # collect all images for this emotion across source folders
    all_imgs = gather_images(emo, src_dirs)
    random.shuffle(all_imgs)

    # compute split index
    n_total = len(all_imgs)
    n_train = int(splits["train"] * n_total)

    # do the splits
    copy_and_rename(all_imgs[:n_train], "train", emo)
    copy_and_rename(all_imgs[n_train:], "val", emo)

print("Done! Your data_dir/ train/ and val/ folders are ready.")

In [6]:
import os
import shutil

# --- CONFIGURATION ---
root_dir = 'all_data'  # change this to your dataset root
emotions = ["happy", "angry", "neutral", "ahegao", "surprise", "disgust", "sad", "fear"]
n_train = 24500  # the first n_train files (by sorted name) of each class go to train

train_dir = os.path.join(root_dir, 'train')
val_dir   = os.path.join(root_dir, 'val')

# --- CREATE DIRECTORY STRUCTURE ---
for base in (train_dir, val_dir):
    for emo in emotions:
        os.makedirs(os.path.join(base, emo), exist_ok=True)


# --- HELPERS ---
def list_class_files(src_dir):
    """Return the sorted names of the regular files directly inside ``src_dir``.

    Sub-directories are ignored. A missing ``src_dir`` yields an empty list
    instead of raising, so one absent class does not abort the whole run.
    """
    if not os.path.isdir(src_dir):
        return []
    return sorted(f for f in os.listdir(src_dir)
                  if os.path.isfile(os.path.join(src_dir, f)))


def copy_files(fnames, src_dir, dst_dir):
    """Copy each name in ``fnames`` from ``src_dir`` to ``dst_dir``, unchanged."""
    for fname in fnames:
        shutil.copy2(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))


# --- COPY FILES ---
for emo in emotions:
    src_dir = os.path.join(root_dir, emo)
    if not os.path.isdir(src_dir):
        # Without this guard os.listdir would raise FileNotFoundError here.
        print("Skipping '{}': {} is not a directory.".format(emo, src_dir))
        continue
    files = list_class_files(src_dir)

    # split point
    train_files = files[:n_train]
    val_files   = files[n_train:]

    # copy to train, then to val
    copy_files(train_files, src_dir, os.path.join(train_dir, emo))
    copy_files(val_files, src_dir, os.path.join(val_dir, emo))

print("Done!  {} images per class in train (up to {}), rest in val.".format(n_train, n_train))


Done!  24500 images per class in train (up to 24500), rest in val.


In [9]:
import shutil
from pathlib import Path

# 1. Configuration
src_dirs = [Path("imgs")]
emotions = ["angry", "disgust", "fear", "happy", "neutral", "sad", "ahegao", "surprise"]
dest_root = Path("all_data")
n_train = 24500

# 2. Make sure every <split>/<emotion> output folder exists
for split in ("train", "val"):
    for emo in emotions:
        out_dir = dest_root / split / emo
        out_dir.mkdir(parents=True, exist_ok=True)

# 3. Per emotion: gather files, order by name, cut at n_train, copy
for emo in emotions:
    # every regular file in <src>/<emo> for each source dir that has that folder
    candidates = (
        entry
        for src in src_dirs
        if (src / emo).is_dir()
        for entry in (src / emo).iterdir()
        if entry.is_file()
    )
    all_imgs = sorted(candidates, key=lambda entry: entry.name)

    # first n_train names go to train, the remainder to val
    train_imgs, val_imgs = all_imgs[:n_train], all_imgs[n_train:]

    # copy each group into its split folder, keeping the original file name
    for split, group in (("train", train_imgs), ("val", val_imgs)):
        split_dir = dest_root / split / emo
        for src_path in group:
            shutil.copy2(src_path, split_dir / src_path.name)

    print(f"{emo}: copied {len(train_imgs)} -> train/{emo}, {len(val_imgs)} -> val/{emo}")

print("Done!")


angry: copied 24500 -> train/angry, 6102 -> val/angry
disgust: copied 24500 -> train/disgust, 6046 -> val/disgust
fear: copied 24500 -> train/fear, 6242 -> val/fear
happy: copied 24500 -> train/happy, 6102 -> val/happy
neutral: copied 24500 -> train/neutral, 6196 -> val/neutral
sad: copied 24500 -> train/sad, 6236 -> val/sad
ahegao: copied 24500 -> train/ahegao, 6102 -> val/ahegao
surprise: copied 24500 -> train/surprise, 6150 -> val/surprise
Done!
