Requirements:

-   data/fer2013new.csv: Obtained directly from FER+'s repo
-   data/ferplus_raw: Directory generated by running FER+'s src/generate_training_data.py script


In [None]:
import os
import shutil
from pathlib import Path

import pandas as pd

In [None]:
# Load the FER+ annotation table. Later cells read its "Usage" and
# "Image name" columns plus one vote-count column per emotion.
ferplus_df = pd.read_csv("data/fer2013new.csv")
ferplus_df

In [None]:
# Translate each "Usage" split into the sub-directory of data/ferplus_raw
# that holds its images. Usage values missing from the mapping become NaN.
usage_to_raw_dir = {
    "Training": "FER2013Train",
    "PublicTest": "FER2013Valid",
    "PrivateTest": "FER2013Test",
}
ferplus_df["base_path"] = ferplus_df["Usage"].map(usage_to_raw_dir)
ferplus_df

In [None]:
# Emotion vote columns that are eligible to become an image's label.
label_columns = [
    "neutral", "happiness", "surprise", "sadness",
    "anger", "disgust", "fear", "contempt",
]

# Total votes per emotion across the whole table; choose_label uses these
# totals to break ties in favour of the least represented emotion.
emotion_counts = ferplus_df.loc[:, label_columns].sum(axis=0)
emotion_counts

In [None]:
def choose_label(row):
    """Return the emotion with the highest vote count in ``row``.

    Only the columns named in ``label_columns`` take part in the vote. If
    several emotions share the top count, the one with the smallest total in
    ``emotion_counts`` (the least represented emotion overall) is returned.
    """
    emotion_votes = row[label_columns]
    is_top = emotion_votes == emotion_votes.max()
    candidates = emotion_votes.loc[is_top].index

    if len(candidates) != 1:
        # Tie: prefer the emotion with the fewest votes across the dataset.
        return emotion_counts[candidates].idxmin()

    return candidates[0]

In [None]:
# Collapse the per-emotion vote counts of every row into a single "label"
# column by running choose_label row by row (axis=1).
ferplus_df["label"] = ferplus_df.apply(choose_label, axis=1)
ferplus_df

In [None]:
# Show how many rows ended up with each label.
ferplus_df["label"].value_counts()

In [None]:
def export_images(row):
    """Copy one image from the raw FER+ tree into a per-label directory layout.

    The source file is ``data/ferplus_raw/<base_path>/<Image name>`` and the
    destination is ``data/ferplus/<Usage>/<label>/<Image name>``. Missing
    destination directories are created on demand.

    Args:
        row: A row providing the "Image name", "Usage", "base_path" and
            "label" fields.

    Returns:
        The destination ``Path`` on success, or ``None`` when the row could
        not be processed (the error is printed and the caller can continue).
    """
    try:
        filename = row["Image name"]
        usage = row["Usage"]
        base_path = row["base_path"]
        cls = row["label"]

        src_path = Path("data/ferplus_raw") / base_path / filename
        dst_path = Path("data/ferplus") / usage / cls / filename

        dst_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src_path, dst_path)
    except Exception as e:
        # Deliberately best-effort: one bad row must not abort the export of
        # the remaining rows, so report the failure and move on.
        # The row summary is built outside the f-string because reusing
        # double quotes inside a replacement field is a SyntaxError on
        # Python versions older than 3.12.
        row_info = row[["Usage", "Image name", "label"]]
        print(f"Error processing row {row_info}: {e}")
        return None

    return dst_path

In [None]:
# Export every row's image. The displayed result holds the destination path
# for each row, or None where export_images reported an error.
ferplus_df.apply(export_images, axis=1)