# RUN MODEL ON FAIRFACE

In [6]:
import pandas as pd
from pathlib import Path

FAIR_ROOT = Path("data/fairface")

# Load CSVs
fair_train_df = pd.read_csv(FAIR_ROOT / "fairface_label_train.csv")
fair_val_df = pd.read_csv(FAIR_ROOT / "fairface_label_val.csv")

# Add full paths to images
fair_train_df["filepath"] = fair_train_df["file"].apply(
    lambda x: str(FAIR_ROOT / "train" / x)
)
fair_val_df["filepath"] = fair_val_df["file"].apply(
    lambda x: str(FAIR_ROOT / "val" / x)
)

fair_train_df.head()

Unnamed: 0,file,age,gender,race,service_test,filepath
0,train/1.jpg,50-59,Male,East Asian,True,data/fairface/train/train/1.jpg
1,train/2.jpg,30-39,Female,Indian,False,data/fairface/train/train/2.jpg
2,train/3.jpg,3-9,Female,Black,False,data/fairface/train/train/3.jpg
3,train/4.jpg,20-29,Female,Indian,True,data/fairface/train/train/4.jpg
4,train/5.jpg,20-29,Female,Indian,True,data/fairface/train/train/5.jpg


In [7]:
import tensorflow as tf
import numpy as np
from PIL import Image

IMG_SIZE = 96


def load_and_preprocess_image(path):
    img = Image.open(path).convert("RGB")
    img = img.resize((IMG_SIZE, IMG_SIZE))
    arr = np.array(img).astype("float32")
    return arr

In [8]:
# emotion map uesed by RAF-DB
emotion_map = {
    0: "Surprise",
    1: "Fear",
    2: "Disgust",
    3: "Happy",
    4: "Sad",
    5: "Angry",
    6: "Neutral",
}

In [9]:
import tensorflow as tf

emotion_model = tf.keras.models.load_model("emotion_mobilenet_rafdb.keras")


def predict_emotion(path):
    img = load_and_preprocess_image(path)
    batch = img[None, ...]
    probs = emotion_model.predict(batch, verbose=0)[0]
    pred_label = probs.argmax()
    confidence = probs[pred_label]
    return pred_label, confidence

  saveable.load_own_variables(weights_store.get(inner_path))


In [10]:
FAIR_ROOT = Path("data/fairface")


def fix_path(row):
    # row.file looks like "val/1.jpg" or "train/1.jpg"
    folder, fname = row["file"].split("/")
    return FAIR_ROOT / folder / fname


fair_train_df["filepath"] = fair_train_df.apply(fix_path, axis=1)
fair_val_df["filepath"] = fair_val_df.apply(fix_path, axis=1)

In [11]:
N_PER_RACE = 5  # adjust if some races have fewer examples

sample_df = fair_val_df.groupby("race", group_keys=False).apply(
    lambda g: g.sample(min(len(g), N_PER_RACE), random_state=42)
)

len(sample_df), sample_df["race"].value_counts()

  sample_df = fair_val_df.groupby("race", group_keys=False).apply(


(35,
 race
 Black              5
 East Asian         5
 Indian             5
 Latino_Hispanic    5
 Middle Eastern     5
 Southeast Asian    5
 White              5
 Name: count, dtype: int64)

In [12]:
fair_preds = []

for count, row in enumerate(sample_df.itertuples(index=False), start=1):
    if count % 10 == 0:
        print(f"Processing {count}/{len(sample_df)} images...")

    pred_label, conf = predict_emotion(row.filepath)

    fair_preds.append(
        {
            "file": row.file,
            "filepath": row.filepath,
            "race": row.race,
            "gender": row.gender,
            "age": row.age,
            "pred_label": pred_label,
            "pred_emotion": emotion_map[pred_label],
            "confidence": conf,
        }
    )

fair_results_df = pd.DataFrame(fair_preds)
fair_results_df.head()

2025-11-22 15:30:16.013072: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Processing 10/35 images...
Processing 20/35 images...
Processing 30/35 images...


Unnamed: 0,file,filepath,race,gender,age,pred_label,pred_emotion,confidence
0,val/10528.jpg,data/fairface/val/10528.jpg,Black,Male,3-9,0,Surprise,0.617305
1,val/955.jpg,data/fairface/val/955.jpg,Black,Male,20-29,0,Surprise,0.616019
2,val/2231.jpg,data/fairface/val/2231.jpg,Black,Male,30-39,6,Neutral,0.453992
3,val/2968.jpg,data/fairface/val/2968.jpg,Black,Male,3-9,0,Surprise,0.961674
4,val/10033.jpg,data/fairface/val/10033.jpg,Black,Female,20-29,0,Surprise,0.527


In [13]:
# fair_results_df.to_csv("fairface_emotion_predictions_sample.csv", index=False)

In [14]:
fair_results_df["race"].value_counts()

race
Black              5
East Asian         5
Indian             5
Latino_Hispanic    5
Middle Eastern     5
Southeast Asian    5
White              5
Name: count, dtype: int64

In [15]:
fair_results_df["pred_emotion"].value_counts()

pred_emotion
Surprise    20
Sad         10
Neutral      4
Fear         1
Name: count, dtype: int64

In [16]:
# proportion of each predicted emotion within each race
emotion_dist = (
    fair_results_df.groupby("race")["pred_emotion"]
    .value_counts(normalize=True)
    .rename("proportion")
    .reset_index()
)

emotion_dist.head(20)

Unnamed: 0,race,pred_emotion,proportion
0,Black,Surprise,0.8
1,Black,Neutral,0.2
2,East Asian,Surprise,0.6
3,East Asian,Sad,0.4
4,Indian,Sad,0.4
5,Indian,Surprise,0.4
6,Indian,Neutral,0.2
7,Latino_Hispanic,Surprise,0.6
8,Latino_Hispanic,Neutral,0.2
9,Latino_Hispanic,Sad,0.2


In [17]:
conf_by_race = (
    fair_results_df.groupby("race")["confidence"]
    .agg(["mean", "std", "count"])
    .reset_index()
)

conf_by_race

Unnamed: 0,race,mean,std,count
0,Black,0.635198,0.194818,5
1,East Asian,0.612745,0.185413,5
2,Indian,0.690267,0.123038,5
3,Latino_Hispanic,0.608019,0.217079,5
4,Middle Eastern,0.552901,0.263578,5
5,Southeast Asian,0.664548,0.210057,5
6,White,0.577021,0.236005,5


In [18]:
conf_by_race_gender = (
    fair_results_df.groupby(["race", "gender"])["confidence"].mean().reset_index()
)

conf_by_race_gender

Unnamed: 0,race,gender,confidence
0,Black,Female,0.527
1,Black,Male,0.662248
2,East Asian,Female,0.507887
3,East Asian,Male,0.770031
4,Indian,Female,0.594996
5,Indian,Male,0.753781
6,Latino_Hispanic,Female,0.532787
7,Latino_Hispanic,Male,0.720868
8,Middle Eastern,Female,0.68528
9,Middle Eastern,Male,0.464648


In [19]:
emotion_dist_rg = (
    fair_results_df.groupby(["race", "gender", "pred_emotion"])
    .size()
    .groupby(level=[0, 1])
    .apply(lambda x: x / x.sum())
    .reset_index(name="proportion")
)

ValueError: cannot insert gender, already exists