In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import joblib
import insightface
from sklearn.svm import SVR
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Load the metadata table, derive each image's path on disk, and keep only the
# rows whose image file is actually present. The index is renumbered afterwards.
df = (
    pd.read_csv("Data/data.csv")
    .assign(full_path=lambda frame: 'Data/images/' + frame['name'])
    .loc[lambda frame: frame['full_path'].apply(os.path.exists)]
    .reset_index(drop=True)
)

# Encode gender as integer codes; the fitted encoder is kept in `le`.
le = LabelEncoder()
df['gender_encoded'] = le.fit_transform(df['gender'])  # e.g., female=0, male=1

In [5]:
# Build the InsightFace 'buffalo_l' face-analysis pipeline on the CPU provider.
arcface = insightface.app.FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
arcface.prepare(ctx_id=0)

# Prepare storage
all_embeddings = []  # one embedding per image in which a face was found
valid_idx = []       # positional row numbers in `df`, aligned with `all_embeddings`

print("🔍 Extracting ArcFace embeddings...")
# Enumerate positions explicitly so that `df.iloc[valid_idx]` below is correct
# whatever index labels `df` happens to carry.
for i, image_path in enumerate(tqdm(df['full_path'], total=len(df))):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file exists but
        # cannot be decoded; treat that as a failed extraction instead of
        # letting the detector crash part-way through a long run.
        continue
    faces = arcface.get(img)
    if faces:
        # Only the first returned face is used for the image.
        all_embeddings.append(faces[0].embedding)
        valid_idx.append(i)

if not all_embeddings:
    # np.vstack would fail on an empty list with a much less helpful message.
    raise ValueError("No face embeddings could be extracted from any image.")

# Final valid DataFrame: rows of `df` with an embedding, renumbered so that
# row k of `df_valid` corresponds to row k of `X_face`.
df_valid = df.iloc[valid_idx].reset_index(drop=True)
X_face = np.vstack(all_embeddings)
# Regression target used downstream: natural log of BMI.
df_valid['log_bmi'] = np.log(df_valid['bmi'])

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/sijiehuang/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/sijiehuang/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/sijiehuang/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/sijiehuang/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/sijiehuang/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3,

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4
100%|██████████| 3962/3962 [12:19<00:00,  5.36it/s]


In [6]:
# Number of input rows for which no embedding was obtained.
failures = df.shape[0] - df_valid.shape[0]

In [7]:
# Report the absolute number of images that yielded no embedding.
print(f"{failures} images failed to extract embeddings.")

754 images failed to extract embeddings.


In [9]:
# Express the failure count as a share of all images that were attempted.
failed_percentage = 100 * (failures / len(df))
print(f"Failed to extract embeddings for {failed_percentage:.2f}% of images.")

Failed to extract embeddings for 19.03% of images.


In [10]:
# Train a model for each gender
for gender_label in df_valid['gender'].unique():
    print(f"\n🚻 Training model for gender: {gender_label}")
    gender_df = df_valid[df_valid['gender'] == gender_label].copy()
    gender_idx = gender_df.index

    X_gender = X_face[gender_idx]
    y_gender = gender_df['log_bmi'].values
    is_train = gender_df['is_training'].values

    # Split
    X_train, X_test = X_gender[is_train == 1], X_gender[is_train == 0]
    y_train, y_test = y_gender[is_train == 1], y_gender[is_train == 0]

    # Scale
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train SVR
    svr = SVR(kernel='rbf', C=10, epsilon=0.2)
    svr.fit(X_train_scaled, y_train)

    # Save
    model_name = gender_label.lower()
    joblib.dump(svr, f"svr_model_{model_name}.pkl")
    joblib.dump(scaler, f"scaler_{model_name}.pkl")

    # Evaluate
    y_pred = np.exp(svr.predict(X_test_scaled))
    y_true = np.exp(y_test)
    r, _ = pearsonr(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred, squared=False)

    print(f"📈 Pearson r: {r:.3f}, MAE: {mae:.2f}, RMSE: {rmse:.2f}")


🚻 Training model for gender: Male
📈 Pearson r: 0.679, MAE: 5.14, RMSE: 7.32

🚻 Training model for gender: Female
📈 Pearson r: 0.716, MAE: 5.37, RMSE: 7.57


