 ## 1. Environment setup

 ### 1.1. Import required packages

In [None]:
import os
import warnings

# Runs in its own cell ahead of the `import tensorflow` cell: sets TensorFlow's
# C++ log-level variable to "3" and silences every Python warning.
# NOTE(review): the blanket filter also hides deprecation notices from the
# plotting / data libraries — consider narrowing it.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
warnings.filterwarnings("ignore")

In [None]:
import json
from dataclasses import dataclass
from io import BytesIO
from typing import Optional

import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tf_keras_vis
from huggingface_hub import hf_hub_download
from mediapipe.tasks.python.core.base_options import BaseOptions
from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarker, FaceLandmarkerOptions
from PIL import Image
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tf_keras_vis.gradcam_plus_plus import GradcamPlusPlus
from tqdm import tqdm

# Register tqdm with pandas so the `progress_apply` calls used below are available.
tqdm.pandas()

 ### 1.2. Define global variables

In [None]:
# Seed passed to every sampling / splitting call in this notebook.
RANDOM_SEED = 42

# Dataset selection and sampling.
DATASET_NAME = "utkface"  # dataset name under the "rixmape/" Hugging Face namespace
DATASET_SIZE = 2000  # target number of rows sampled before splitting
DATASET_MALE_RATIO = 0.5  # share of DATASET_SIZE drawn from rows with gender == 0
DATASET_FEMALE_RATIO = 0.5  # share of DATASET_SIZE drawn from rows with gender == 1
DATASET_IMAGE_SHAPE = (48, 48, 1)  # shape every image is converted to before training
DATASET_VALIDATION_RATIO = 0.2  # fraction of the whole sampled dataset
DATASET_TEST_RATIO = 0.1  # fraction of the whole sampled dataset

# Optional zero-masking experiment; masking is skipped while gender or region is None.
FEATURE_MASK_GENDER = None  # gender label whose training images get masked
FEATURE_MASK_REGION = None  # key into the facial region map
FEATURE_MASK_PADDING = 0  # pixels added around the masked region's bounding box

# Training hyper-parameters.
MODEL_TRAINING_EPOCHS = 10
MODEL_TRAINING_BATCH_SIZE = 64
MODEL_TRAINING_DIRECTORY = "gender_classifier.keras"  # NOTE(review): not referenced in the visible code

# Explanation / bias-analysis settings.
ANALYSIS_TARGET_LAYER = "block3_conv3"  # layer name handed to Grad-CAM++ as `penultimate_layer`
ANALYSIS_ACTIVATION_PERCENTILE = 80  # NOTE(review): not referenced in the visible code
ANALYSIS_DISTANCE_METRIC = "euclidean"  # NOTE(review): not referenced in the visible code
ANALYSIS_FEATURE_IMPORTANCE_THRESHOLD = 0.5  # a region counts as important when its mean activation exceeds this

 ## 2. Data preparation

 ### 2.1. Download dataset from HuggingFace

In [None]:
def load_dataset() -> pd.DataFrame:
    """Download the dataset parquet shard from Hugging Face and decode its images.

    Returns:
        DataFrame with the columns ``image``, ``gender``, ``race`` and ``age``;
        each ``image`` cell holds the decoded picture as a NumPy array.
    """

    def _decode(record) -> np.ndarray:
        # The encoded picture is stored under the record's "bytes" key.
        return np.array(Image.open(BytesIO(record["bytes"])))

    parquet_file = hf_hub_download(
        repo_id=f"rixmape/{DATASET_NAME}",
        filename="data/train-00000-of-00001.parquet",
        repo_type="dataset",
    )
    frame = pd.read_parquet(parquet_file, columns=["image", "gender", "race", "age"])
    frame["image"] = frame["image"].progress_apply(_decode)
    return frame


# Load the full dataset and preview its first rows as the cell output.
dataset = load_dataset()
dataset.head()

 ### 2.2. Sample dataset with specific gender distribution

In [None]:
def _sample_by_demographic_strata(data: pd.DataFrame, total_sample_size: int) -> list:
    """Sample data proportionally from each demographic stratum to maintain diversity."""
    data["strata"] = data["race"].astype(str) + "_" + data["age"].astype(str)
    strat_samples = []

    for _, group in data.groupby("strata"):
        group_size = len(group)
        group_ratio = group_size / len(data)
        stratum_sample_size = round(total_sample_size * group_ratio)
        if stratum_sample_size > 0:
            strat_samples.append(group.sample(n=stratum_sample_size, random_state=RANDOM_SEED, replace=(group_size < stratum_sample_size)))

    return [sample.drop(columns=["strata"]) for sample in strat_samples]


def sample_with_gender_ratio(data: pd.DataFrame) -> pd.DataFrame:
    """Build a shuffled sample with the configured male/female proportions.

    For each gender label (0 uses ``DATASET_MALE_RATIO``, 1 uses
    ``DATASET_FEMALE_RATIO``) the matching rows are sampled per race/age
    stratum, then all samples are concatenated, shuffled with ``RANDOM_SEED``
    and re-indexed from zero.

    Args:
        data: Frame with ``gender``, ``race`` and ``age`` columns.

    Returns:
        The shuffled, re-indexed sample.
    """
    per_gender_frames = []

    for gender_id, ratio in ((0, DATASET_MALE_RATIO), (1, DATASET_FEMALE_RATIO)):
        subset = data[data["gender"] == gender_id].copy(deep=True)
        if subset.empty:
            continue
        pieces = _sample_by_demographic_strata(subset, round(DATASET_SIZE * ratio))
        if pieces:
            per_gender_frames.append(pd.concat(pieces))

    combined = pd.concat(per_gender_frames)
    shuffled = combined.sample(frac=1, random_state=RANDOM_SEED)
    return shuffled.reset_index(drop=True)


# Replace the full dataset with the gender-balanced sample and print its summary.
dataset = sample_with_gender_ratio(dataset)
dataset.info()

In [None]:
def plot_demographics(data: pd.DataFrame) -> plt.Figure:
    """Draw side-by-side count plots of the gender, age and race columns.

    Args:
        data: Frame containing ``gender``, ``age`` and ``race`` columns.

    Returns:
        The figure holding the three count plots.
    """
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))

    for panel, column in zip(axes, ("gender", "age", "race")):
        sns.countplot(data=data, x=column, ax=panel)
        panel.set_title(f"{column.capitalize()} Distribution")
        panel.tick_params(axis="x")
        panel.set_ylabel("")

    plt.tight_layout()
    return fig


# Show the demographic make-up of the sampled dataset.
fig = plot_demographics(dataset)

 ### 2.3. Split dataset into training, validation, and test sets

In [None]:
def split_dataset(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split ``df`` into train, validation and test frames, stratified by gender.

    The test split is carved out first. The validation ratio is then rescaled
    so that it stays a fraction of the whole dataset, not of the remainder.

    Args:
        df: Frame with a ``gender`` column.

    Returns:
        ``(train, val, test)`` frames.
    """
    # Validation share expressed relative to what is left after removing the test split.
    val_share_of_remainder = DATASET_VALIDATION_RATIO / (1 - DATASET_TEST_RATIO)

    remainder, test = train_test_split(
        df,
        test_size=DATASET_TEST_RATIO,
        random_state=RANDOM_SEED,
        stratify=df["gender"],
    )
    train, val = train_test_split(
        remainder,
        test_size=val_share_of_remainder,
        random_state=RANDOM_SEED,
        stratify=remainder["gender"],
    )
    return train, val, test


# Produce the three splits used by the rest of the notebook.
train_set, val_set, test_set = split_dataset(dataset)

In [None]:
# Drop the name for the un-split sample; only the three splits are used from here on.
del dataset

 ### 2.4. Apply zero masking to a facial region

In [None]:
def _load_face_landmarker() -> FaceLandmarker:
    """Download the MediaPipe landmarker asset and build a detector from it.

    Returns:
        A ``FaceLandmarker`` created from the downloaded ``.task`` file.
    """
    asset_path = hf_hub_download(
        repo_id="rixmape/biasx-models",
        filename="mediapipe_landmarker.task",
        repo_type="model",
    )
    base_options = BaseOptions(model_asset_path=asset_path)
    landmarker_options = FaceLandmarkerOptions(base_options=base_options)
    return FaceLandmarker.create_from_options(landmarker_options)


def _load_facial_region_map() -> dict[str, list[int]]:
    """Download and parse the JSON file mapping facial regions to landmark indices.

    Returns:
        The parsed JSON content; callers use it as a mapping from region name
        to a list of landmark indices.
    """
    data_path = hf_hub_download(repo_id="rixmape/biasx-models", filename="landmark_map.json", repo_type="model")
    # Read as UTF-8 explicitly so parsing does not depend on the platform's default encoding.
    with open(data_path, encoding="utf-8") as file:
        return json.load(file)


def _detect_facial_landmarks(image: np.ndarray, landmarker: FaceLandmarker, image_format: mp.ImageFormat = mp.ImageFormat.SRGB) -> Optional[list]:
    """Run the landmarker on ``image`` and return the first detected face.

    Args:
        image: Pixel data handed to ``mp.Image``; must match ``image_format``.
        landmarker: Detector used for the lookup.
        image_format: MediaPipe image format describing ``image``.

    Returns:
        The landmark list of the first detected face, or ``None`` when the
        detector reports no face. The annotation now reflects the ``None`` case.
    """
    detection = landmarker.detect(mp.Image(image_format=image_format, data=image))
    faces = detection.face_landmarks
    return faces[0] if faces else None


def _convert_to_pixel_coordinates(landmarks: list, image_size: tuple[int, int]) -> list[tuple[int, int]]:
    """Convert normalized landmark coordinates to image pixel coordinates."""
    height, width = image_size[:2]
    return [(int(point.x * width), int(point.y * height)) for point in landmarks]


def _get_region_bounding_box(landmarks: list[tuple[int, int]], region_map: dict[str, list[int]]) -> tuple[int, int, int, int]:
    """Calculate the bounding box coordinates for a specific facial region."""
    region_points = [landmarks[i] for i in region_map[FEATURE_MASK_REGION]]
    min_x = max(0, min(x for x, _ in region_points) - FEATURE_MASK_PADDING)
    min_y = max(0, min(y for _, y in region_points) - FEATURE_MASK_PADDING)
    max_x = max(x for x, _ in region_points) + FEATURE_MASK_PADDING
    max_y = max(y for _, y in region_points) + FEATURE_MASK_PADDING
    return (int(min_x), int(min_y), int(max_x), int(max_y))


def _apply_region_masking(image: np.ndarray, landmarker: FaceLandmarker, region_map: dict[str, list[int]]) -> np.ndarray:
    """Zero out the configured facial region on a copy of ``image``.

    Args:
        image: Picture to mask; left untouched.
        landmarker: Detector used to locate the face.
        region_map: Region name to landmark ids.

    Returns:
        A masked copy, or ``image`` itself when no face is detected.
    """
    landmarks = _detect_facial_landmarks(image, landmarker)
    if not landmarks:
        return image

    points = _convert_to_pixel_coordinates(landmarks, image.shape)
    masked = image.copy()
    left, top, right, bottom = _get_region_bounding_box(points, region_map)
    # Rows are indexed by y and columns by x.
    masked[top:bottom, left:right] = 0
    return masked


def apply_feature_masking(data: pd.DataFrame) -> pd.DataFrame:
    """Mask the configured facial region on images of the configured gender.

    Args:
        data: Frame with ``gender`` and ``image`` columns.

    Returns:
        ``data`` itself when ``FEATURE_MASK_GENDER`` or ``FEATURE_MASK_REGION``
        is ``None``; otherwise a copy whose matching images are masked.
    """
    masking_enabled = FEATURE_MASK_GENDER is not None and FEATURE_MASK_REGION is not None
    if not masking_enabled:
        return data

    landmarker = _load_face_landmarker()
    region_map = _load_facial_region_map()

    def _mask(img):
        return _apply_region_masking(img, landmarker, region_map)

    masked_frame = data.copy()
    targeted_rows = masked_frame["gender"] == FEATURE_MASK_GENDER
    masked_frame.loc[targeted_rows, "image"] = masked_frame.loc[targeted_rows, "image"].progress_apply(_mask)
    return masked_frame


# Masking is applied to the training split only; validation and test stay unmasked.
train_set = apply_feature_masking(train_set)

In [None]:
def plot_image_grid(data: pd.DataFrame, rows: int = 4, cols: int = 8, title: Optional[str] = None) -> plt.Figure:
    """Display the first ``rows * cols`` images of ``data`` in a grid.

    Args:
        data: Frame with an ``image`` column of arrays.
        rows: Number of grid rows.
        cols: Number of grid columns.
        title: Optional figure title.

    Returns:
        The figure containing the grid. Cells beyond ``len(data)`` stay blank.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 or single-row grid,
    # so flattening never fails.
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    axes = axes.flatten()

    # Pick the colormap from the first image: single-channel data (2-D arrays
    # or a trailing channel axis of size 1) is shown in gray. An empty frame
    # simply produces a blank grid.
    cmap = None
    if len(data) > 0:
        first_image = np.asarray(data.iloc[0]["image"])
        if first_image.ndim == 2 or first_image.shape[2] == 1:
            cmap = "gray"

    for i, ax in enumerate(axes):
        if i < len(data):
            ax.imshow(np.array(data.iloc[i]["image"]), cmap=cmap)
        ax.axis("off")

    if title:
        fig.suptitle(title, fontsize=16)
    plt.tight_layout()
    return fig


# Preview the training images before preprocessing (after the optional masking step).
fig = plot_image_grid(train_set, title="Original Training Set")

 ### 2.5. Preprocess image for model input

In [None]:
def _preprocess_single_image(image: np.ndarray) -> np.ndarray:
    """Convert ``image`` to a normalized single-channel array of the model input shape.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.

    Returns:
        ``float32`` array of shape ``DATASET_IMAGE_SHAPE`` with values scaled
        by ``1 / 255``.
    """
    height, width = DATASET_IMAGE_SHAPE[:2]
    # PIL's resize takes (width, height), the reverse of NumPy's (rows, cols)
    # order. Passing the shape tuple unchanged only worked for square targets.
    resized = Image.fromarray(image).convert("L").resize((width, height))
    pixels = np.array(resized, dtype=np.float32)
    return (pixels / 255.0).reshape(DATASET_IMAGE_SHAPE)


def prepare_dataset(data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``data`` whose images are preprocessed for the model.

    Args:
        data: Frame with an ``image`` column.

    Returns:
        A new frame; the input is not modified.
    """
    prepared = data.copy()
    converted_images = prepared["image"].apply(_preprocess_single_image)
    prepared["image"] = converted_images
    return prepared


# Preprocess all three splits the same way, then preview the result for the training split.
train_set = prepare_dataset(train_set)
val_set = prepare_dataset(val_set)
test_set = prepare_dataset(test_set)

fig = plot_image_grid(train_set, title="Preprocessed Training Set")

 ## 3. Model training

 ### 3.1. Design model architecture and build model instance

In [None]:
def build_gender_classifier() -> tf.keras.Model:
    """Assemble and compile the convolutional gender classifier.

    The network stacks three convolution blocks (64, 128 and 256 filters),
    each followed by 2x2 max pooling, then a 512-unit dense layer with
    dropout and a two-way softmax output.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = tf.keras.layers
    # (block number, filters per convolution, number of convolutions)
    conv_blocks = ((1, 64, 2), (2, 128, 2), (3, 256, 3))

    model = tf.keras.Sequential(name="gender_classifier")
    model.add(layers.Input(shape=DATASET_IMAGE_SHAPE, name="input"))

    for block_id, filters, conv_count in conv_blocks:
        for conv_id in range(1, conv_count + 1):
            model.add(layers.Conv2D(filters, (3, 3), activation="relu", padding="same", name=f"block{block_id}_conv{conv_id}"))
        model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name=f"block{block_id}_pool"))

    classifier_head = (
        layers.Flatten(name="flatten"),
        layers.Dense(512, activation="relu", name="dense_1"),
        layers.Dropout(0.5, name="dropout"),
        layers.Dense(2, activation="softmax", name="dense_output"),
    )
    for head_layer in classifier_head:
        model.add(head_layer)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.0001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


# Instantiate the untrained classifier.
model = build_gender_classifier()


 ### 3.2. Train model

In [None]:
def extract_features_and_labels(df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]:
    """Turn a frame into a stacked image array and a label array.

    Args:
        df: Frame with an ``image`` column of equally shaped arrays and a
            ``gender`` column.

    Returns:
        ``(features, labels)`` where ``features`` stacks the images along a
        new leading axis and ``labels`` holds the ``gender`` values.
    """
    image_column = df["image"]
    gender_column = df["gender"]
    return np.stack(image_column.values), gender_column.values


def train_gender_classifier(model: tf.keras.Model, train_df: pd.DataFrame, val_df: pd.DataFrame) -> dict[str, list[float]]:
    """Fit ``model`` on the training frame with early stopping on validation loss.

    Args:
        model: Compiled classifier; trained in place.
        train_df: Frame with ``image`` and ``gender`` columns used for fitting.
        val_df: Frame with the same columns used for validation.

    Returns:
        The per-epoch metric history recorded by ``model.fit``.
    """
    train_data, train_labels = extract_features_and_labels(train_df)
    val_data, val_labels = extract_features_and_labels(val_df)

    # A single callback object. The previous trailing comma wrapped it in a
    # tuple, so `callbacks` received a list containing a tuple.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

    history = model.fit(
        train_data,
        train_labels,
        validation_data=(val_data, val_labels),
        batch_size=MODEL_TRAINING_BATCH_SIZE,
        epochs=MODEL_TRAINING_EPOCHS,
        callbacks=[early_stopping],
        verbose=1,
        shuffle=True,
    )
    return history.history


# Train the model and keep the per-epoch metrics for plotting.
history = train_gender_classifier(model, train_set, val_set)

 ### 3.3. Visualize training history

In [None]:
def plot_training_metrics(history: dict[str, list[float]]) -> plt.Figure:
    """Plot training vs. validation accuracy and loss, one panel each.

    Args:
        history: Metric history containing the keys ``accuracy``,
            ``val_accuracy``, ``loss`` and ``val_loss``.

    Returns:
        The figure with the accuracy panel on the left and the loss panel on
        the right.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    hist_data = pd.DataFrame(history)

    # (training column, validation column, panel title, y-axis label)
    panels = (
        ("accuracy", "val_accuracy", "Model Accuracy", "Accuracy"),
        ("loss", "val_loss", "Model Loss", "Loss"),
    )
    for panel, (train_key, val_key, heading, y_label) in zip(axes, panels):
        sns.lineplot(data=hist_data[[train_key, val_key]], ax=panel, dashes=[(None, None), (2, 2)])
        panel.set_title(heading)
        panel.set_xlabel("Epoch")
        panel.set_ylabel(y_label)
        panel.legend()

    plt.tight_layout()
    return fig


# Visualize how accuracy and loss evolved during training.
fig = plot_training_metrics(history)

In [None]:
# Training is finished; the remaining cells only use `test_set`.
del train_set, val_set

 ### 3.4. Predict gender of each image in test set

In [None]:
def predict_gender(model: tf.keras.Model, data: pd.DataFrame) -> pd.DataFrame:
    """Predict a gender label for every image and return the result as a new frame.

    Args:
        model: Trained classifier exposing ``predict``.
        data: Frame with an ``image`` column of equally shaped arrays. It is
            not modified, matching the other transforms in this notebook that
            copy their input.

    Returns:
        A copy of ``data`` with an added ``prediction`` column holding the
        index of the highest-scoring class.
    """
    class_scores = model.predict(np.stack(data["image"].values))
    result = data.copy()
    result["prediction"] = class_scores.argmax(axis=1)
    return result


def plot_confusion_matrix(data: pd.DataFrame) -> plt.Figure:
    """Draw the male/female confusion matrix as an annotated heatmap.

    Args:
        data: Frame with ``gender`` (actual) and ``prediction`` columns using
            the labels 0 (male) and 1 (female).

    Returns:
        The figure containing the heatmap.
    """
    y_true = data["gender"]
    y_pred = data["prediction"]
    # Fix the label order so the matrix is always 2x2 and lines up with the
    # tick labels, even when one class is missing from the data.
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, xticklabels=["Male", "Female"], yticklabels=["Male", "Female"], ax=ax)

    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title("Confusion Matrix")

    plt.tight_layout()
    return fig


# Add predictions to the test split and plot how they compare with the true labels.
test_set = predict_gender(model, test_set)
fig = plot_confusion_matrix(test_set)

 ## 4. Visual explanation

 ### 4.1. Generate class activation map using GradCAM++

In [None]:
def _generate_activation_map(visualizer: tf_keras_vis.ModelVisualization, row: pd.Series) -> np.ndarray:
    """Compute the activation map of one image for its true gender class.

    Args:
        visualizer: Callable visualizer (Grad-CAM++ in this notebook).
        row: Row providing the ``image`` array and the ``gender`` label.

    Returns:
        The first (only) map produced for the single-image batch.
    """

    def score_fn(output):
        # Score of the row's actual gender for the first sample in the batch.
        return output[0][row["gender"]]

    single_image_batch = row["image"][np.newaxis, ...]
    maps = visualizer(score_fn, single_image_batch, penultimate_layer=ANALYSIS_TARGET_LAYER)
    return maps[0]


def compute_activation_maps(model: tf.keras.Model, data: pd.DataFrame) -> pd.DataFrame:
    """Attach a Grad-CAM++ activation map to every row of ``data``.

    Args:
        model: Trained classifier to explain.
        data: Frame with ``image`` and ``gender`` columns.

    Returns:
        A copy of ``data`` with an added ``activation_map`` column.
    """

    def _use_linear_output(m):
        # Swap the last layer's activation for a linear one before visualizing.
        return setattr(m.layers[-1], "activation", tf.keras.activations.linear)

    visualizer = GradcamPlusPlus(model, model_modifier=_use_linear_output)

    with_maps = data.copy()
    with_maps["activation_map"] = with_maps.progress_apply(lambda row: _generate_activation_map(visualizer, row), axis=1)
    return with_maps


def visualize_activation_heatmaps(data: pd.DataFrame, target_col: str, rows: int = 4, cols: int = 8, title: Optional[str] = None) -> plt.Figure:
    """Show images with their activation maps overlaid as translucent heatmaps.

    Args:
        data: Frame with an ``image`` column and the column named by ``target_col``.
        target_col: Name of the column holding the activation maps.
        rows: Number of grid rows.
        cols: Number of grid columns.
        title: Optional figure title.

    Returns:
        The figure containing the grid. Cells beyond ``len(data)`` stay blank.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 or single-row grid,
    # so flattening never fails.
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    axes = axes.flatten()

    for i, ax in enumerate(axes):
        if i < len(data):
            record = data.iloc[i]
            ax.imshow(record["image"], cmap="gray")
            # Half-transparent heatmap drawn on top of the grayscale image.
            ax.imshow(record[target_col], cmap="jet", alpha=0.5)
        ax.axis("off")

    if title:
        fig.suptitle(title, fontsize=16)

    plt.tight_layout()
    return fig


# Compute an activation map per test image, then preview the overlays.
test_set = compute_activation_maps(model, test_set)
fig = visualize_activation_heatmaps(test_set, target_col="activation_map", title="Activation Maps Generated by Grad-CAM++")

 ### 4.2. Detect facial landmarks using MediaPipe

In [None]:
@dataclass
class FacialRegionBox:
    """Axis-aligned bounding box around one named facial region.

    Boxes are created without a score; ``importance_score`` is filled in later
    when the box is compared against an activation map.
    """

    # Top-left corner of the box.
    min_x: int
    min_y: int
    # Bottom-right corner of the box.
    max_x: int
    max_y: int
    # Name of the region as keyed in the facial region map (e.g. "nose").
    region_name: str
    # Mean activation inside the box; None until it has been computed.
    importance_score: Optional[float] = None


def _extract_region_boxes(landmarker: FaceLandmarker, region_map: dict[str, list[int]], image: np.ndarray) -> list[FacialRegionBox]:
    """Build one bounding box per facial region for a preprocessed image.

    Args:
        landmarker: Detector used to locate the face.
        region_map: Region name to landmark ids.
        image: Single-channel image that is scaled by 255 before detection.
            # assumes values lie in [0, 1], as the preprocessing cell produces — TODO confirm

    Returns:
        A ``FacialRegionBox`` for every entry of ``region_map``, or an empty
        list when no face is detected. This used to be an empty dict, which
        contradicted the declared return type.
    """
    # The scaling already yields a new array, so no explicit copy is needed
    # before converting to the 3-channel format passed to the detector.
    rgb_image = cv2.cvtColor((image * 255).astype(np.uint8), cv2.COLOR_GRAY2RGB)

    landmarks = _detect_facial_landmarks(rgb_image, landmarker)
    if landmarks is None:
        return []

    pixel_landmarks = _convert_to_pixel_coordinates(landmarks, image.shape)

    regions = []
    for region_name, landmark_ids in region_map.items():
        region_points = [pixel_landmarks[i] for i in landmark_ids]
        # Only the minimum corner is clamped at zero; the maximum corner is kept as detected.
        min_x = max(0, min(x for x, _ in region_points))
        min_y = max(0, min(y for _, y in region_points))
        max_x = max(x for x, _ in region_points)
        max_y = max(y for _, y in region_points)
        regions.append(FacialRegionBox(min_x, min_y, max_x, max_y, region_name))

    return regions


def identify_facial_regions(data: pd.DataFrame) -> pd.DataFrame:
    """Attach the detected facial region boxes to every row of ``data``.

    Args:
        data: Frame with an ``image`` column of preprocessed images.

    Returns:
        A copy of ``data`` with an added ``region_boxes`` column.
    """
    landmarker = _load_face_landmarker()
    region_map = _load_facial_region_map()

    def _boxes_for(image):
        return _extract_region_boxes(landmarker, region_map, image)

    with_boxes = data.copy()
    with_boxes["region_boxes"] = with_boxes["image"].progress_apply(_boxes_for)
    return with_boxes


def visualize_region_boxes(data: pd.DataFrame, box_col: str, rows: int = 4, cols: int = 8, title: Optional[str] = None) -> plt.Figure:
    """Draw each image with its facial region boxes outlined in per-region colors.

    Args:
        data: Frame with an ``image`` column and the column named by ``box_col``.
        box_col: Name of the column holding iterables of ``FacialRegionBox``.
        rows: Number of grid rows.
        cols: Number of grid columns.
        title: Optional figure title.

    Returns:
        The figure containing the grid. Cells beyond ``len(data)`` stay blank.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 or single-row grid,
    # so flattening never fails.
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    axes = axes.flatten()

    # fmt: off
    region_colors = {
        "left_eye": (255, 0, 0),           # Red
        "right_eye": (0, 255, 0),          # Green
        "nose": (0, 0, 255),               # Blue
        "lips": (255, 255, 0),             # Yellow
        "left_cheek": (255, 0, 255),       # Magenta
        "right_cheek": (0, 255, 255),      # Cyan
        "chin": (255, 128, 0),             # Orange
        "forehead": (128, 0, 255),         # Purple
        "left_eyebrow": (0, 128, 128),     # Teal
        "right_eyebrow": (128, 128, 0),    # Olive
    }
    # fmt: on

    for i, ax in enumerate(axes):
        if i < len(data):
            record = data.iloc[i]
            # The scaling produces a fresh array, so drawing never touches the stored image.
            rgb_image = cv2.cvtColor((record["image"] * 255).astype(np.uint8), cv2.COLOR_GRAY2RGB)
            for box in record[box_col]:
                # Regions without a configured color are outlined in black.
                color = region_colors.get(box.region_name, (0, 0, 0))
                cv2.rectangle(rgb_image, (box.min_x, box.min_y), (box.max_x, box.max_y), color, 1)
            ax.imshow(rgb_image)
        ax.axis("off")

    if title:
        fig.suptitle(title, fontsize=16)

    plt.tight_layout()
    return fig


# Detect the region boxes for every test image and preview them.
test_set = identify_facial_regions(test_set)
fig = visualize_region_boxes(test_set, "region_boxes", title="Facial Region Boxes")

 ### 4.3. Filter facial regions based on overlap with activation maps

In [None]:
def _compute_region_importance(region_box: FacialRegionBox, activation_map: np.ndarray) -> float:
    """Calculate importance score of a facial region based on activation intensity."""
    roi = activation_map[
        max(0, region_box.min_y) : min(activation_map.shape[0], region_box.max_y),
        max(0, region_box.min_x) : min(activation_map.shape[1], region_box.max_x),
    ]
    return np.mean(roi) if roi.size != 0 else 0.0


def _filter_regions_by_activation(region_boxes: list[FacialRegionBox], activation_map: np.ndarray) -> list[FacialRegionBox]:
    """Score every box and keep those above the importance threshold.

    Every box in ``region_boxes`` has its ``importance_score`` set in place,
    whether or not it is kept.

    Args:
        region_boxes: Boxes to score.
        activation_map: Activation map the boxes are scored against.

    Returns:
        The boxes whose score exceeds ``ANALYSIS_FEATURE_IMPORTANCE_THRESHOLD``,
        ordered from highest to lowest score.
    """
    kept = []

    for candidate in region_boxes:
        score = _compute_region_importance(candidate, activation_map)
        candidate.importance_score = score
        if score > ANALYSIS_FEATURE_IMPORTANCE_THRESHOLD:
            kept.append(candidate)

    kept.sort(key=lambda region: region.importance_score, reverse=True)
    return kept


def identify_important_regions(data: pd.DataFrame) -> pd.DataFrame:
    """Attach the highly activated facial regions to every row of ``data``.

    Args:
        data: Frame with ``region_boxes`` and ``activation_map`` columns.

    Returns:
        A copy of ``data`` with an added ``important_regions`` column.
    """
    with_regions = data.copy()
    with_regions["important_regions"] = with_regions.progress_apply(
        lambda row: _filter_regions_by_activation(row["region_boxes"], row["activation_map"]),
        axis=1,
    )
    return with_regions


# Keep only the strongly activated regions per test image and preview them.
test_set = identify_important_regions(test_set)
fig = visualize_region_boxes(test_set, "important_regions", title="Important Facial Regions")

## 5. Bias analysis

### 5.1. Compute feature bias scores

In [None]:
def analyze_facial_feature_bias(data: pd.DataFrame, misclassified_only: bool = False) -> dict:
    """Measure how often each facial region is important for male vs. female images.

    Args:
        data: Frame with ``gender``, ``prediction`` and ``important_regions`` columns.
        misclassified_only: When true, only rows whose prediction differs from
            the true gender are analyzed.

    Returns:
        Mapping from region name to a dict with ``male_prob`` and
        ``female_prob`` (share of images of that gender in which the region is
        important, rounded to two decimals) and ``bias_score`` (absolute
        difference of the two rounded shares).
    """
    if misclassified_only:
        subset = data[data["gender"] != data["prediction"]]
    else:
        subset = data
    feature_names = _load_facial_region_map().keys()

    total_male = (subset["gender"] == 0).sum()
    total_female = (subset["gender"] == 1).sum()

    tallies = {name: {"male_count": 0, "female_count": 0} for name in feature_names}

    for _, row in subset.iterrows():
        regions = row.get("important_regions")
        if not isinstance(regions, list):
            continue

        # Count each region at most once per image.
        region_names = {region.region_name for region in regions}

        gender = row["gender"]
        if gender == 0:
            count_key = "male_count"
        elif gender == 1:
            count_key = "female_count"
        else:
            continue

        for name in region_names:
            tallies[name][count_key] += 1

    def compute_stats(counts: dict[str, int]) -> dict[str, float]:
        male_share = round(counts["male_count"] / total_male, 2) if total_male else 0.0
        female_share = round(counts["female_count"] / total_female, 2) if total_female else 0.0
        gap = round(abs(male_share - female_share), 2)
        return {"male_prob": male_share, "female_prob": female_share, "bias_score": gap}

    return {name: compute_stats(counts) for name, counts in tallies.items()}


# Analyze the misclassified test images only and display the per-feature result.
feature_biases = analyze_facial_feature_bias(test_set, misclassified_only=True)
feature_biases

In [None]:
def visualize_feature_bias(test_set: pd.DataFrame, misclassified_only: bool = False) -> plt.Figure:
    """Plot per-feature male/female probabilities as bars with the bias score on top.

    Args:
        test_set: Frame passed on to ``analyze_facial_feature_bias``.
        misclassified_only: Restrict the analysis to misclassified rows.

    Returns:
        The figure with probability bars on the left axis and bias-score
        points on a secondary right axis.

    Raises:
        ValueError: If the bias analysis returns no features.
    """
    bias_dict = analyze_facial_feature_bias(test_set, misclassified_only)
    if not bias_dict:
        raise ValueError("No feature bias data available to visualize.")

    # One row per feature, then a long-format copy with one row per (feature, gender).
    per_feature = pd.DataFrame.from_dict(bias_dict, orient="index")
    per_feature = per_feature.reset_index().rename(columns={"index": "feature"})
    per_gender = per_feature.melt(
        id_vars="feature",
        value_vars=["male_prob", "female_prob"],
        var_name="gender",
        value_name="probability",
    )

    fig, prob_axis = plt.subplots(figsize=(10, 5))

    sns.barplot(data=per_gender, x="feature", y="probability", hue="gender", ax=prob_axis)
    prob_axis.set_ylabel("Probability")
    prob_axis.set_xlabel("Facial Feature")
    prob_axis.set_title("Facial Feature Probabilities and Bias Scores")

    # The bias score shares the x axis but gets its own y axis on the right.
    bias_axis = prob_axis.twinx()
    # NOTE(review): newer seaborn releases may warn about `scale` — confirm against the pinned version.
    sns.pointplot(data=per_feature, x="feature", y="bias_score", color="black", ax=bias_axis, markers="D", scale=1.2)
    bias_axis.set_ylabel("Bias Score (Absolute Difference)")
    fig.tight_layout()

    prob_axis.legend(loc="upper center", ncol=2)
    return fig


# Plot the feature bias for the misclassified test images.
fig = visualize_feature_bias(test_set, misclassified_only=True)

### 5.2. Compute average bias score

In [None]:
def compute_average_bias_score(test_set: pd.DataFrame, misclassified_only: bool = False) -> float:
    """Average the per-feature bias scores into a single number.

    Args:
        test_set: Frame passed on to ``analyze_facial_feature_bias``.
        misclassified_only: Restrict the analysis to misclassified rows.

    Returns:
        The mean bias score rounded to two decimals, or ``0.0`` when the
        analysis returns no features.
    """
    per_feature = analyze_facial_feature_bias(test_set, misclassified_only)
    if not per_feature:
        return 0.0

    scores = [stats["bias_score"] for stats in per_feature.values()]
    return round(sum(scores) / len(scores), 2)


# Summarize the bias of the misclassified test images as one number and display it.
average_bias = compute_average_bias_score(test_set, misclassified_only=True)
average_bias