# Age and Gender Debiasing using UTKFace Dataset with W&B Grid Search

This notebook implements a debiased age prediction model using adversarial training with W&B hyperparameter sweep using grid search.


## üìö Import Required Libraries


In [1]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import wandb
try:
    from wandb.keras import WandbCallback
except ImportError:
    from wandb.integration.keras import WandbCallback
from tqdm import tqdm


## üñºÔ∏è Load and Preprocess Images


In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("jangedoo/utkface-new")
print("Path to dataset files:", path)


Using Colab cache for faster access to the 'utkface-new' dataset.
Path to dataset files: /kaggle/input/utkface-new


In [3]:
# Dataset path
data_dir = os.path.join(path, "UTKFace")

# Lists to store data
images, ages, genders, races = [], [], [], []

for img_name in tqdm(os.listdir(data_dir)):
    try:
        # UTKFace filenames: [age]_[gender]_[race]_*.jpg
        age, gender, race = img_name.split('_')[:3]
        age, gender, race = int(age), int(gender), int(race)

        img_path = os.path.join(data_dir, img_name)
        img = cv2.imread(img_path)
        img = cv2.resize(img, (96, 96))
        images.append(img)
        ages.append(age)
        genders.append(gender)
        races.append(race)
    except:
        continue

# Convert to numpy arrays
images = np.array(images)
ages = np.array(ages)
genders = np.array(genders)
races = np.array(races)

print("Total images loaded:", len(images))
print("Example labels:", ages[0], genders[0], races[0])


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23708/23708 [01:06<00:00, 354.26it/s]


Total images loaded: 23705
Example labels: 26 0 2


In [4]:
# -----------------------
# Gradient Reversal Layer
# -----------------------
class GradientReversalLayer(Layer):
    def __init__(self, alpha=1.0, **kwargs):
        super(GradientReversalLayer, self).__init__(**kwargs)
        self.alpha = alpha

    def call(self, x):
        @tf.custom_gradient
        def _flip_grad(x):
            def grad(dy):
                return -self.alpha * dy
            return x, grad
        return _flip_grad(x)


In [5]:
# -----------------------
# Helper function: Calculate MAE for each group
# -----------------------
def group_mae(true_labels, pred_labels, group_labels, group_names):
    """
    Calculate MAE for each group.
    """
    result = {}
    true_labels = np.array(true_labels).flatten()
    pred_labels = np.array(pred_labels).flatten()
    group_labels = np.array(group_labels).flatten()

    for group_id in np.unique(group_labels):
        mask = group_labels == group_id
        if np.sum(mask) > 0:
            mae = mean_absolute_error(true_labels[mask], pred_labels[mask])
            group_name = group_names.get(int(group_id), f"Group_{group_id}")
            result[group_name] = float(mae)

    return result


In [6]:
# -----------------------
# Prepare data (train/test split)
# -----------------------
X_train, X_test, age_train, age_test, gen_train, gen_test, race_train, race_test = train_test_split(
    images, ages, genders, races, test_size=0.2, random_state=42, shuffle=True
)

# Reshape age labels
age_train = np.array(age_train).reshape(-1, 1).astype('float32')
age_test = np.array(age_test).reshape(-1, 1).astype('float32')

# Process gender labels
gen_train_arr = np.array(gen_train).astype(int)
gen_test_arr = np.array(gen_test).astype(int)
gen_train_cat = to_categorical(gen_train_arr, num_classes=2).astype('float32')
gen_test_cat = to_categorical(gen_test_arr, num_classes=2).astype('float32')
gen_train_labels = gen_train_arr
gen_test_labels = gen_test_arr

# Process race labels
race_train_arr = np.array(race_train).flatten().astype(int)
race_test_arr = np.array(race_test).flatten().astype(int)
race_train_cat = to_categorical(race_train_arr, num_classes=5).astype('float32')
race_test_cat = to_categorical(race_test_arr, num_classes=5).astype('float32')


In [7]:
# -----------------------
# Preprocess images (resize + MobileNet preprocessing)
# -----------------------
def preprocess_images_for_mobilenet(X, target_size=(96, 96)):
    X_tf = tf.convert_to_tensor(X, dtype=tf.float32)
    X_resized = tf.image.resize(X_tf, target_size)
    X_pre = preprocess_input(X_resized)
    return X_pre.numpy()

print("Preprocessing training images...")
X_train_pre = preprocess_images_for_mobilenet(X_train, target_size=(96, 96))

print("Preprocessing test images...")
X_test_pre = preprocess_images_for_mobilenet(X_test, target_size=(96, 96))

print("Prepared image shapes:", X_train_pre.shape, X_test_pre.shape)
print("Label shapes:", age_train.shape, gen_train_cat.shape, race_train_cat.shape)


Preprocessing training images...
Preprocessing test images...
Prepared image shapes: (18964, 96, 96, 3) (4741, 96, 96, 3)
Label shapes: (18964, 1) (18964, 2) (18964, 5)


In [8]:
# -----------------------
# TRAINING FUNCTION FOR W&B SWEEP
# -----------------------
def train_with_sweep():
    """
    Training function that uses wandb.config for hyperparameter values
    """
    # Initialize wandb run for this sweep iteration
    run = wandb.init(project="age-gender-debiasing", reinit=True)

    # Get hyperparameters from wandb sweep config
    config = wandb.config

    batch_size = config.batch_size
    learning_rate = config.learning_rate
    dropout_rate = config.dropout_rate
    adv_loss_weight = config.adv_loss_weight
    n_unfreeze = config.n_unfreeze
    reduce_lr_factor = config.reduce_lr_factor
    reduce_lr_patience = config.reduce_lr_patience
    early_stop_patience = config.early_stop_patience
    weight_decay = config.weight_decay

    total_epochs = 50
    grl_alpha = 0.2
    phase1_fraction = 0.7
    phase1_epochs = int(total_epochs * phase1_fraction)
    phase2_epochs = total_epochs - phase1_epochs
    checkpoint_path = f"difnet96_best_{run.id}.h5"

    print(f"\n{'='*60}")
    print(f"Training with config:")
    print(f"  batch_size: {batch_size}")
    print(f"  learning_rate: {learning_rate}")
    print(f"  adv_loss_weight: {adv_loss_weight}")
    print(f"  dropout_rate: {dropout_rate}")
    print(f"  n_unfreeze: {n_unfreeze}")
    print(f"{'='*60}\n")

    # -----------------------
    # Build model
    # -----------------------
    input_layer = Input(shape=(96, 96, 3))

    mobilenet_base = MobileNetV2(input_shape=(96, 96, 3), include_top=False, weights='imagenet')
    mobilenet_base.trainable = False

    x = mobilenet_base(input_layer)
    x = GlobalAveragePooling2D()(x)

    concept_layer = Dense(128, activation='relu', name='concept_layer')(x)
    shared_dropout = Dropout(dropout_rate)(concept_layer)

    age_output = Dense(1, name='age_output')(shared_dropout)

    grl_gender = GradientReversalLayer(alpha=grl_alpha)(concept_layer)
    gender_output = Dense(2, activation='softmax', name='gender_output')(grl_gender)

    grl_race = GradientReversalLayer(alpha=grl_alpha)(concept_layer)
    race_output = Dense(5, activation='softmax', name='race_output')(grl_race)

    dif_model_full = Model(inputs=input_layer, outputs=[age_output, gender_output, race_output])
    dif_model_age_only = Model(inputs=input_layer, outputs=age_output)

    # -----------------------
    # Callbacks
    # -----------------------
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=reduce_lr_factor,
        patience=reduce_lr_patience,
        verbose=1,
        min_lr=1e-6
    )
    checkpoint = ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    )
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=early_stop_patience,
        restore_best_weights=True,
        verbose=1
    )

    wandb_callback = WandbCallback(
    monitor="val_loss",
    mode="min",
    log_weights=False,
    log_batch_frequency=10,
    log_model=False
)

    # -----------------------
    # PHASE 1: Train age-only model
    # -----------------------
    print("\n" + "="*60)
    print("PHASE 1: Training Age-Only Model")
    print("="*60)

    dif_model_age_only.compile(
        optimizer=Adam(learning_rate=learning_rate, weight_decay=weight_decay),
        loss='mse',
        metrics=['mae']
    )

    history1 = dif_model_age_only.fit(
        X_train_pre, age_train,
        validation_data=(X_test_pre, age_test),
        epochs=phase1_epochs,
        batch_size=batch_size,
        callbacks=[reduce_lr, checkpoint, wandb_callback],
        verbose=1
    )

    print("\nPhase 1 completed!")
    print(f"Best validation loss: {min(history1.history['val_loss']):.4f}")

    # -----------------------
    # PHASE 1.5: Warm-start adversarial heads
    # -----------------------
    print("\n" + "="*60)
    print("PHASE 1.5: Warm-Starting Adversarial Heads")
    print("="*60)

    for layer in mobilenet_base.layers:
        layer.trainable = False
    concept_layer.trainable = True
    shared_dropout.trainable = True

    temp_adv_warmup_model = Model(inputs=input_layer, outputs=[gender_output, race_output])
    temp_adv_warmup_model.compile(
        optimizer=Adam(learning_rate=learning_rate, weight_decay=weight_decay),
        loss={'gender_output': 'categorical_crossentropy', 'race_output': 'categorical_crossentropy'},
        metrics={'gender_output': 'accuracy', 'race_output': 'accuracy'}
    )

    history1_5 = temp_adv_warmup_model.fit(
        X_train_pre,
        y={'gender_output': gen_train_cat, 'race_output': race_train_cat},
        epochs=5,
        batch_size=batch_size,
        verbose=1
    )

    print("\nPhase 1.5 completed!")

    # -----------------------
    # PHASE 2: Full model with adversarial training
    # -----------------------
    print("\n" + "="*60)
    print("PHASE 2: Full Model with Adversarial Training")
    print("="*60)

    mobilenet_base.trainable = True
    if n_unfreeze > 0:
        for layer in mobilenet_base.layers[:-n_unfreeze]:
            layer.trainable = False
        for layer in mobilenet_base.layers[-n_unfreeze:]:
            layer.trainable = True

    dif_model_full.compile(
        optimizer=Adam(learning_rate=learning_rate * 0.5, weight_decay=weight_decay),
        loss={
            'age_output': 'mse',
            'gender_output': 'categorical_crossentropy',
            'race_output': 'categorical_crossentropy'
        },
        loss_weights={
            'age_output': 1.0,
            'gender_output': adv_loss_weight,
            'race_output': adv_loss_weight
        },
        metrics={
            'age_output': 'mae',
            'gender_output': 'accuracy',
            'race_output': 'accuracy'
        }
    )

    history2 = dif_model_full.fit(
        X_train_pre,
        {
            'age_output': age_train,
            'gender_output': gen_train_cat,
            'race_output': race_train_cat
        },
        validation_data=(
            X_test_pre,
            {
                'age_output': age_test,
                'gender_output': gen_test_cat,
                'race_output': race_test_cat
            }
        ),
        epochs=phase2_epochs,
        batch_size=batch_size,
        callbacks=[reduce_lr, checkpoint, early_stop, wandb_callback],
        verbose=1
    )

    print("\nPhase 2 completed!")
    print(f"Best validation loss: {min(history2.history['val_loss']):.4f}")

    # Load best weights
    if os.path.exists(checkpoint_path):
        dif_model_full.load_weights(checkpoint_path)
        print(f"Loaded best weights from {checkpoint_path}")

    # -----------------------
    # EVALUATION & RESULTS
    # -----------------------
    print("\n" + "="*60)
    print("EVALUATION RESULTS")
    print("="*60)

    preds = dif_model_full.predict(X_test_pre, batch_size=batch_size, verbose=0)
    pred_ages = np.array(preds[0]).flatten()

    gender_names = {0: 'male', 1: 'female'}
    race_names = {0: 'White', 1: 'Black', 2: 'Asian', 3: 'Indian', 4: 'Others'}

    # Calculate MAE by gender
    gender_mae = group_mae(age_test, pred_ages, gen_test_labels, gender_names)
    print("\nMAE by Gender:")
    for group, mae in gender_mae.items():
        print(f"  {group}: {mae:.4f}")

    # Calculate MAE by race
    race_mae = group_mae(age_test, pred_ages, race_test_arr, race_names)
    print("\nMAE by Race:")
    for group, mae in race_mae.items():
        print(f"  {group}: {mae:.4f}")

    # Calculate MAE by combined groups
    combined = {}
    for r in np.unique(race_test_arr):
        for g in np.unique(gen_test_labels):
            mask = (race_test_arr == r) & (gen_test_labels == g)
            if np.sum(mask) > 0:
                combined[f"{race_names[int(r)]}-{gender_names[int(g)]}"] = float(
                    mean_absolute_error(age_test[mask].flatten(), pred_ages[mask].flatten())
                )

    print("\nMAE by Race-Gender Combination:")
    for group, mae in combined.items():
        print(f"  {group}: {mae:.4f}")

    # Calculate overall metrics
    all_maes = list(gender_mae.values()) + list(race_mae.values()) + list(combined.values())
    worst_case_mae = max(all_maes)
    mean_mae = np.mean(all_maes)
    overall_mae = mean_absolute_error(age_test.flatten(), pred_ages)

    print("\n" + "-"*60)
    print("SUMMARY METRICS:")
    print(f"  Overall MAE: {overall_mae:.4f}")
    print(f"  Mean Group MAE: {mean_mae:.4f}")
    print(f"  Worst-Case MAE: {worst_case_mae:.4f}")
    print("-"*60)

    # Log metrics to wandb
    wandb.log({
        "overall_mae": overall_mae,
        "worst_group_mae": worst_case_mae,
        "mean_group_mae": mean_mae
    })

    # Log gender bias metrics
    for group, mae in gender_mae.items():
        wandb.log({f"gender_mae_{group}": mae})

    # Log race bias metrics
    for group, mae in race_mae.items():
        wandb.log({f"race_mae_{group}": mae})

    # Save the model
    model_path = f"age_gender_debiasing_model_{run.id}.h5"
    dif_model_full.save(model_path)
    print(f"\nModel saved as '{model_path}'")

    # Finish the run
    wandb.finish()

    return worst_case_mae


In [None]:
# -----------------------
# EXECUTE W&B SWEEP WITH GRID SEARCH
# -----------------------
# -----------------------
# EXECUTE W&B SWEEP WITH SMALL GRID
# -----------------------
sweep_config_grid = {
    'method': 'grid',
    'metric': {
        'name': 'worst_group_mae',
        'goal': 'minimize'
    },
    'parameters': {
        'batch_size': {
            'values': [16, 32]
        },
        'learning_rate': {
            'values': [0.0001, 0.0005, 0.001]
        },
        'dropout_rate': {
            'values': [0.25, 0.3, 0.35]
        },
        'adv_loss_weight': {
            'values': [0.005, 0.01, 0.02]
        },
        'n_unfreeze': {
            'values': [25, 30, 35]
        },
        'reduce_lr_factor': {
            'values': [0.5]
        },
        'reduce_lr_patience': {
            'values': [3]
        },
        'early_stop_patience': {
            'values': [5]
        },
        'weight_decay': {
            'values': [0.0001]
        }
    }
}


# Create sweep with grid search
sweep_id = wandb.sweep(sweep_config_grid, project="age-gender-debiasing")
print(f"\n{'='*60}")
print(f"Grid Search Sweep created with ID: {sweep_id}")
print(f"Project: age-gender-debiasing")
print(f"{'='*60}\n")

# Run the sweep
num_trials = 20  # Adjust this to control number of trials
print(f"Starting {num_trials} training runs with grid search...\n")

wandb.agent(sweep_id, function=train_with_sweep, count=num_trials, project="age-gender-debiasing")
andb: ERROR Run w2tau5qt errored: 'Node' object has no attribute 'inbound_layers'

Create sweep with ID: zrzs539m
Sweep URL: https://wandb.ai/lshearer2957-self/age-gender-debiasing/sweeps/zrzs539m

Grid Search Sweep created with ID: zrzs539m
Project: age-gender-debiasing

Starting 20 training runs with grid search...



[34m[1mwandb[0m: Agent Starting Run: bjgm4b8a with config:
[34m[1mwandb[0m: 	adv_loss_weight: 0.005
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	n_unfreeze: 25
[34m[1mwandb[0m: 	reduce_lr_factor: 0.5
[34m[1mwandb[0m: 	reduce_lr_patience: 3
[34m[1mwandb[0m: 	weight_decay: 0.0001
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.



Training with config:
  batch_size: 16
  learning_rate: 0.0001
  adv_loss_weight: 0.005
  dropout_rate: 0.25
  n_unfreeze: 25






PHASE 1: Training Age-Only Model
Epoch 1/35


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/wandb/agents/pyagent.py", line 304, in _run_job
    self._function()
  File "/tmp/ipython-input-2616066220.py", line 109, in train_with_sweep
    history1 = dif_model_age_only.fit(
               ^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.12/dist-packages/wandb/integration/keras/keras.py", line 661, in on_train_batch_end
    wandb.run.summary["graph"] = wandb.Graph.from_keras(self.model)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/wandb/sdk/data_types/graph.py", line 391, in from_keras
    for in_layer in _nest(in_node.inbound_layers):
                          ^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'Node' object has no attribute 'inbound_layers'



[34m[1mwandb[0m: [32m[41mERROR[0m Run bjgm4b8a errored: 'Node' object has no attribute 'inbound_layers'
[34m[1mwandb[0m: Agent Starting Run: w2tau5qt with config:
[34m[1mwandb[0m: 	adv_loss_weight: 0.005
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	n_unfreeze: 30
[34m[1mwandb[0m: 	reduce_lr_factor: 0.5
[34m[1mwandb[0m: 	reduce_lr_patience: 3
[34m[1mwandb[0m: 	weight_decay: 0.0001
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.



Training with config:
  batch_size: 16
  learning_rate: 0.0001
  adv_loss_weight: 0.005
  dropout_rate: 0.25
  n_unfreeze: 30


PHASE 1: Training Age-Only Model
Epoch 1/35


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/wandb/agents/pyagent.py", line 304, in _run_job
    self._function()
  File "/tmp/ipython-input-2616066220.py", line 109, in train_with_sweep
    history1 = dif_model_age_only.fit(
               ^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.12/dist-packages/wandb/integration/keras/keras.py", line 661, in on_train_batch_end
    wandb.run.summary["graph"] = wandb.Graph.from_keras(self.model)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/wandb/sdk/data_types/graph.py", line 391, in from_keras
    for in_layer in _nest(in_node.inbound_layers):
                          ^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'Node' object has no attribute 'inbound_layers'



[34m[1mwandb[0m: [32m[41mERROR[0m Run w2tau5qt errored: 'Node' object has no attribute 'inbound_layers'
[34m[1mwandb[0m: Agent Starting Run: dgz5p75j with config:
[34m[1mwandb[0m: 	adv_loss_weight: 0.005
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	n_unfreeze: 35
[34m[1mwandb[0m: 	reduce_lr_factor: 0.5
[34m[1mwandb[0m: 	reduce_lr_patience: 3
[34m[1mwandb[0m: 	weight_decay: 0.0001
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.



Training with config:
  batch_size: 16
  learning_rate: 0.0001
  adv_loss_weight: 0.005
  dropout_rate: 0.25
  n_unfreeze: 35


PHASE 1: Training Age-Only Model
