    # Prejudice Remover
This notebook demonstrates the use of the Prejudice Remover algorithm from the FairLib library. The Prejudice Remover is an in-processing fairness algorithm that aims to reduce bias in machine learning models by removing prejudice from the training data.

In [36]:
import sys
import os

# Add the root directory of the project to PYTHONPATH
sys.path.append(os.path.abspath(os.path.join('..')))

In [37]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import openml

from fairlib import DataFrame
from fairlib.inprocessing import PrejudiceRemover
from fairlib.metrics import statistical_parity_difference, disparate_impact

# Set random seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x130adb610>

In [38]:
adult_dataset = openml.datasets.get_dataset(179)
adult_X, _, _, _ = adult_dataset.get_data(dataset_format="dataframe")

adult_X.rename(columns={'class': 'income'}, inplace=True)

adult = DataFrame(adult_X)

adult.targets = 'income'
adult.sensitive = ['sex']

adult.drop(columns=["fnlwgt"], inplace=True)

label_maps = {}

for col in adult.columns:
    if adult[col].dtype == 'object' or adult[col].dtype == 'category':
        adult[col], uniques = pd.factorize(adult[col])
        label_maps[col] = uniques

print(f"Dataset Form: {adult.shape}")
print(f"Target Column: {adult.targets}")
print(f"Sensitive Attributes: {adult.sensitive}")

adult.head(50)

Dataset Form: (48842, 14)
Target Column: {'income'}
Sensitive Attributes: {'sex'}


Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,native-country,income
0,0,0,0,13,0,0,0,0,0,0,0,0,0,0
1,1,1,0,13,1,1,1,0,0,1,0,1,0,0
2,0,2,1,9,2,2,0,0,0,1,0,0,0,0
3,1,2,2,7,1,2,1,1,0,1,0,0,0,0
4,2,2,0,13,1,3,2,1,1,1,0,0,1,0
5,0,2,3,14,1,1,2,0,1,1,0,0,0,0
6,1,2,4,5,3,4,0,1,1,1,0,1,2,0
7,1,1,1,9,1,1,1,0,0,1,0,0,0,1
8,2,2,3,14,0,3,0,0,1,2,0,2,0,1
9,0,2,0,13,1,1,1,0,0,3,0,0,0,1


In [39]:
X = adult.drop(columns=['income'])
y = adult['income'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


# Convert to tensors for the baseline model
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)


In [40]:
def evaluate_fairness(X_test, y_pred, positive_target=1, favored_class=0):
    """
    Evaluate the fairness metrics (SPD and DI) of the predictions.
    The positive_class and unfavored_class parameters allow you to specify
    which target is considered positive and which is considered unfavored.
    """
    X_test = X_test.copy()
    X_test["income"] = y_pred
    dataset = DataFrame(X_test)
    dataset.targets = "income"
    dataset.sensitive = "sex"

    spd = dataset.statistical_parity_difference()[{'income': positive_target, 'sex': favored_class}]
    di = dataset.disparate_impact()[{'income': positive_target, 'sex': favored_class}]
    return spd, di

In [41]:
class BaseModel(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
        nn.Linear(input_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, 16),
        nn.ReLU(),
        nn.Linear(16, 8),
        nn.ReLU(),
        nn.Linear(8, 1),
        nn.Sigmoid(),
    )

    def forward(self, x):
        return self.net(x)


input_dim = X_train_tensor.shape[1]
base_model = BaseModel(input_dim)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(base_model.parameters(), lr=0.001)


In [42]:
epochs = 20
batch_size = 128

for epoch in range(epochs):
    base_model.train()
    epoch_loss = 0.0
    correct = 0

    for i in range(0, len(X_train_tensor), batch_size):
        batch_X = X_train_tensor[i:i + batch_size]
        batch_y = y_train_tensor[i:i + batch_size]

        outputs = base_model(batch_X)
        loss = criterion(outputs, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        predictions = (outputs > 0.5).float()
        correct += (predictions == batch_y).sum().item()

    if (epoch + 1) % 10 == 0:
        accuracy = 100 * correct / len(X_train_tensor)
        print(f"Epoch {epoch + 1:02d} – loss: {epoch_loss:.4f} – acc: {accuracy:.2f}%")

Epoch 10 – loss: 92.8684 – acc: 84.05%
Epoch 20 – loss: 90.7060 – acc: 84.32%


In [43]:
SENSITIVE_COL_NAME = "sex"

etas = [0.0, 0.1, 0.3, 0.5, 0.6, 0.8]

accuracies = {}
spd_values = {}
di_values = {}


# ---- Training ----
train_data = DataFrame(X_train.copy())
train_data['income'] = y_train.values
train_data.targets = 'income'
train_data.sensitive = ['sex']

In [46]:
for eta in etas:
    print(f"\n=== Training Prejudice Remover with η = {eta} ===")

    # fresh copy of the base network
    model = BaseModel(input_dim)
    pr_model = PrejudiceRemover(
        torchModel=model,
        loss=nn.BCELoss(),
        eta=eta,
    )

    pr_model.fit(train_data, epochs=epochs, batch_size=batch_size)

    # ---- Evaluation ----
    X_test_df = DataFrame(X_test.copy())          # keep it as DataFrame
    y_pred = pr_model.predict(X_test_df)                 # tensor
    y_pred_binary = (y_pred > 0.5).float().detach().cpu().numpy()

    # Accuracy
    acc = accuracy_score(y_test, y_pred_binary)
    accuracies[eta] = acc

    spd, di = evaluate_fairness(X_test_df, y_pred_binary, positive_target=1, favored_class=0)
    spd_values[eta] = spd
    di_values[eta] = di

    print(f"Accuracy: {acc:.4f} | SPD: {spd:.4f} | DI: {di:.4f}")


=== Training Prejudice Remover with η = 0.0 ===
Accuracy: 0.8318 | SPD: 0.2343 | DI: 0.2862

=== Training Prejudice Remover with η = 0.1 ===
Accuracy: 0.8287 | SPD: 0.0978 | DI: 0.3376

=== Training Prejudice Remover with η = 0.3 ===
Accuracy: 0.8394 | SPD: 0.1499 | DI: 0.3163

=== Training Prejudice Remover with η = 0.5 ===
Accuracy: 0.8415 | SPD: 0.1953 | DI: 0.2609

=== Training Prejudice Remover with η = 0.6 ===
Accuracy: 0.8419 | SPD: 0.1862 | DI: 0.2923

=== Training Prejudice Remover with η = 0.8 ===
Accuracy: 0.8358 | SPD: 0.1471 | DI: 0.2561


In [47]:
print("\nSummary by η:")
for eta in etas:
    print(
        f"η = {eta:<3}  "
        f"acc = {accuracies[eta]:.4f}  "
        f"SPD = {spd_values[eta]:.4f}  "
        f"DI = {di_values[eta]:.4f}"
    )


Summary by η:
η = 0.0  acc = 0.8318  SPD = 0.2343  DI = 0.2862
η = 0.1  acc = 0.8287  SPD = 0.0978  DI = 0.3376
η = 0.3  acc = 0.8394  SPD = 0.1499  DI = 0.3163
η = 0.5  acc = 0.8415  SPD = 0.1953  DI = 0.2609
η = 0.6  acc = 0.8419  SPD = 0.1862  DI = 0.2923
η = 0.8  acc = 0.8358  SPD = 0.1471  DI = 0.2561
