# Demo Notebook for Intelligent System Engineering (ISE)

In [None]:
import sys
import os

# Add the absolute path of the parent directory to the import search path
# (presumably so the project-local `fairlib` package imported below can be
# found when the notebook runs from its own folder — confirm).
sys.path.append(os.path.abspath(os.path.join('..')))

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import openml

import fairlib as fl

# Seed the global NumPy and PyTorch random number generators for reproducibility
np.random.seed(42)
torch.manual_seed(42)

## Loading and Preparing the Adult Dataset.
We will use the Adult dataset from OpenML. It contains demographic information about individuals, and the task is to predict whether an individual earns more than $50K per year.

In [None]:
# Fetch OpenML dataset 179 and keep only the first value returned by
# get_data(); the other three returned values are not used in this notebook.
openml_entry = openml.datasets.get_dataset(179)
adult_dataset, _, _, _ = openml_entry.get_data(dataset_format="dataframe")

# Call the label column 'income' and discard the 'fnlwgt' column.
adult_dataset = (
    adult_dataset
    .rename(columns={'class': 'income'})
    .drop(columns=["fnlwgt"])
)

Convert the pandas DataFrame into fairlib's enhanced `DataFrame`, on which the target and the sensitive attributes can be declared.

In [None]:
# Wrap the pandas frame in fairlib's DataFrame so that the target and the
# sensitive attributes can be declared on it.
adult = fl.DataFrame(adult_dataset)

# Setting the target feature and sensitive attributes
adult.targets = 'income'
adult.sensitive = ['sex', 'race']

## Metrics
### Disparate Impact (DI)
**Disparate impact** is a fairness metric in artificial intelligence that refers to indirect discrimination against protected groups (such as gender, race, etc.), even when sensitive attributes are not explicitly used by the model.

It occurs when the likelihood of receiving a favorable outcome (e.g., loan approval) differs significantly between groups, violating the principle of equal treatment.

In [None]:
# Disparate impact of the raw dataset. The result is looked up in the next
# cell with a {target column: value, sensitive column: value} dict.
di = adult.disparate_impact()
print(di)

In [None]:
print("DI value for unprivileged group (such as female):")
# NOTE(review): the lookup key names 'Male' while the message above refers to
# the unprivileged group. Presumably the key identifies the privileged
# (reference) value and the result describes the remaining group — confirm
# against the fairlib documentation.
print(di[{'income': ">50K", 'sex': 'Male'}])

| Disparate Impact Value | Interpretation                                          | Group Benefiting                         |
|------------------------|----------------------------------------------------------|------------------------------------------|
| ❗ < 0.5                | **Extreme disparity** (strong likelihood of bias)        | Privileged group benefits very strongly  |
| 0.5 – 0.8              | **Significant disparity** (likely bias)                  | Privileged group benefits strongly       |
| 0.8 – 0.9              | **Moderate disparity** (potential bias)                  | Privileged group benefits                |
| 0.9 – 1.0              | **Minimal disparity** (slight bias)                      | Privileged group benefits slightly       |
| 1.0                    | **Perfect fairness** (equal impact for both groups)      | Neither (equal outcomes)                 |
| 1.0 – 1.1              | **Minimal disparity** (slight bias)                      | Unprivileged group benefits slightly     |
| 1.1 – 1.2              | **Moderate disparity** (potential bias)                  | Unprivileged group benefits              |
| 1.2 – 1.5              | **Significant disparity** (likely bias)                  | Unprivileged group benefits strongly     |
| > 1.5                  | **Extreme disparity** (strong likelihood of bias)        | Unprivileged group benefits very strongly|

### Statistical Parity Difference (SPD)
**Statistical Parity Difference (SPD)** is a fairness metric that quantifies the difference in the probability of receiving a favorable outcome between a protected group and a reference (unprotected) group.
It reflects whether individuals from different groups are equally likely to receive positive predictions, regardless of their actual qualifications or features.

In [None]:
# Statistical parity difference of the raw dataset. The result is looked up
# in the next cell with a {target column: value, sensitive column: value} dict.
spd = adult.statistical_parity_difference()
print(spd)

In [None]:
print("SPD value for unprivileged group (such as female):")
# NOTE(review): the lookup key names 'Male' while the message above refers to
# the unprivileged group. Presumably the key identifies the privileged
# (reference) value — confirm against the fairlib documentation.
print(spd[{'income': ">50K", 'sex': 'Male'}])

| SPD Value        | Interpretation                                    | Group Benefiting                         |
|------------------|----------------------------------------------------|------------------------------------------|
| > 0.2            | **Significant disparity** (likely bias)           | Privileged group benefits strongly       |
| ❗ 0.1 – 0.2 | **Moderate disparity** (potential bias)           | Privileged group benefits                |
| 0.01 – 0.1       | **Minimal disparity** (slight bias)               | Privileged group benefits slightly       |
| 0                | **Perfect fairness** (equal treatment)           | Neither (equal outcomes)                 |
| -0.01 – -0.1     | **Minimal disparity** (slight bias)               | Unprivileged group benefits slightly     |
| -0.1 – -0.2      | **Moderate disparity** (potential bias)           | Unprivileged group benefits              |
| < -0.2           | **Significant disparity** (likely bias)           | Unprivileged group benefits strongly     |

## Dataset Preparation
After this preliminary analysis, the bias-mitigation algorithms can be applied.
They first require a dataset preparation phase.

In [None]:
# From this point on a single sensitive attribute is used, so that the binary
# classification algorithms can be applied.
adult.sensitive = ['sex']

# For every encoded column: the array of original labels. The position of a
# label in that array is the integer code that replaced it.
label_maps = {}

for col in adult.columns:
    # Columns that are neither object- nor category-typed are left untouched.
    if adult[col].dtype not in ('object', 'category'):
        continue
    adult[col], uniques = pd.factorize(adult[col])
    label_maps[col] = uniques

print(f"Dataset Form: {adult.shape}")
print(f"Target Column: {adult.targets}")
print(f"Sensitive Attributes: {adult.sensitive}")

adult.head()

In [None]:
# The position of a label in each printed array is the integer code that
# pd.factorize assigned to it. Later cells rely on the mapping below, so
# verify it against the printed output.
print(label_maps['sex']) # expected: Male: 0, Female: 1
print(label_maps['income']) # expected: <=50K: 0, >50K: 1

In [None]:
# Original string labels, positioned by the integer code they were encoded to.
sex_labels = label_maps['sex'].tolist()
income_labels = label_maps['income'].tolist()

# Rows: sex code, columns: income code, cells: number of records.
counts = adult.groupby('sex')['income'].value_counts().unstack()

# Swap the integer codes back to readable labels for the chart.
counts.index = [sex_labels[i] for i in counts.index]
counts.columns = [income_labels[i] for i in counts.columns]

# DataFrame.plot opens its own figure when no `ax` is passed, so a preceding
# plt.figure(figsize=...) only leaves an empty figure behind and its size is
# never applied to the chart. Pass the size (and tick rotation) to plot()
# directly and style the Axes it returns.
ax = counts.plot(kind='bar', stacked=True, figsize=(10, 6), rot=0)
ax.set_title('Distribution of income by gender')
ax.set_xlabel('Sex')
ax.set_ylabel('Count')
ax.legend(title='Income')
ax.figure.tight_layout()
plt.show()

In [None]:
# Features are every column except the target; the target is 'income'.
X = adult.drop(columns='income')
y = adult['income']

# Fairness information is maintained during dataframe operations
print(f"Sensitive Attributes: {X.sensitive}")

In [None]:
# Hold out 35% of the rows for testing; the fixed random_state keeps the
# split the same across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, random_state=42
)

# Demo Pre-processing
Pre-processing algorithms modify the dataset before training, so the training process itself does not need to be changed.
The main drawback of these algorithms is that they obfuscate the features.

Below is an example that uses the reweighing algorithm. It does not modify the features; instead, it generates sample weights that are used during training to make the classifier account for the bias present in the data, leaving the features unaffected.

In [None]:
def train_classifier(X_train, y_train, weight=None):
    """
    Fit a logistic regression model and return it.

    Parameters:
        X_train: training features, passed to ``fit`` unchanged.
        y_train: training labels, passed to ``fit`` unchanged.
        weight: optional per-sample weights, forwarded as ``sample_weight``.
            With the default ``None`` no explicit weights are supplied.

    Returns:
        The fitted ``LogisticRegression`` instance (created with
        ``random_state=42`` and ``max_iter=1000``).
    """
    # fit() returns the estimator itself, so it can be returned directly.
    return LogisticRegression(random_state=42, max_iter=1000).fit(
        X_train, y_train, sample_weight=weight
    )

def evaluate_fairness(X_test, y_pred, positive_target=1, favored_class=0,
                      *, target_column="income", sensitive_column="sex"):
    """
    Evaluate the fairness metrics (SPD and DI) of a set of predictions.

    The predictions are attached to a copy of ``X_test`` as the target column,
    so the metrics describe the model's outputs rather than the true labels.
    ``X_test`` itself is not modified.

    Parameters:
        X_test: feature frame that contains ``sensitive_column``.
        y_pred: predicted target values, one per row of ``X_test``.
        positive_target: value of the target column used in the metric
            lookup key (default 1).
        favored_class: value of the sensitive column used in the metric
            lookup key (default 0).
        target_column: name under which ``y_pred`` is stored and which is
            declared as the target (default "income").
        sensitive_column: name of the column declared as sensitive
            (default "sex").

    Returns:
        A ``(spd, di)`` tuple: the statistical parity difference and the
        disparate impact selected by the lookup key
        ``{target_column: positive_target, sensitive_column: favored_class}``.
    """
    # Work on a copy so the caller's frame does not gain a prediction column.
    frame = X_test.copy()
    frame[target_column] = y_pred

    dataset = fl.DataFrame(frame)
    dataset.targets = target_column
    dataset.sensitive = sensitive_column

    # Both metrics are indexed with the same (target value, sensitive value) key.
    key = {target_column: positive_target, sensitive_column: favored_class}
    spd = dataset.statistical_parity_difference()[key]
    di = dataset.disparate_impact()[key]
    return spd, di

To test the effectiveness of the algorithm, a classifier will be trained on the non-preprocessed dataset and then the same classifier will be trained on the preprocessed dataset.

This way we can compare the accuracy and fairness metrics of the two models.

In [None]:
# Train a baseline classifier without fairness preprocessing
# (no sample weights are passed)
baseline_clf = train_classifier(X_train, y_train)

# Evaluate the baseline model: accuracy against the true labels, fairness
# metrics on the predicted labels (evaluate_fairness is called with its
# default positive_target=1 and favored_class=0)
baseline_pred = baseline_clf.predict(X_test)
baseline_accuracy = accuracy_score(y_test, baseline_pred)
baseline_spd, baseline_di = evaluate_fairness(X_test, baseline_pred)

print(f"Baseline Model Accuracy: {baseline_accuracy:.4f}")
print(f"Baseline Statistical Parity Difference: {baseline_spd}")
print(f"Baseline Disparate Impact: {baseline_di}")

In [None]:
from fairlib import Reweighing

# Run the reweighing pre-processor on the whole encoded dataset. The returned
# frame carries a 'weights' column, which a later cell reads.
# NOTE(review): a later cell drops 'weights' from `adult` itself, which
# suggests fit_transform also adds the column to its input in place — confirm.
reweighing = Reweighing()
reweighed_df = reweighing.fit_transform(adult)
# Bare expression: displays the frame as the cell output.
reweighed_df

In [None]:
# Rebuild features and target from the reweighed frame. Only 'income' is
# removed, so X still includes the 'weights' column at this point.
X = reweighed_df.drop(columns='income')
y = reweighed_df['income']

# Same test_size and random_state as the baseline split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, random_state=42
)


In [None]:
# The reweighing output is a set of sample weights, not a feature. Keep the
# training weights aside for fit(sample_weight=...) and remove the column from
# both splits, so the classifier is neither trained nor evaluated on it.
# (Reweighing weights are typically derived from the sensitive group and the
# label, so leaving the column among the features would leak the label.)
weights = X_train['weights']
X_train = X_train.drop(columns='weights')
X_test = X_test.drop(columns='weights')

In [None]:
# Train the same classifier as the baseline, this time passing the
# reweighing weights as per-sample weights
clf_trained_with_rew = train_classifier(X_train, y_train, weight=weights)

# Evaluate the reweighed model: accuracy against the true labels, fairness
# metrics on the predicted labels
baseline_rew_pred = clf_trained_with_rew.predict(X_test)
reweighed_accuracy = accuracy_score(y_test, baseline_rew_pred)
reweighed_spd, reweighed_di = evaluate_fairness(X_test, baseline_rew_pred)

print(f"Baseline Model With Rew. Dataset Accuracy: {reweighed_accuracy:.4f}")
print(f"Baseline Model With Rew. Dataset Statistical Parity Difference: {reweighed_spd}")
print(f"Baseline Model With Rew. Disparate Impact: {reweighed_di}")

In [None]:
# Summary table. The 'SPD' column stores |SPD| and the 'DI' column stores
# |DI - 1| (distance from parity), so for both columns lower is better.
results = pd.DataFrame({
    'Model': ['Baseline', 'Reweighing'],
    'Accuracy': [baseline_accuracy, reweighed_accuracy],
    'SPD': [abs(baseline_spd), abs(reweighed_spd)],
    'DI': [abs(baseline_di - 1), abs(reweighed_di - 1)]
})

print("Comparison of Models:")
print(results)

# One row of three bar charts; the Figure handle itself is not needed.
_, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))

# Accuracy comparison (y-axis fixed to the 0.7-0.9 range)
ax1.bar(results['Model'], results['Accuracy'])
ax1.set_title('Accuracy')
ax1.set_ylim(0.7, 0.9)

# SPD comparison: plots |SPD| (lower is better)
ax2.bar(results['Model'], results['SPD'])
ax2.set_title('Statistical Parity Difference (lower is better)')
ax2.set_ylim(0, 0.3)

# DI comparison: plots |DI - 1|, not DI itself (lower is better)
ax3.bar(results['Model'], results['DI'])
ax3.set_title('Disparate Impact (lower is better)')
ax3.set_ylim(0, 2)

plt.tight_layout()
plt.show()

# Demo In-Processing

In [None]:
# Remove the weights column from `adult` in place before the in-processing
# demo. This raises a KeyError if the column is absent, e.g. when the cell is
# run a second time.
adult.drop(columns='weights', inplace=True)

In [None]:
# Split features and target
X = adult.drop(columns=['income'])
y = adult['income']

# Split into training and testing sets
# (test_size is 0.3 here, whereas the pre-processing demo used 0.35)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")

# Number of training epochs passed to fit() for both adversarial models below
EPOCHS = 50

To test the effectiveness of the algorithm, we first create a model with the parameter `lambda_adv` set to 0. This yields a baseline model that applies no fairness constraint.

Next, we train the same model with `lambda_adv` set to 1. This adds the fairness constraint to the training, which should make the model's predictions fairer.

In [None]:
from fairlib import AdversarialDebiasing

# Baseline network: same architecture as the fair model trained later, with
# the adversarial term switched off through lambda_adv=0.
baseline_model = AdversarialDebiasing(
    input_dim=X_train.shape[1],  # one input per feature column
    hidden_dim=8,
    output_dim=1,
    sensitive_dim=1,
    lambda_adv=0, # No fairness intervention, baseline model
)

baseline_model.fit(X_train, y_train, num_epochs=EPOCHS)
# The predict() output is detached, moved to the CPU and converted to a NumPy
# array. NOTE(review): accuracy_score in the next cell needs hard class
# labels — presumably predict() already returns 0/1 labels; confirm.
y_pred = baseline_model.predict(X_test).detach().cpu().numpy()

In [None]:
# These assignments overwrite baseline_accuracy, baseline_spd and baseline_di
# computed for the logistic-regression baseline in the pre-processing demo.
baseline_accuracy = accuracy_score(y_test.values, y_pred)
baseline_spd, baseline_di = evaluate_fairness(X_test, y_pred)

In [None]:
# Report the metrics of the lambda_adv=0 model
print(f"Baseline model accuracy: {baseline_accuracy}")
print(f"Statistical Parity Difference (SPD): {baseline_spd}")
print(f"Disparate Impact (DI): {baseline_di}")

## Application of Adversarial Debiasing

In [None]:
# Same import as in the baseline cell; repeating it lets this cell run on its own.
from fairlib import AdversarialDebiasing

# Same architecture as the baseline model; only lambda_adv differs.
fair_model = AdversarialDebiasing(
    input_dim=X_train.shape[1],  # one input per feature column
    hidden_dim=8,
    output_dim=1,
    sensitive_dim=1,
    lambda_adv=1, # Fairness intervention
)

fair_model.fit(X_train, y_train, num_epochs=EPOCHS)
# Overwrites the y_pred produced by the baseline model.
y_pred = fair_model.predict(X_test).detach().cpu().numpy()

In [None]:
# Accuracy against the true labels, fairness metrics on the predicted labels
adv_accuracy = accuracy_score(y_test.values, y_pred)
adv_spd, adv_di = evaluate_fairness(X_test, y_pred)

In [None]:
# Report the metrics of the lambda_adv=1 model
print(f"Fair model accuracy: {adv_accuracy}")
print(f"Statistical Parity Difference (SPD): {adv_spd}")
print(f"Disparate Impact (DI): {adv_di}")

In [None]:
# Summary table. The 'SPD' column stores |SPD| and the 'DI' column stores
# |DI - 1| (distance from parity), so for both columns lower is better.
results = pd.DataFrame({
    'Model': ['Baseline', 'Adv Debiasing'],
    'Accuracy': [baseline_accuracy, adv_accuracy],
    'SPD': [abs(baseline_spd), abs(adv_spd)],
    'DI': [abs(baseline_di - 1), abs(adv_di - 1)]
})

print("Comparison of Models:")
print(results)

# Visualize the results: one row of three bar charts
_, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))

# Accuracy comparison (y-axis fixed to the 0.7-0.9 range)
ax1.bar(results['Model'], results['Accuracy'])
ax1.set_title('Accuracy')
ax1.set_ylim(0.7, 0.9)

# SPD comparison: plots |SPD| (lower is better)
ax2.bar(results['Model'], results['SPD'])
ax2.set_title('Statistical Parity Difference (lower is better)')
ax2.set_ylim(0, 0.3)

# DI comparison: plots |DI - 1|, not DI itself (lower is better)
ax3.bar(results['Model'], results['DI'])
ax3.set_title('Disparate Impact (lower is better)')
ax3.set_ylim(0, 2)

plt.tight_layout()
plt.show()