In [1]:
!pip install fairlearn


Collecting fairlearn
  Downloading fairlearn-0.13.0-py3-none-any.whl.metadata (7.3 kB)
Collecting scipy<1.16.0,>=1.9.3 (from fairlearn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Downloading fairlearn-0.13.0-py3-none-any.whl (251 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scipy, fairlearn
  Attempting uninstall: scipy
    Found existing installation: scipy 1.16.3
    Uninstalling scipy-1.16.3:
      Successfully uninstalled scipy-1.16.3
Successfully installed fairlearn-0.13.0 sc

In [2]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from fairlearn.metrics import MetricFrame, selection_rate, true_positive_rate
from fairlearn.reductions import ExponentiatedGradient, DemographicParity


In [3]:
adult = fetch_openml('adult', version=2, as_frame=True)
df = adult.frame
df.head()


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,,103497,Some-college,10,Never-married,,Own-child,White,Female,0,0,30,United-States,<=50K


In [17]:
# Reload dataset (clean start)
adult = fetch_openml('adult', version=2, as_frame=True)
df = adult.frame

# Remove missing values
df = df.replace('?', np.nan).dropna()

# Separate target first
y = df['class'].apply(lambda x: 1 if x == '>50K' else 0)

# Remove target from features
X = df.drop('class', axis=1)

# Convert categorical columns to numbers safely
X = pd.get_dummies(X, drop_first=True)

# Sensitive attribute (gender)
sensitive_feature = df['sex'].apply(lambda x: 1 if x == 'Male' else 0)


In [19]:
X_train, X_test, y_train, y_test, sf_train, sf_test = train_test_split(
    X, y, sensitive_feature, test_size=0.3, random_state=42
)


In [20]:
baseline_model = LogisticRegression(max_iter=1000)
baseline_model.fit(X_train, y_train)

y_pred = baseline_model.predict(X_test)

print("Baseline Accuracy:", accuracy_score(y_test, y_pred))


Baseline Accuracy: 0.8449915235497899


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
metric_frame = MetricFrame(
    metrics={
        "Accuracy": accuracy_score,
        "Selection Rate": selection_rate,
        "True Positive Rate": true_positive_rate
    },
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sf_test
)

metric_frame.by_group


Unnamed: 0_level_0,Accuracy,Selection Rate,True Positive Rate
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.922012,0.084449,0.528926
1,0.808838,0.250514,0.596411


In [22]:
mitigator = ExponentiatedGradient(
    LogisticRegression(max_iter=1000),
    DemographicParity()
)

mitigator.fit(X_train, y_train, sensitive_features=sf_train)
y_pred_mitigated = mitigator.predict(X_test)


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [23]:
print("Mitigated Accuracy:", accuracy_score(y_test, y_pred_mitigated))

metric_frame_mitigated = MetricFrame(
    metrics={
        "Accuracy": accuracy_score,
        "Selection Rate": selection_rate,
        "True Positive Rate": true_positive_rate
    },
    y_true=y_test,
    y_pred=y_pred_mitigated,
    sensitive_features=sf_test
)

metric_frame_mitigated.by_group


Mitigated Accuracy: 0.8290705388074003


Unnamed: 0_level_0,Accuracy,Selection Rate,True Positive Rate
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.88671,0.16682,0.739669
1,0.802015,0.179357,0.46974


In [24]:
print("Selection Rate Difference (Before):",
      metric_frame.difference(method='between_groups')['Selection Rate'])

print("Selection Rate Difference (After):",
      metric_frame_mitigated.difference(method='between_groups')['Selection Rate'])


Selection Rate Difference (Before): 0.16606591262825987
Selection Rate Difference (After): 0.012536166319412345
