# Evaluating Fairness in Machine Learning: Comparative Analysis and Benchmarking of Fairlearn and AIF360 | German Credit Dataset

In [15]:
import os
import random

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier

from src.constants import RANDOM_STATE, DEFAULT_MODEL_CONFIG

## Ensure reproducibility

Set random seeds for reproducibility.

In [27]:
# Seed the global RNGs of the standard library and NumPy from the single
# project-wide RANDOM_STATE constant.
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so this assignment
# does not change str/bytes hashing in the already-running kernel; it is only
# inherited by processes spawned from here on.
os.environ["PYTHONHASHSEED"] = str(RANDOM_STATE)

## Load data

In [28]:
# Name of the label column; it is split off as `y` and removed from the features.
TARGET: str = "Creditability"
# Name of the column passed to the fairness metric as the sensitive feature.
PROTECTED_ATTRIBUTE: str = "Sex & Marital Status"

In [29]:
# The path is relative, so it resolves against the kernel's working directory.
raw_csv_path = "../data/german_credit_data.csv"
data = pd.read_csv(raw_csv_path)

In [30]:
# Sanity check of the loaded frame: row count, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 21 columns):
 #   Column                             Non-Null Count  Dtype
---  ------                             --------------  -----
 0   Creditability                      1000 non-null   int64
 1   Account Balance                    1000 non-null   int64
 2   Duration of Credit (month)         1000 non-null   int64
 3   Payment Status of Previous Credit  1000 non-null   int64
 4   Purpose                            1000 non-null   int64
 5   Credit Amount                      1000 non-null   int64
 6   Value Savings/Stocks               1000 non-null   int64
 7   Length of current employment       1000 non-null   int64
 8   Instalment per cent                1000 non-null   int64
 9   Sex & Marital Status               1000 non-null   int64
 10  Guarantors                         1000 non-null   int64
 11  Duration in Current address        1000 non-null   int64
 12  Most valuable availab

In [31]:
# Split the loaded frame into the three pieces used downstream:
#   X - every column except the label,
#   y - the label column,
#   z - the sensitive-feature column used for the fairness metric.
# X only drops TARGET, so PROTECTED_ATTRIBUTE is still among the model's inputs.
X = data.drop(columns=[TARGET])
y = data[TARGET]
z = data[PROTECTED_ATTRIBUTE]

In [32]:
# Hold out 20% of the rows. Features, labels and the sensitive feature are
# split together so their rows stay aligned; the split is stratified on the
# label only and made repeatable through RANDOM_STATE.
(
    X_train, X_test,
    y_train, y_test,
    z_train, z_test,
) = train_test_split(
    X,
    y,
    z,
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=y,
)

In [33]:
# Columns stored with dtype "object" are handed to CatBoost as categorical features.
# NOTE(review): every column visible in the data.info() output is int64, so this
# list is probably empty and integer-coded categories (e.g. "Purpose") would be
# treated as plain numbers - confirm that this is intended.
categorical_columns = X.select_dtypes("object").columns.to_list()

# Project defaults, extended (or overridden) with the detected categorical columns.
MODEL_CONFIG = dict(DEFAULT_MODEL_CONFIG, cat_features=categorical_columns)

In [34]:
# Build the (still unfitted) classifier from the merged configuration.
model = CatBoostClassifier(**MODEL_CONFIG)

In [35]:
# Train on the training split. `fit` returns the estimator itself; the trailing
# semicolon keeps the notebook from echoing its repr, which contains a memory
# address that changes on every run and adds noise to notebook diffs.
model.fit(X_train, y_train);

0:	learn: 0.6899835	total: 140ms	remaining: 6m 59s
250:	learn: 0.4704244	total: 1.68s	remaining: 18.4s
500:	learn: 0.4235692	total: 3.88s	remaining: 19.4s
750:	learn: 0.3888618	total: 20.6s	remaining: 1m 1s
1000:	learn: 0.3559247	total: 22.1s	remaining: 44.1s
1250:	learn: 0.3269348	total: 22.9s	remaining: 32s
1500:	learn: 0.3030500	total: 24.1s	remaining: 24.1s
1750:	learn: 0.2826410	total: 25.3s	remaining: 18.1s
2000:	learn: 0.2646425	total: 26.6s	remaining: 13.3s
2250:	learn: 0.2486124	total: 28.1s	remaining: 9.34s
2500:	learn: 0.2337442	total: 29.3s	remaining: 5.85s
2750:	learn: 0.2201144	total: 31s	remaining: 2.8s
2999:	learn: 0.2080692	total: 32.6s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x1feacf66f10>

In [36]:
# Predictions for the held-out test features; they feed the fairness metric below.
y_pred = model.predict(X_test)

## Detection

In [37]:
from fairlearn.metrics import demographic_parity_ratio, demographic_parity_difference, equalized_odds_ratio

In [38]:
# Demographic parity ratio of the test-set predictions, with groups defined by
# the protected attribute. A value of 1.0 would mean every group has the same
# selection rate; smaller values indicate a larger gap between groups.
demographic_parity_ratio(
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=z_test,
)


0.6176470588235294

## Mitigation