# Evaluating Fairness in Machine Learning: Comparative Analysis and Benchmarking of Fairlearn and AIF360 | COMPAS Recidivism Dataset

In [112]:
import os
import random

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier

from src.constants import RANDOM_STATE, DEFAULT_MODEL_CONFIG

## Ensure reproducibility

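Seed NumPy, Python's built-in `random` module, and `PYTHONHASHSEED` with the shared `RANDOM_STATE` so that results can be reproduced across runs.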

In [114]:
# Seed every global source of randomness from the project-wide RANDOM_STATE.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes — confirm.
os.environ["PYTHONHASHSEED"] = str(RANDOM_STATE)
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

## Load data

In [115]:
# Columns loaded from the COMPAS CSV; everything listed here ends up in X.
FEATURES = [
    "sex",
    "age",
    "age_cat",
    "race",
    "juv_fel_count",
    "juv_misd_count",
    "juv_other_count",
    "priors_count",
    "days_b_screening_arrest",
    "c_days_from_compas",
    "c_charge_degree",
    "decile_score.1",
    "score_text",
    "v_type_of_assessment",
    "v_decile_score",
    "v_score_text",
    "end",
]
# Kept as a one-element list so it can be concatenated with FEATURES
# and passed straight to `DataFrame.drop(columns=...)`.
TARGET = ["is_recid"]

In [116]:
# Load only the modelling columns (features plus the label) from the raw CSV.
data = pd.read_csv(
    "../data/compas-scores-two-years.csv",
    usecols=[*FEATURES, *TARGET],
)

In [117]:
# Summarise dtypes and non-null counts per column to spot missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7214 entries, 0 to 7213
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   sex                      7214 non-null   object 
 1   age                      7214 non-null   int64  
 2   age_cat                  7214 non-null   object 
 3   race                     7214 non-null   object 
 4   juv_fel_count            7214 non-null   int64  
 5   juv_misd_count           7214 non-null   int64  
 6   juv_other_count          7214 non-null   int64  
 7   priors_count             7214 non-null   int64  
 8   days_b_screening_arrest  6907 non-null   float64
 9   c_days_from_compas       7192 non-null   float64
 10  c_charge_degree          7214 non-null   object 
 11  is_recid                 7214 non-null   int64  
 12  decile_score.1           7214 non-null   int64  
 13  score_text               7214 non-null   object 
 14  v_type_of_assessment    

In [118]:
# Split the frame into the label (y), the sensitive attribute (z) and the
# model inputs (X).
#
# TARGET is a one-element list, so select its single column by name. This
# yields the same Series as star-unpacking TARGET inside the subscript, but
# without requiring Python 3.11+ syntax and without silently depending on
# TARGET never growing a second entry.
y = data.loc[:, TARGET[0]]
z = data.loc[:, "race"]
# Only the label is dropped here: "race" and every other FEATURES column
# remain in X and are therefore visible to the model.
X = data.drop(columns=TARGET)

In [101]:
# Hold out 20% of the rows, stratified on the label so both splits keep the
# same class balance; z is split alongside X and y so rows stay aligned.
X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(
    X,
    y,
    z,
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=y,
)

In [119]:
# Extend the project defaults with the object-dtype columns of X, which are
# handed to CatBoost as its categorical features.
MODEL_CONFIG = dict(
    DEFAULT_MODEL_CONFIG,
    cat_features=X.select_dtypes("object").columns.to_list(),
)

In [120]:
# Build the classifier from the merged configuration (project defaults + cat_features).
model = CatBoostClassifier(**MODEL_CONFIG)

In [121]:
# Train on the training split only; the held-out split is kept for prediction.
model.fit(X_train, y_train)

0:	learn: 0.6844627	total: 20ms	remaining: 60s
250:	learn: 0.3213289	total: 18.4s	remaining: 3m 21s
500:	learn: 0.3093007	total: 24.8s	remaining: 2m 3s
750:	learn: 0.3036685	total: 39.5s	remaining: 1m 58s
1000:	learn: 0.2989033	total: 59s	remaining: 1m 57s
1250:	learn: 0.2949101	total: 1m 20s	remaining: 1m 51s
1500:	learn: 0.2914104	total: 1m 31s	remaining: 1m 31s
1750:	learn: 0.2886387	total: 2m 3s	remaining: 1m 28s
2000:	learn: 0.2861932	total: 2m 16s	remaining: 1m 8s
2250:	learn: 0.2838329	total: 2m 26s	remaining: 48.8s
2500:	learn: 0.2818264	total: 2m 32s	remaining: 30.5s
2750:	learn: 0.2797286	total: 2m 39s	remaining: 14.4s
2999:	learn: 0.2777593	total: 2m 45s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x1c08c7a4150>

In [108]:
# Predict on the held-out rows; these predictions feed the fairness metrics below.
y_pred = model.predict(X_test)

## Detection

In [109]:
from fairlearn.metrics import demographic_parity_ratio, demographic_parity_difference, equalized_odds_ratio

In [110]:
# Demographic parity ratio across `race` groups on the test split: the smallest
# group-level selection rate divided by the largest. A value of 1.0 would mean
# every group receives positive predictions at the same rate.
demographic_parity_ratio(y_test, y_pred, sensitive_features=z_test)


0.33783783783783783

## Mitigation