In [None]:
pip install fairlearn

Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import MetricFrame, true_positive_rate, false_positive_rate

In [None]:
# Read the raw CSV from the notebook's working directory into a DataFrame.
csv_path = 'compas-scores-two-years.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Display the column labels to see which fields are available for modelling.
df.columns

Index(['id', 'name', 'first', 'last', 'compas_screening_date', 'sex', 'dob',
       'age', 'age_cat', 'race', 'juv_fel_count', 'decile_score',
       'juv_misd_count', 'juv_other_count', 'priors_count',
       'days_b_screening_arrest', 'c_jail_in', 'c_jail_out', 'c_case_number',
       'c_offense_date', 'c_arrest_date', 'c_days_from_compas',
       'c_charge_degree', 'c_charge_desc', 'is_recid', 'r_case_number',
       'r_charge_degree', 'r_days_from_arrest', 'r_offense_date',
       'r_charge_desc', 'r_jail_in', 'r_jail_out', 'violent_recid',
       'is_violent_recid', 'vr_case_number', 'vr_charge_degree',
       'vr_offense_date', 'vr_charge_desc', 'type_of_assessment',
       'decile_score.1', 'score_text', 'screening_date',
       'v_type_of_assessment', 'v_decile_score', 'v_score_text',
       'v_screening_date', 'in_custody', 'out_custody', 'priors_count.1',
       'start', 'end', 'event', 'two_year_recid'],
      dtype='object')

In [None]:
# Row filtering (no columns are removed here):
#   1. keep only records whose race is one of the two groups being compared;
#   2. discard records with a missing value in any model input or in the label.
in_compared_groups = df['race'].isin(['African-American', 'Caucasian'])
required_cols = ['age', 'priors_count', 'c_charge_degree', 'two_year_recid']
df = df.loc[in_compared_groups].dropna(subset=required_cols)

In [None]:
# Replace c_charge_degree with indicator columns. drop_first=True omits the
# indicator for the first category level, leaving one fewer dummy than levels.
# NOTE: this cell consumes the c_charge_degree column, so it cannot be re-run
# without re-running the cells that build df.
categorical_cols = ['c_charge_degree']
df = pd.get_dummies(data=df, columns=categorical_cols, drop_first=True)

In [None]:
# Model inputs: two numeric columns plus every column whose name contains
# 'c_charge_degree' (i.e. the dummy columns produced by the encoding step).
charge_degree_cols = [name for name in df.columns if 'c_charge_degree' in name]
features = ['age', 'priors_count'] + charge_degree_cols
X = df[features]

# Target label, cast to plain integers.
y = df['two_year_recid'].astype(int)

# Group membership column; it is not part of `features`, so the model never
# sees it as an input.
race = df['race']

In [None]:
# Hold out 30% of the rows for evaluation, stratifying on race so both groups
# appear in train and test in the same proportions as in the full data.
# A fixed random_state makes the split (and every metric computed downstream)
# reproducible under Restart & Run All; without it each run yields different
# numbers.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test, race_train, race_test = train_test_split(
    X,
    y,
    race,
    test_size=0.3,
    stratify=race,
    random_state=RANDOM_STATE,
)

In [None]:
# Fit a logistic regression on the training split (fit() returns the estimator
# itself, so construction and fitting can be chained), then predict labels for
# the held-out rows. max_iter is set to 1000 iterations.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [None]:
from fairlearn.metrics import (
    MetricFrame,
    true_positive_rate,
    false_positive_rate,
    selection_rate,
    demographic_parity_difference,
    equalized_odds_difference,
    false_negative_rate,
    true_negative_rate
)

In [None]:
# Metrics to evaluate separately for each race group on the test split.
# The dict keys become the column labels of the per-group table.
fairness_metrics = {
    'TPR': true_positive_rate,
    'FPR': false_positive_rate,
    'FNR': false_negative_rate,
    'Selection Rate': selection_rate,
}

metric_frame = MetricFrame(
    metrics=fairness_metrics,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=race_test,
)

# by_group holds one row per group and one column per metric.
print("Fairness Metrics by Race Group:\n", metric_frame.by_group)

Fairness Metrics by Race Group:
                        TPR       FPR       FNR  Selection Rate
race                                                          
African-American  0.633803  0.308688  0.366197        0.475203
Caucasian         0.406143  0.155756  0.593857        0.255435


In [None]:
# Between-group gaps, derived from metric_frame instead of hand-typed numbers.
# The previous literals were copied from an earlier run and no longer matched
# the table actually produced by metric_frame.by_group.
# For each metric column, (max over groups - min over groups) is the absolute
# difference between the two race groups retained in the data.
per_group = metric_frame.by_group
group_gaps = per_group.max() - per_group.min()

tpr_gap = float(group_gaps['TPR'])
fpr_gap = float(group_gaps['FPR'])
selection_gap = float(group_gaps['Selection Rate'])