In [1]:
from fairlearn.reductions import DemographicParity, ExponentiatedGradient, UtilityParity
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
import solas_disparity as sd
np.random.seed(31415)

In [2]:
# Synthetic population: five standard-normal features, a 50/50 minority flag,
# and a mean-centred logistic noise term. The draws happen in the order
# features -> flag -> noise.
features = ['x1', 'x2', 'x3', 'x4', 'x5']
label = 'good'

n_obs = 200000
x = np.random.randn(n_obs, len(features))
r = np.random.binomial(1, 0.5, size=n_obs)
e = np.random.logistic(size=n_obs)
e = e - e.mean()

feature_columns = {name: x[:, position] for position, name in enumerate(features)}
data = pd.DataFrame({**feature_columns, "minority": r, "e": e})

# Minority rows get x1 shifted up by one; no other feature depends on group.
is_minority = data['minority'] == 1
data.loc[is_minority, 'x1'] += 1

# 'good' is 1 exactly when the latent score is not positive, so every feature
# lowers the chance of a good outcome.
data = data.assign(
    majority=lambda frame: 1 - frame['minority'],
    latent=lambda frame: (
        1 + frame['x1'] + frame['x2'] + frame['x3'] + frame['x4'] + frame['x5'] + frame['e']
    ),
    good=lambda frame: np.where(frame['latent'] > 0, 0, 1),
    sample_weight=1,
)

data.sample(n=3)

Unnamed: 0,x1,x2,x3,x4,x5,minority,e,majority,latent,good,sample_weight
35679,-1.575776,0.024258,0.399403,0.541486,-0.031129,0,-1.510256,1,-1.152013,1,1
168142,0.769226,-1.28656,-1.169726,-0.828986,0.505077,1,0.889957,0,-0.121013,1,1
94499,-0.113705,-0.616835,1.67877,-0.233238,-0.075,0,-1.35018,1,0.289812,0,1


In [3]:
# Share of each outcome (rows: good) within each group (columns: minority);
# every column sums to 1.
pd.crosstab(index=data['good'], columns=data['minority'], normalize='columns')

minority,0,1
good,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.638174,0.761222
1,0.361826,0.238778


In [4]:
# 80/20 split with a fixed seed. Each part is copied so that columns added
# later go onto independent frames rather than slices of `data`.
train, valid = (
    part.copy()
    for part in train_test_split(data, test_size=0.2, random_state=31415)
)

In [5]:
# Baseline: unpenalised logistic regression on all five features.
classifier = LogisticRegression(penalty=None)
classifier.fit(X=train[features], y=train[label], sample_weight=train['sample_weight'])

# Fitted coefficients followed by the intercept, one row each.
coefficient_values = np.append(classifier.coef_, classifier.intercept_)
coefficient_labels = [*features, 'intercept']
pd.DataFrame({'Coefficient': coefficient_values}, index=coefficient_labels)

Unnamed: 0,Coefficient
x1,-1.021461
x2,-1.015294
x3,-1.008136
x4,-1.007174
x5,-1.007324
intercept,-1.003494


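Fairlearn's reduction-based mitigation (used below) works only with the hard 0/1 labels returned by `.predict()`, which for a logistic regression means a 0.50 score cutoff determines the outcome. A fixed 0.50 cutoff is entirely unrealistic for most real-world use cases -- especially in consumer finance.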

In [6]:
# For each split, store two views of the baseline model:
#   biased_offer      - hard 0/1 label from .predict() (0.50 threshold)
#   biased_prediction - pseudo-probability of the positive class from .predict_proba()
for frame in (train, valid):
    frame['biased_offer'] = classifier.predict(X=frame[features])
    frame['biased_prediction'] = classifier.predict_proba(X=frame[features])[:, 1]

In [7]:
# ROC AUC of the baseline scores against the observed label, per split.
auc_train, auc_valid = (
    metrics.roc_auc_score(split['good'], split['biased_prediction'])
    for split in (train, valid)
)
print(
    "Baseline Model AUC:",
    f"Training:   {auc_train:0.3f}",
    f"Validation: {auc_valid:0.3f}",
    sep="\n",
)

Baseline Model AUC:
Training:   0.888
Validation: 0.886


In [8]:
# Adverse impact ratio of the baseline model's offers on the validation split,
# with 'minority' as the protected group and 'majority' as the reference group.
summary_columns = ['Percent Favorable', 'Percent Difference Favorable', 'AIR', 'P-Values']
baseline_air = sd.adverse_impact_ratio(
    outcome=valid['biased_offer'],
    group_data=valid,
    group_categories=['race'],
    protected_groups=['minority'],
    reference_groups=['majority'],
    air_threshold=0.90,
    percent_difference_threshold=0.00,
)
sd.ui.show(baseline_air.summary_table[summary_columns])

Group,Percent Favorable,Percent Difference Favorable,AIR,P-Values
minority,18.27%,14.78%,0.553,0.0
majority,33.04%,,,


In [9]:
# Naive mitigation: refit without x1, the one feature that was shifted for the
# minority group when the data was generated.
drop_var_features = ['x2', 'x3', 'x4', 'x5']
drop_var = LogisticRegression(penalty=None)
drop_var.fit(X=train[drop_var_features], y=train[label], sample_weight=train['sample_weight'])
display(
    pd.DataFrame(
        {'Coefficient': np.append(drop_var.coef_, drop_var.intercept_)},
        index=np.append(drop_var.feature_names_in_, 'Intercept'),
    )
)

# Hard 0/1 offers (0.50 threshold) and positive-class scores for both splits.
for frame in (train, valid):
    frame['drop_var_offer'] = drop_var.predict(X=frame[drop_var_features])
    frame['drop_var_prediction'] = drop_var.predict_proba(X=frame[drop_var_features])[:, 1]

auc_train, auc_valid = (
    metrics.roc_auc_score(split['good'], split['drop_var_prediction'])
    for split in (train, valid)
)
print(
    "",
    "Dropped Variable Model AUC:",
    f"Training:   {auc_train:0.3f}",
    f"Validation: {auc_valid:0.3f}",
    sep="\n",
)

Unnamed: 0,Coefficient
x2,-0.840788
x3,-0.833994
x4,-0.831966
x5,-0.824482
Intercept,-1.24498



Dropped Variable Model AUC:
Training:   0.835
Validation: 0.832


In [10]:
# Adverse impact ratio of the dropped-variable model's offers on the validation
# split, with 'minority' as the protected group and 'majority' as the reference.
summary_columns = ['Percent Favorable', 'Percent Difference Favorable', 'AIR', 'P-Values']
drop_var_air = sd.adverse_impact_ratio(
    outcome=valid['drop_var_offer'],
    group_data=valid,
    group_categories=['race'],
    protected_groups=['minority'],
    reference_groups=['majority'],
    air_threshold=0.90,
    percent_difference_threshold=0.00,
)
sd.ui.show(drop_var_air.summary_table[summary_columns])

Group,Percent Favorable,Percent Difference Favorable,AIR,P-Values
minority,22.03%,0.35%,0.984,0.403
majority,22.38%,,,


In [11]:
# Fairlearn reduction: exponentiated gradient around the baseline estimator,
# under a demographic-parity constraint with a ratio bound of 0.90 and the
# 'minority' flag as the sensitive feature.
constraints = DemographicParity(ratio_bound=0.90)
mitigator_settings = {'max_iter': 1000, 'eps': 0.001, 'eta0': 0.01}
mitigator = ExponentiatedGradient(
    estimator=classifier,
    constraints=constraints,
    **mitigator_settings,
)
mitigator.fit(train[features], train[label], sensitive_features=train['minority'])

# Iteration that the fitted mitigator reports as its best.
mitigator.best_iter_

8

In [12]:
# ExponentiatedGradient.predict() is randomized: it samples a classifier from
# the fitted mixture for every row. Without an explicit random_state it draws
# from NumPy's global RNG, so re-running this cell (or any earlier cell that
# consumes random numbers) would silently change the offers and every fairness
# and precision/recall figure computed from them. A dedicated generator created
# inside the cell keeps the result identical on every run.
predict_rng = np.random.RandomState(31415)
train['debiased_offer'] = mitigator.predict(train[features], random_state=predict_rng)
valid['debiased_offer'] = mitigator.predict(valid[features], random_state=predict_rng)

In [13]:
# Evaluate the mitigator's constraint with the baseline scores standing in as
# the "predictor" (the callable ignores its input and returns the training
# scores). The result is indexed by (sign, event, group_id); sign is whether
# you're adding or subtracting from the class (group_id).
baseline_scores = train['biased_prediction'].values
mitigator.constraints.gamma(lambda _unused: baseline_scores)

sign  event  group_id
+     all    0           0.025426
             1          -0.085683
-     all    0          -0.091698
             1           0.031757
dtype: float64

In [14]:
# Adverse impact ratio of the Fairlearn-mitigated offers on the validation
# split, with 'minority' as the protected group and 'majority' as the reference.
summary_columns = ['Percent Favorable', 'Percent Difference Favorable', 'AIR', 'P-Values']
debiased_air = sd.adverse_impact_ratio(
    outcome=valid['debiased_offer'],
    group_data=valid,
    group_categories=['race'],
    protected_groups=['minority'],
    reference_groups=['majority'],
    air_threshold=0.90,
    percent_difference_threshold=0.00,
)
sd.ui.show(debiased_air.summary_table[summary_columns])

Group,Percent Favorable,Percent Difference Favorable,AIR,P-Values
minority,22.43%,5.39%,0.806,0.0
majority,27.82%,,,


In [15]:
def precision_recall(pr_data, group):
    """Print recall and precision of each model's offers against the 'good' label.

    Parameters
    ----------
    pr_data
        Table indexable by column name. Must provide 'good', 'sample_weight',
        'biased_offer', 'drop_var_offer' and 'debiased_offer'.
    group
        Text used as the heading of the printed report.

    Returns
    -------
    None
        The report is written to stdout in a single ``print`` call.
    """
    offer_sections = (
        ("Original Model Offers", 'biased_offer'),
        ("Dropped-Variable Model Offers", 'drop_var_offer'),
        ("Fairlearn Debiased Model Offers", 'debiased_offer'),
    )

    report = [f"\n{group}: Offers at 0.50 Score Cutoff"]
    for heading, offer_column in offer_sections:
        # Recall and precision are scored on the same inputs, so build them once.
        scoring_inputs = dict(
            y_pred=pr_data[offer_column],
            y_true=pr_data['good'],
            sample_weight=pr_data['sample_weight'],
        )
        recall = metrics.recall_score(**scoring_inputs)
        precision = metrics.precision_score(**scoring_inputs)
        report.append(f"\n\n{heading}")
        report.append(f"\nRecall:    {recall:0.4f}")
        report.append(f"\nPrecision: {precision:0.4f}")

    print("".join(report))

In [16]:
# Precision/recall of all three offer columns over the full validation split.
precision_recall(pr_data=valid, group='All Observations')


All Observations: Offers at 0.50 Score Cutoff

Original Model Offers
Recall:    0.6391
Precision: 0.7469

Dropped-Variable Model Offers
Recall:    0.5137
Precision: 0.6929

Fairlearn Debiased Model Offers
Recall:    0.5971
Precision: 0.7121


In [17]:
# Same report restricted to validation rows flagged minority == 1.
minority_rows = valid[valid['minority'] == 1]
precision_recall(pr_data=minority_rows, group='Minority Observations')


Minority Observations: Offers at 0.50 Score Cutoff

Original Model Offers
Recall:    0.5604
Precision: 0.7302

Dropped-Variable Model Offers
Recall:    0.5637
Precision: 0.6091

Fairlearn Debiased Model Offers
Recall:    0.6088
Precision: 0.6461


In [18]:
# Same report restricted to validation rows flagged minority == 0.
non_minority_rows = valid[valid['minority'] == 0]
precision_recall(pr_data=non_minority_rows, group='Non-Minority Observations')


Non-Minority Observations: Offers at 0.50 Score Cutoff

Original Model Offers
Recall:    0.6912
Precision: 0.7561

Dropped-Variable Model Offers
Recall:    0.4805
Precision: 0.7760

Fairlearn Debiased Model Offers
Recall:    0.5893
Precision: 0.7657
