# Measuring Discrimination with SolasAI

In [1]:
# In some environments, plotly figures do not render properly. If that happens, uncomment and run the following two lines:
# import plotly.io as pio
# pio.renderers.default = "svg"

In [2]:
import solas_disparity as sd

In [3]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
import xgboost as xgb
from IPython.display import display, HTML

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Let pandas render up to 500 columns before truncating wide frames.
pd.options.display.max_columns = 500
# Seed NumPy's global RNG; the np.random.choice split later in the notebook draws from it.
np.random.seed(271828)

## Importing Data and Building a Model

In [4]:
# Read the gzip-compressed CSV and use its "id" column as the row index.
df = pd.read_csv("hmda.csv.gz", index_col="id")
# Preview five rows; the fixed random_state keeps the preview stable across runs.
df.sample(n=5, random_state=161803)

Unnamed: 0_level_0,Low-Priced,Interest Rate,Rate Spread,Loan Amount,Loan-to-Value Ratio,No Intro Rate Period,Intro Rate Period,Property Value,Income,Debt-to-Income Ratio,Term 360,Conforming,State,Product Type,Black,Asian,White,Native American,Hawaiian Or Pacific Islander,Hispanic,Non-Hispanic,Male,Female,Age >= 62,Age < 62,Race,Ethnicity,Sex
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
13451,1.0,0.04875,0.00596,155000.0,0.97,1,0,165000.0,35000.0,0.33,1.0,1.0,FL,conventional,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,,,White,Hispanic,Male
18248,1.0,0.0575,0.01268,305000.0,1.0,1,0,295000.0,60000.0,0.55,1.0,1.0,CO,va,,,,,,,,,,,,Unknown,Unknown,Unknown
19610,1.0,0.055,0.01214,485000.0,0.95,1,0,515000.0,100000.0,0.43,1.0,1.0,CO,conventional,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,,1.0,0.0,White,Non-Hispanic,Unknown
3339,1.0,0.03875,-0.00087,675000.0,1.0,1,0,675000.0,190000.0,0.33,1.0,1.0,VA,va,1.0,0.0,0.0,0.0,0.0,0.0,1.0,,,0.0,1.0,Black,Non-Hispanic,Unknown
19675,1.0,0.04375,0.00076,275000.0,0.3507,1,0,775000.0,209000.0,0.25,1.0,1.0,AZ,conventional,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,,0.0,1.0,White,Non-Hispanic,Unknown


In [5]:
# Target column and the model's input columns.
label = "Low-Priced"
features = [
    "Loan Amount",
    "Loan-to-Value Ratio",
    "Intro Rate Period",
    "Property Value",
    "Income",
    "Debt-to-Income Ratio",
    "Term 360",
    "Conforming",
]

# Assign every row to 'train' or 'valid' with probabilities 0.8 / 0.2.
# This draws from NumPy's global RNG, which is seeded in the setup cell.
split_assignment = np.random.choice(a=['train', 'valid'], replace=True, size=len(df), p=[0.8, 0.2])
df['train'] = split_assignment
# Boolean mask that is True for training rows; ~train selects the validation rows.
train = df['train'].eq('train')

# Label counts within each split.
pd.crosstab(df[label], df['train'])

train,train,valid
Low-Priced,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,1517,397
1.0,14409,3677


In [6]:

def build_model(params, prediction_name):
    """Fit an XGBoost classifier on the training rows and score the whole frame.

    Relies on the notebook-level names ``df``, ``train``, ``features`` and
    ``label``. The second column of ``predict_proba`` is written into
    ``df[prediction_name]`` for the training and validation rows, and the
    ROC-AUC of each split is printed together with the relative change
    between them.

    Args:
        params: Keyword arguments forwarded to ``xgb.XGBClassifier``.
        prediction_name: Name of the ``df`` column that receives the scores.

    Returns:
        None. The function mutates ``df`` and prints a report.
    """
    classifier = xgb.XGBClassifier(**params)
    classifier.fit(X=df.loc[train, features], y=df.loc[train, label])

    # Score each split and measure its ROC-AUC, training rows first.
    auc_by_split = {}
    for split_name, rows in (("train", train), ("valid", ~train)):
        df.loc[rows, prediction_name] = classifier.predict_proba(df.loc[rows, features])[:, 1]
        auc_by_split[split_name] = metrics.roc_auc_score(
            y_true=df.loc[rows, label],
            y_score=df.loc[rows, prediction_name],
        )

    auc_train = auc_by_split["train"]
    auc_valid = auc_by_split["valid"]

    report = (
        f"\n************************"
        f"\n**** Model ROC-AUC: ****"
        f"\nTraining:          {auc_train:0.3f}"
        f"\nValidation:        {auc_valid:0.3f}"
        f"\nPercent Change:   {auc_valid / auc_train - 1: 0.2%}"
        f"\n************************"
    )
    print(report)

In [7]:
# Hyperparameters for the baseline classifier.
# (The original cell noted 4 and 150 next to max_depth and n_estimators, and a
# commented-out seed=61803; none of those values are in effect.)
baseline_params = {
    "objective": "binary:logistic",
    "max_depth": 3,
    "learning_rate": 0.02,
    "n_estimators": 200,
    # base_score is set to the mean of the label over the training rows.
    "base_score": df.loc[train, label].mean(),
    "random_state": 31415,
}
build_model(prediction_name='predictions', params=baseline_params)



************************
**** Model ROC-AUC: ****
Training:          0.865
Validation:        0.842
Percent Change:   -2.69%
************************


In [8]:
# Summary statistics of the training-set scores, shown for context before the
# cutoff is applied. Wrapped in display() because a notebook only renders a
# cell's last expression automatically; the bare call was silently discarded.
display(df.loc[train, 'predictions'].describe())

# Rows whose predicted probability is strictly above this value get an offer.
cutoff = 0.90

# 1 = gets an offer, 0 = does not.
df['Gets Offer'] = (df['predictions'] > cutoff).astype(int)
# Share of all rows (train and validation) on each side of the cutoff.
df['Gets Offer'].value_counts(dropna=False, normalize=True)

1    0.6882
0    0.3118
Name: Gets Offer, dtype: float64

In [9]:
# One row per comparison: (protected group, reference group, group category).
group_comparisons = [
    ("Black", "White", "Race"),
    ("Asian", "White", "Race"),
    ("Native American", "White", "Race"),
    ("Hispanic", "Non-Hispanic", "Ethnicity"),
    ("Female", "Male", "Sex"),
]
protected, reference, categories = (list(column) for column in zip(*group_comparisons))

# Keyword arguments shared by every disparity call below; all tests run on the
# validation rows only.
common_info_for_testing = {
    "group_data": df.loc[~train, :],
    "protected_groups": protected,
    "reference_groups": reference,
    "group_categories": categories,
}

## Adverse Impact Ratio (AIR)

In [10]:
# Binary offer decisions for the validation rows.
valid_offers = df.loc[~train, 'Gets Offer']

# Adverse impact ratio of each protected group against its reference group.
air = sd.adverse_impact_ratio(
    outcome=valid_offers,
    air_threshold=0.8,
    percent_difference_threshold=0.0,
    **common_info_for_testing,
)

In [11]:
# Bare last expression: the notebook renders the AIR result computed in the previous cell.
air

## Disparity Calculation: Adverse Impact Ratio

\* Percent Missing: Ethnicity: 14.63%, Race: 14.33%, Sex: 45.24%

## Adverse Impact Ratio Summary Table

Group Category,Group,Reference Group,Observations,Percent Missing,Total,Favorable,Percent Favorable,Percent Difference Favorable,AIR,P-Values,Practically Significant,Shortfall
Race,Black,White,3490,14.33%,269.0,107.0,39.78%,28.93%,0.579,0.0,Yes,77.83379
Race,Asian,White,3490,14.33%,274.0,243.0,88.69%,-19.97%,1.291,0.0,No,
Race,Native American,White,3490,14.33%,17.0,5.0,29.41%,39.30%,0.428,0.001,Yes,6.680946
Race,White,,3490,14.33%,2918.0,2005.0,68.71%,,,,,
Ethnicity,Hispanic,Non-Hispanic,3478,14.63%,386.0,183.0,47.41%,22.84%,0.675,0.0,Yes,88.148771
Ethnicity,Non-Hispanic,,3478,14.63%,3092.0,2172.0,70.25%,,,,,
Sex,Female,Male,2231,45.24%,873.0,529.0,60.60%,4.13%,0.936,0.054,No,
Sex,Male,,2231,45.24%,1358.0,879.0,64.73%,,,,,


## Adverse Impact Ratio by Quantile

In [12]:
# Quantile levels 0.1, 0.2, ..., 1.0.
decile_levels = [tenth / 10 for tenth in range(1, 11)]
# Continuous model scores for the validation rows.
valid_scores = df.loc[~train, 'predictions']

# AIR evaluated by score quantile; higher scores are the favorable direction
# (lower_score_favorable=False).
airq = sd.adverse_impact_ratio_by_quantile(
    outcome=valid_scores,
    quantiles=decile_levels,
    air_threshold=0.8,
    percent_difference_threshold=0.0,
    lower_score_favorable=False,
    **common_info_for_testing,
)
airq.plot()

## Standardized Mean Difference (SMD)

In [13]:
# Continuous model scores for the validation rows.
valid_scores = df.loc[~train, 'predictions']

# Standardized mean difference of the scores between each protected group and
# its reference group; higher scores are favorable (lower_score_favorable=False).
smd = sd.standardized_mean_difference(
    outcome=valid_scores,
    smd_threshold=-30,
    lower_score_favorable=False,
    **common_info_for_testing,
)
smd

## Disparity Calculation: SMD

\* Percent Missing: Ethnicity: 14.63%, Race: 14.33%, Sex: 45.24%

## SMD Summary Table

Group Category,Group,Reference Group,Observations,Percent Missing,Total,Average Outcome,Std. Dev. of Outcomes,SMD,P-Values,Practically Significant
Race,Black,White,3490,14.33%,269.0,0.82,0.11,-77.852,0.0,Yes
Race,Asian,White,3490,14.33%,274.0,0.96,0.11,47.864,0.0,No
Race,Native American,White,3490,14.33%,17.0,0.83,0.11,-73.438,0.002,Yes
Race,White,,3490,14.33%,2918.0,0.91,0.11,,,
Ethnicity,Hispanic,Non-Hispanic,3478,14.63%,386.0,0.85,0.11,-52.513,0.0,Yes
Ethnicity,Non-Hispanic,,3478,14.63%,3092.0,0.91,0.11,,,
Sex,Female,Male,2231,45.24%,873.0,0.89,0.11,-8.156,0.076,No
Sex,Male,,2231,45.24%,1358.0,0.89,0.11,,,


## Residual Standardized Mean Difference

In [14]:
# Validation rows only, matching the group data passed in common_info_for_testing.
holdout = df.loc[~train]

# Residual SMD compares model residuals across groups, using both the
# predictions and the observed label.
rsmd = sd.residual_standardized_mean_difference(
    prediction=holdout['predictions'],
    label=holdout[label],
    residual_smd_threshold=30,
    lower_score_favorable=True,
    **common_info_for_testing,
)
# Show the plot first, then the summary table.
display(rsmd.plot())
sd.ui.show(rsmd.summary_table)

Group Category,Group,Reference Group,Observations,Percent Missing,Total,Average Prediction,Average Label,Average Residual,Std. Dev. of Residuals,Residual SMD,P-Values,Practically Significant
Race,Black,White,3490,14.33%,269.0,0.822413,0.79,-0.034309,0.270416,-12.121454,0.068,No
Race,Asian,White,3490,14.33%,274.0,0.956702,0.97,0.010451,0.270416,4.430784,0.472,No
Race,Native American,White,3490,14.33%,17.0,0.827128,0.71,-0.121246,0.270416,-44.270752,0.071,No
Race,White,,3490,14.33%,2918.0,0.905574,0.9,-0.001531,0.270416,,,
Ethnicity,Hispanic,Non-Hispanic,3478,14.63%,386.0,0.852617,0.77,-0.080596,0.270416,-31.869451,0.0,No
Ethnicity,Non-Hispanic,,3478,14.63%,3092.0,0.908711,0.91,0.005584,0.270416,,,
Sex,Female,Male,2231,45.24%,873.0,0.885954,0.87,-0.016538,0.270416,-5.3108,0.26,No
Sex,Male,,2231,45.24%,1358.0,0.894666,0.89,-0.002177,0.270416,,,


In [15]:
# Alternative model: same settings as the baseline except for a deeper tree
# and a different number of boosting rounds.
alternative_params = {**baseline_params, 'max_depth': 4, 'n_estimators': 180}
build_model(prediction_name='alt_predictions', params=alternative_params)

# Apply the same cutoff that was used for the baseline offer decision.
alt_offer = df['alt_predictions'] > cutoff
df['Alternative Gets Offer'] = alt_offer.astype(int)

# Agreement between the baseline and alternative offer decisions.
pd.crosstab(df['Gets Offer'], df['Alternative Gets Offer'])


************************
**** Model ROC-AUC: ****
Training:          0.880
Validation:        0.851
Percent Change:   -3.24%
************************


Alternative Gets Offer,0,1
Gets Offer,Unnamed: 1_level_1,Unnamed: 2_level_1
0,5915,321
1,56,13708


In [16]:
# Columns that identify one comparison row in an AIR summary table.
air_index_keys = ['Group Category', 'Group', 'Reference Group']

# AIR of the alternative model's offer decisions on the validation rows.
# set_index returns a new frame, so the library result's own summary_table is
# no longer mutated in place.
alt_air = sd.adverse_impact_ratio(
    **common_info_for_testing,
    outcome=df.loc[~train, 'Alternative Gets Offer'],
    air_threshold=0.8,
    percent_difference_threshold=0.0,
).summary_table.set_index(keys=air_index_keys)

# Non-inplace set_index already leaves air.summary_table untouched, so no
# explicit .copy() is needed.
baseline_air = air.summary_table.set_index(keys=air_index_keys)

# Baseline and alternative AIR side by side, aligned on the comparison keys.
air_comparison = pd.concat(
    objs=(baseline_air['AIR'], alt_air['AIR']),
    keys=['Baseline', 'Alternative'],
    axis=1
)
air_comparison['Difference'] = air_comparison['Alternative'] - air_comparison['Baseline']
# Reference-group rows have no AIR value; show only the protected-group rows.
air_comparison.loc[air_comparison['Baseline'].notna(), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Baseline,Alternative,Difference
Group Category,Group,Reference Group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Race,Black,White,0.578898,0.5726,-0.006299
Race,Asian,White,1.290704,1.270044,-0.020659
Race,Native American,White,0.428048,0.41947,-0.008578
Ethnicity,Hispanic,Non-Hispanic,0.674906,0.701452,0.026546
Sex,Female,Male,0.936165,0.941293,0.005128
