# COMPAS - Bias
This notebook computes the racial bias of the COMPAS decile scores using several different bias metrics.

In [1]:
import pandas as pd
from fairscoring.metrics import bias_metric_pe, bias_metric_eo, bias_metric_cal, \
    WassersteinMetric, CalibrationMetric
from fairscoring.metrics.roc import bias_metric_roc, bias_metric_xroc

from tqdm.notebook import tqdm

## Setting
### Load COMPAS data

In [2]:
# CSV hosted in the propublica/compas-analysis GitHub repository.
dataURL = 'https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv'

# Read the data and lower-case every column label in a single chain.
df = pd.read_csv(dataURL).rename(columns=str.lower)

# Names of the columns used by the analysis.
score_column = 'decile_score'
target_column = 'two_year_recid'
protected_attribute_column = 'race'

# Extract the three series that are handed to the bias metrics.
scores, target, attribute = (
    df[column]
    for column in (score_column, target_column, protected_attribute_column)
)

# Values of the protected attribute that are compared with each other.
groups = ['African-American', 'Caucasian']
# groups = ['African-American', None]    # None = all others

# Passed to the metrics as `favorable_target`.
favorable_target = 0

### List of bias metrics

In [3]:
# Metrics evaluated for the bias table, in the order they are displayed.
metrics = [
    # Metric objects imported ready-made from fairscoring
    bias_metric_eo,     # Standardized Equal Opportunity
    bias_metric_pe,     # Standardized Predictive Equality
    bias_metric_cal,    # Standardized Calibration Equality
    bias_metric_roc,    # ROC-Bias
    bias_metric_xroc,   # xROC-Bias

    # Metrics configured here; their display names carry the "(U)" suffix
    WassersteinMetric(
        fairness_type="EO",
        name="Equal Opportunity (U)",
        score_transform="rescale",
    ),
    WassersteinMetric(
        fairness_type="PE",
        name="Predictive Equality (U)",
        score_transform="rescale",
    ),
    CalibrationMetric(
        weighting="scores",
        name="Calibration (U)",
        score_transform="rescale",
    ),
]

## Bias Measures
### Compute Bias Table
Compute every bias metric from the list above for the dataset and collect the results.

In [4]:
# Keyword arguments shared by every metric.bias(...) call below.
# NOTE(review): n_permute / seed presumably control a permutation test for
# the p-value - confirm against the fairscoring documentation.
bias_kwargs = dict(
    groups=groups,
    favorable_target=favorable_target,
    min_score=1,
    max_score=10,
    n_permute=1000,
    seed=2579,
    prefer_high_scores=False,
)

# Evaluate each metric in turn and keep it paired with its result.
results = []
for metric in tqdm(metrics):
    bias = metric.bias(scores, target, attribute, **bias_kwargs)
    results.append((metric, bias))

  0%|          | 0/8 [00:00<?, ?it/s]

  fraction_of_positives = np.where(nonzero, bin_true / bin_total, np.nan)
  mean_predicted_value = np.where(nonzero, bin_sums / bin_total, np.nan)
  fraction_of_positives = np.where(nonzero, bin_true / bin_total, np.nan)
  mean_predicted_value = np.where(nonzero, bin_sums / bin_total, np.nan)


### Result Table
This corresponds to Tab. 1 and Tab. C1 in the publication.

In [5]:
# Format each (metric, bias) pair computed above as one row of strings.
# The rows and the table get their own names instead of overwriting
# `results` and `df`: the original rebinding made this cell fail on a second
# run (the formatted rows can no longer be unpacked into `metric, bias`) and
# discarded the raw COMPAS frame.
result_rows = [
    [
        metric.name,
        f"{bias.bias:.3f}",           # total bias, three decimals
        f"{bias.pos_component:.0%}",  # positive component as a percentage
        f"{bias.neg_component:.0%}",  # negative component as a percentage
        f"{bias.p_value:.2f}",        # p-value, two decimals
    ]
    for metric, bias in results
]

# Build the table indexed by metric name; the last expression displays it.
bias_table = pd.DataFrame(
    result_rows,
    columns=["metric", "total", "pos", "neg", "p-value"],
).set_index("metric")
bias_table

Unnamed: 0_level_0,total,pos,neg,p-value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Equal Opportunity,0.161,0%,100%,0.0
Predictive Equality,0.154,0%,100%,0.0
Calibration,0.034,79%,21%,0.3
ROC bias,0.016,46%,54%,0.31
xROC bias,0.273,0%,100%,0.0
Equal Opportunity (U),0.152,0%,100%,0.0
Predictive Equality (U),0.163,0%,100%,0.0
Calibration (U),0.037,78%,22%,0.23
