In [1]:
import altair as alt
import itertools
import pandas as pd

from fairness_utils import *
from latextable import draw_latex
from texttable import Texttable
from scorer import *

FILENAME = 'data/compas_predictions.csv'
PREDICTOR_TYPES = ['unconstrained','calibrated','calibrated_race','thresholdless_EO','thresholdless_EO_pos_class','thresholdless_EO_neg_class']
RACES = ['all','African-American','Caucasian']

In [2]:
df = pd.read_csv(FILENAME)

In [4]:
# Fairness and accuracy criteria
table = Texttable()
table.set_deco(Texttable.HEADER)
table.set_cols_dtype([
    't',
    't',
    'f',
    'f',
    'f',
    'f',
    'f',
])
table.add_row(['Predictor','Race','Brier Score','Log Loss','Calibration Gap','Separation Gap (Negative Class)','Separation Gap (Positive Class)'])
brier_accuracy_scorer = BrierScorer()
log_loss_scorer = LogScorer()
for predictor in PREDICTOR_TYPES:
    brier_scores = brier_accuracy_scorer.score_many(df['two_year_recid'],df[predictor],df['race'])
    log_losses = log_loss_scorer.score_many(df['two_year_recid'],df[predictor],df['race'])
    calibration_gaps = get_sufficiency_gap_quantiles(df['two_year_recid'],df[predictor],df['race'])
    sep_neg = get_separation_gap(df['two_year_recid'],df[predictor],df['race'],0)
    sep_pos = get_separation_gap(df['two_year_recid'],df[predictor],df['race'],1)
    for race in RACES:
        if race != 'all':
            table.add_row([predictor,race,brier_scores[race],log_losses[race],calibration_gaps[race],
                       sep_neg[race],sep_pos[race]])
        else:
            table.add_row([predictor,race,brier_scores[race],log_losses[race],float('nan'),float('nan'),float('nan')])
print(table.draw().replace('nan','N/A'))

Predictor   Race        Brier       Log Loss   Calibrati   Separatio   Separatio
                        Score                  on Gap      n Gap       n Gap    
                                                           (Negative   (Positive
                                                           Class)      Class)   
unconstra   all         0.366       0.607      N/A         N/A         N/A      
ined                                                                            
unconstra   African-    0.362       0.610      0.149       0.040       0.034    
ined        American                                                            
unconstra   Caucasian   0.373       0.603      0.226       -0.042      -0.071   
ined                                                                            
calibrate   all         0.363       0.607      N/A         N/A         N/A      
d                                                                               
calibrate   African-    0.35

In [5]:
# for latex :~)
print(draw_latex(table).replace('nan','N/A'))

\begin{table}
	\begin{center}
		\begin{tabular}{lllllll}
			 \\
			\hline
			Predictor & Race & Brier Score & Log Loss & Calibration Gap & Separation Gap (Negative Class) & Separation Gap (Positive Class) \\
			unconstrained & all & 0.366 & 0.607 & N/A & N/A & N/A \\
			unconstrained & African-American & 0.362 & 0.610 & 0.149 & 0.040 & 0.034 \\
			unconstrained & Caucasian & 0.373 & 0.603 & 0.226 & -0.042 & -0.071 \\
			calibrated & all & 0.363 & 0.607 & N/A & N/A & N/A \\
			calibrated & African-American & 0.358 & 0.610 & 0.145 & 0.039 & 0.034 \\
			calibrated & Caucasian & 0.369 & 0.602 & 0.228 & -0.042 & -0.069 \\
			calibrated_race & all & 0.378 & 0.610 & N/A & N/A & N/A \\
			calibrated_race & African-American & 0.381 & 0.615 & 0.090 & 0.041 & 0.046 \\
			calibrated_race & Caucasian & 0.374 & 0.603 & 0.119 & -0.043 & -0.095 \\
			thresholdless_EO & all & 0.355 & 0.619 & N/A & N/A & N/A \\
			thresholdless_EO & African-American & 0.350 & 0.629 & 0.153 & 0.000 & -0.000 \\
			thresho

In [6]:
# Add scores from the beta predictor
rows=[]
for a,b in itertools.product(range(1,20),repeat=2):
    scores = {'a' : a , 'b' : b}
    for predictor in PREDICTOR_TYPES:
        scorer = BetaScorer(a,b)
        results = scorer.score_many(df['two_year_recid'],df[predictor],df['race'])
        for race in RACES:
            scores[predictor + '_' + race] = results[race]
    rows.append(scores)

for t in [p for p in PREDICTOR_TYPES if p.startswith('thresholdless')]:
    prefix = t.replace("thresholdless_","")
    for race in RACES:
        scores_df['cal_%s_pdiff_%s' % (prefix,race)] = scores_df[['calibrated_race_' + race,t+'_'+race]].pct_change(axis=1)[t+'_'+race]


NameError: name 'scores_df' is not defined

In [None]:
# Plot percentage difference by a and b
charts = []
for t in [p for p in PREDICTOR_TYPES if p.startswith('thresholdless')]:
    prefix = t.replace("thresholdless_","")
    for race in RACES:
        chart = alt.Chart(scores_df).mark_rect().encode(
            x='b:O',
            y='a:O',
            color=alt.Color('cal_%s_pdiff_%s' % (prefix,race),title='% Diff EO Calibrated')
        ).properties(title='%s %s' % (prefix,race))
        charts.append(chart)
alt.concat(*charts,columns=3).properties(title=alt.Title('Beta scores',subtitle='Parameterized s₀=∫₀ᵖ tᵃ(1-t)ᵇ⁻¹ dt, s₁=∫ₚ¹tᵃ⁻¹(1-t)ᵇ dt'))
