In [1]:
import altair as alt
import itertools
import pandas as pd

from fairness_utils import *
from latextable import draw_latex
from texttable import Texttable
from scorer import *

FILENAME = 'data/compas_predictions.csv'
PREDICTOR_TYPES = ['unconstrained','calibrated','calibrated_race','thresholdless_EO','thresholdless_EO_pos_class','thresholdless_EO_neg_class']
RACES = ['all','African-American','Caucasian']

In [2]:
df = pd.read_csv(FILENAME)

In [3]:
# Fairness and accuracy criteria
table = Texttable()
table.set_deco(Texttable.HEADER)
table.set_cols_dtype([
    't',
    't',
    'f',
    'f',
    'f',
    'f',
    'f',
])
table.add_row(['Predictor','Race','Brier Score','Log Loss','Calibration Gap','Separation Gap (Negative Class)','Separation Gap (Positive Class)'])
brier_accuracy_scorer = BrierScorer()
log_loss_scorer = LogScorer()
for predictor in PREDICTOR_TYPES:
    brier_scores = brier_accuracy_scorer.score_many(df['two_year_recid'],df[predictor],df['race'])
    log_losses = log_loss_scorer.score_many(df['two_year_recid'],df[predictor],df['race'])
    calibration_gaps = get_sufficiency_gap_quantiles(df['two_year_recid'],df[predictor],df['race'])
    sep_neg = get_separation_gap(df['two_year_recid'],df[predictor],df['race'],0)
    sep_pos = get_separation_gap(df['two_year_recid'],df[predictor],df['race'],1)
    for race in RACES:
        if race != 'all':
            table.add_row([predictor,race,brier_scores[race],log_losses[race],calibration_gaps[race],
                       sep_neg[race],sep_pos[race]])
        else:
            table.add_row([predictor,race,brier_scores[race],log_losses[race],float('nan'),float('nan'),float('nan')])
print(table.draw().replace('nan','N/A'))

Predictor   Race        Brier       Log Loss   Calibrati   Separatio   Separatio
                        Score                  on Gap      n Gap       n Gap    
                                                           (Negative   (Positive
                                                           Class)      Class)   
unconstra   all         0.210       0.607      N/A         N/A         N/A      
ined                                                                            
unconstra   African-    0.211       0.610      0.149       0.040       0.034    
ined        American                                                            
unconstra   Caucasian   0.207       0.603      0.226       -0.042      -0.071   
ined                                                                            
calibrate   all         0.210       0.607      N/A         N/A         N/A      
d                                                                               
calibrate   African-    0.21

In [4]:
# for latex :~)
print(draw_latex(table).replace('nan','N/A').replace(
    'calibrated_race','GC').replace('thresholdless_','').replace(
        '_',' ').replace('all','A').replace('African-American','B').replace(
            'Caucasian','W').replace('calibrated','Calibrated').replace(
                'unconstrained','Unconstrained'))

\begin{table}
	\begin{center}
		\begin{tabular}{lllllll}
			 \\
			\hline
			Predictor & Race & Brier Score & Log Loss & Calibration Gap & Separation Gap (Negative Class) & Separation Gap (Positive Class) \\
			Unconstrained & A & 0.210 & 0.607 & N/A & N/A & N/A \\
			Unconstrained & B & 0.211 & 0.610 & 0.149 & 0.040 & 0.034 \\
			Unconstrained & W & 0.207 & 0.603 & 0.226 & -0.042 & -0.071 \\
			Calibrated & A & 0.210 & 0.607 & N/A & N/A & N/A \\
			Calibrated & B & 0.212 & 0.610 & 0.145 & 0.039 & 0.034 \\
			Calibrated & W & 0.207 & 0.602 & 0.228 & -0.042 & -0.069 \\
			GC & A & 0.211 & 0.610 & N/A & N/A & N/A \\
			GC & B & 0.213 & 0.615 & 0.090 & 0.041 & 0.046 \\
			GC & W & 0.207 & 0.603 & 0.119 & -0.043 & -0.095 \\
			EO & A & 0.215 & 0.619 & N/A & N/A & N/A \\
			EO & B & 0.220 & 0.629 & 0.153 & 0.000 & -0.000 \\
			EO & W & 0.208 & 0.603 & 0.288 & -0.000 & 0.000 \\
			EO pos class & A & 0.216 & 0.622 & N/A & N/A & N/A \\
			EO pos class & B & 0.236 & 0.666 & 0.154 & 0.040 & -0.0

In [42]:
# Add scores from the beta predictor
rows=[]
SELECT_PREDICTORS = ['calibrated_race','thresholdless_EO']
CLASSES = [([0,1],'both'),([0],'neg'),([1],'pos')]
RACES_GROUPS = [(['African-American','Caucasian'],'all'),(['African-American'],'African American'),(['Caucasian'],'Caucasian')]
for a,b in itertools.product(range(1,20),repeat=2):
    scores = {'a' : a , 'b' : b, 'c': a/(a+b), 'd':a+b}
    for predictor in SELECT_PREDICTORS:
        scorer = BetaScorer(a,b)
        df[predictor + '_score'] = df.apply(lambda row: scorer.score(row['two_year_recid'],row[predictor]),axis=1)
        for race in RACES_GROUPS:
            for c in CLASSES:
                scores[predictor + '_' + race[1] + '_' + c[1]] = np.mean(df[(df['two_year_recid'].isin(c[0])) & (df['race'].isin(race[0]))][predictor+'_score'])
    rows.append(scores)

scores_df = pd.DataFrame.from_records(rows)
for race in RACES_GROUPS:
    for c in CLASSES:
        names = (race[1],c[1])
        scores_df['cal_EO_pdiff_%s_%s' % names] = scores_df[['calibrated_race_%s_%s' % names, 'thresholdless_EO_%s_%s' % names]].pct_change(axis=1)['thresholdless_EO_%s_%s'%names]


In [43]:
scores_df

Unnamed: 0,a,b,c,d,calibrated_race_all_both,calibrated_race_all_neg,calibrated_race_all_pos,calibrated_race_African American_both,calibrated_race_African American_neg,calibrated_race_African American_pos,...,thresholdless_EO_Caucasian_pos,cal_EO_pdiff_all_both,cal_EO_pdiff_all_neg,cal_EO_pdiff_all_pos,cal_EO_pdiff_African American_both,cal_EO_pdiff_African American_neg,cal_EO_pdiff_African American_pos,cal_EO_pdiff_Caucasian_both,cal_EO_pdiff_Caucasian_neg,cal_EO_pdiff_Caucasian_pos
0,1,1,0.500000,2,1.053632e-01,8.258124e-02,1.302809e-01,1.065453e-01,1.015505e-01,1.107031e-01,...,1.263278e-01,0.019408,0.090758,-0.030059,0.031711,-0.112243,0.141637,0.001201,0.437996,-0.260415
1,1,2,0.333333,3,5.349679e-02,5.401435e-02,5.293070e-02,5.222916e-02,6.306812e-02,4.320636e-02,...,4.934364e-02,0.019522,0.097274,-0.067260,0.031615,-0.060027,0.142970,0.003084,0.332445,-0.324626
2,1,3,0.250000,4,3.213316e-02,3.806931e-02,2.564051e-02,3.059705e-02,4.290266e-02,2.035335e-02,...,2.296068e-02,0.017066,0.091407,-0.103658,0.027051,-0.031452,0.129706,0.004259,0.259919,-0.372410
3,1,4,0.200000,5,2.131785e-02,2.814707e-02,1.384839e-02,1.993411e-02,3.091585e-02,1.079244e-02,...,1.191203e-02,0.013879,0.082406,-0.138461,0.021220,-0.014474,0.106335,0.004840,0.207558,-0.409551
4,1,5,0.166667,6,1.511209e-02,2.155084e-02,8.069700e-03,1.394268e-02,2.321832e-02,6.221236e-03,...,6.672304e-03,0.010838,0.073132,-0.171122,0.015663,-0.003898,0.076437,0.005070,0.168389,-0.439124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,19,15,0.558824,34,9.907955e-12,4.800552e-12,1.549418e-11,1.035296e-11,7.182694e-12,1.299203e-11,...,1.549470e-11,0.039114,0.153834,0.000239,0.060215,-0.228299,0.192994,0.005176,1.417702,-0.250519
357,19,16,0.542857,35,4.422684e-12,2.308564e-12,6.735004e-12,4.592514e-12,3.400470e-12,5.584822e-12,...,6.704133e-12,0.041018,0.162081,-0.004370,0.062429,-0.210576,0.200803,0.007133,1.316174,-0.264576
358,19,17,0.527778,36,2.040039e-12,1.142753e-12,3.021446e-12,2.104859e-12,1.657295e-12,2.477430e-12,...,2.991722e-12,0.042382,0.167833,-0.009514,0.064217,-0.194299,0.208177,0.008390,1.221700,-0.278689
359,19,18,0.513514,37,9.680357e-13,5.804928e-13,1.391911e-12,9.911844e-13,8.290373e-13,1.126162e-12,...,1.372389e-12,0.044637,0.171568,-0.013262,0.068305,-0.179302,0.220042,0.008502,1.134392,-0.293326


In [None]:
    #     results = scorer.score_many(df['two_year_recid'],df[predictor],df['race'])
    #     for race in RACES:
    #         scores[predictor + '_' + race] = results[race]
    #     calibration_component, refinement_component = scorer.score_components(df['two_year_recid'],df[predictor])
    #     scores[predictor + '_S1'] = calibration_component
    #     scores[predictor + '_S2'] = refinement_component
    # rows.append(scores)

In [46]:
# Plot percentage difference by a and b
charts = []
classes = ['all','pos','neg']
for c in CLASSES:
    for race in RACES_GROUPS:
        names = (race[1],c[1])
        chart = alt.Chart(scores_df).mark_rect().encode(
            x='b:O',
            y='a:O',
            color=alt.Color('cal_EO_pdiff_%s_%s' % names,title="Calibration EO % Diff")
        ).properties(title='%s %s' % names)
        charts.append(chart)
alt.concat(*charts,columns=3).properties(title=alt.Title('Beta scores',subtitle='Parameterized s₀=∫₀ᵖ tᵃ(1-t)ᵇ⁻¹ dt, s₁=∫ₚ¹tᵃ⁻¹(1-t)ᵇ dt'))


In [52]:
charts = []
for cl in CLASSES:
    for race in RACES_GROUPS:
        name = race[1],cl[1]
        chart = alt.Chart(scores_df).mark_circle().encode(
            x='d',
            y='cal_EO_pdiff_%s_%s:Q' % name,
            color=alt.Color('c')
        ).properties(title='%s %s' % name)
        charts.append(chart)
alt.concat(*charts,columns=3).properties(title=alt.Title('Beta scores',subtitle='Parameterized s₀=∫₀ᵖ tᵃ(1-t)ᵇ⁻¹ dt, s₁=∫ₚ¹tᵃ⁻¹(1-t)ᵇ dt'))

In [79]:
scores_df['d_bins'] = pd.qcut(scores_df['d'],q=10)
scores_df.d_bins = scores_df.d_bins.astype('string')
charts = []
for cl in CLASSES:
    for race in RACES_GROUPS:
        name = race[1],cl[1]
        chart = alt.Chart(scores_df).mark_circle().encode(
            x='c',
            y='cal_EO_pdiff_%s_%s:Q' % name,
            color=alt.Color('d_bins:O')
        ).properties(title='%s %s' % name)
        charts.append(chart + chart.transform_loess('c', 'cal_EO_pdiff_%s_%s' % name, groupby=['d_bins']).mark_line(size=2))
alt.concat(*charts,columns=3).properties(title=alt.Title('Beta scores',subtitle='Parameterized s₀=∫₀ᵖ tᵃ(1-t)ᵇ⁻¹ dt, s₁=∫ₚ¹tᵃ⁻¹(1-t)ᵇ dt'))
