In [None]:
# required modules (skip if already installed)
if False:
    !pip install pyroaring
    !pip install pyfim 
    # if previous does not work, try: !conda install -c conda-forge pyfim
    !pip install lightgbm
    !pip install fairlearn

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

# global imports
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# local imports
sys.path.append('../src/') # local path
import dd

# general settings
# matplotlib 3.6 renamed 'seaborn-whitegrid' to 'seaborn-v0_8-whitegrid' and
# later releases dropped the old alias; use whichever name this installation
# knows (falling back to the historical name so a missing style still errors).
_whitegrid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid') if name in plt.style.available),
    'seaborn-whitegrid')
plt.style.use(_whitegrid_style)
plt.rc('font', size=10)
plt.rc('legend', fontsize=10)
plt.rc('lines', linewidth=2)
plt.rc('axes', linewidth=2)
plt.rc('axes', edgecolor='k')
plt.rc('xtick.major', width=2)
plt.rc('xtick.major', size=6)
plt.rc('ytick.major', width=2)
plt.rc('ytick.major', size=6)
# fonttype 42 embeds TrueType fonts in the exported PDF/PS figures
plt.rc('pdf', fonttype=42)
plt.rc('ps', fonttype=42)

In [None]:
# read folktables data
# NOTE: read_pickle can execute arbitrary code while loading; only use it on a
# pickle file from a trusted source.
census = pd.read_pickle('../data/ACSIncome.pkl')
states = list(census['STATE'].unique())
# change to True to binarize RAC1P
if False:
    # one .loc assignment instead of chained indexing, which may write to a
    # temporary copy and leave `census` unchanged
    census.loc[census["RAC1P"] != 'White alone', "RAC1P"] = 'Not White alone'

In [None]:
# distributions: one bar chart of value counts per discretized attribute
for column in ["WKHPgroup", "RAC1P", "STATE"]:
    census[column].value_counts().plot(kind='bar')
    plt.show()

In [None]:
# inspect the loaded table
census

In [None]:
# predictive attributes (for models)
pred_atts = ['WKHP', 'RAC1P', 'STATE']
pred_all = pred_atts + ['class']
# discretized attributes (for DD): WKHPgroup is used in place of the raw WKHP
disc_atts = ['WKHPgroup', 'RAC1P', 'STATE']
disc_all = disc_atts + ['class']
# encode categorical values; `encoders` is indexed by column name further down
# (presumably one fitted label encoder per encoded column -- confirm in dd.encode)
df, encoders = dd.encode(census)
# link census to df: store the encoded STATE next to the original labels so
# that rows of census can later be joined on the encoded value
census['STATE2'] = df['STATE'].copy(deep=True)
df.head()

In [None]:
# 0 = negative, 1 = positive
# (presumably the position in classes_ is the integer code of that label --
# confirm the encoder type returned by dd.encode)
encoders['class'].classes_

In [None]:
# pretty printing long labels: short names listed in the order of the encoder's classes_
pretty_rac1p = ['Alaska', 'Indian', 'Alaska-Indian', 'Asian', 'Black', 'Hawaiian', 'Other', 'Two+', 'White']
# map each original RAC1P label to the short name at the same position
rac1p_2_pretty = {
    encoders['RAC1P'].classes_[position]: short_name
    for position, short_name in enumerate(pretty_rac1p)
}
rac1p_2_pretty

In [None]:
# split train test: encoded features, target and the original census rows are
# split in one call so the three views stay row-aligned
X, y = df[pred_atts], df['class'].astype(int)
(X_train, X_test,
 y_train, y_test,
 census_train, census_test) = train_test_split(X, y, census, test_size=0.33, random_state=42)

In [None]:
# training model and make predictions - replace with your favorite classifier
import lightgbm as lgb

clf = lgb.LGBMClassifier(random_state=42)
clf.fit(X_train, y_train)
# add predicted class to census_test (decoding back to the original labels)
y_pred_b = clf.predict(X_test)
census_test['pred_b'] = encoders['class'].inverse_transform(y_pred_b)
# add predicted score to census_test: second column of predict_proba,
# i.e. the probability of the class encoded as 1
census_test['score_b'] = clf.predict_proba(X_test)[:,1]
census_test[disc_all].head()

In [None]:
'''
     contingency table for separation
          protected                                   unprotected
     ========= pred.bad  ==  pred.good  ===   ====  pred.bad  ==  pred.good  === 
     true.bad    TPp          FNp      Pp()           TPu           FNu      Pu()
     true.good   FPp          TNp      Np()           FPu           TNu      Nu()
     ==========   a     =====  b  ===  n1()   ====     c    ====     d  ===  n2()
'''
# Accuracy
def acc(ctg):
    """Return the share of correctly classified instances in a contingency table.

    Adds the true positives and true negatives of the protected (p) and
    unprotected (u) groups and divides by the table total `ctg.n()`.
    """
    correct = ctg.TPp + ctg.TPu + ctg.TNp + ctg.TNu
    return correct / ctg.n()

# Equality of opportunity - FairLearn version is
# P(pred.good|true.good) - P(pred.good|protected,true.good)
def eop_mean(ctg, disc):
    """Equality-of-opportunity gap of a contingency table w.r.t. its context.

    ctg  -- contingency table; reads a, Np(), Nu(), ctx and tnrp()
    disc -- dd.DD object; reads itDB.cover(), trueGood and predGood

    Returns None when the table has too little support, otherwise the rate of
    predicted-good among the true-good instances of the context minus
    ctg.tnrp().
    """
    # at least 20 protected and some protected/unprotected negatives
    if ctg.a < 20:
        return None
    if ctg.Np() == 0 or ctg.Nu() == 0:
        return None
    # compute P(pred.good|true.good) within the context of the table
    context = list(ctg.ctx)
    n_true_good = len(disc.itDB.cover(context + [disc.trueGood]))
    n_pred_true_good = len(disc.itDB.cover(context + [disc.trueGood, disc.predGood]))
    overall_rate = n_pred_true_good / n_true_good
    return overall_rate - ctg.tnrp()

# To compute P(pred.good|true.good) we need the dd.DD object
def metric_b(ctg):
    """EOP gap of `ctg` measured against disc_b (looked up when called)."""
    return eop_mean(ctg, disc_b)

In [None]:
# discrimination in overall test set (baseline predictions pred_b)
# DD arguments: the data, the item identifying the unprotected group, and the
# items denoting a negative decision and a negative ground truth
disc_b = dd.DD(census_test[disc_all+['pred_b']], unprotectedItem='RAC1P=White alone', 
               predBadItem='pred_b=False', trueBadItem='class=False',) 

In [None]:
# all protected vs unprotected
ctg = disc_b.ctg_any()
disc_b.print(ctg)
# NOTE(review): metric_b returns None for tables with little support, which
# the {:f} format cannot render
print("Metric = {:f}".format(metric_b(ctg)))
print("ACC = {:f}".format(acc(ctg)))

In [None]:
# each protected vs unprotected
for ctg in disc_b.ctg_global():
    disc_b.print(ctg)
    # metric_b returns None for tables with too little support; "{:f}" cannot
    # format None, so report those groups explicitly instead of crashing
    metric_value = metric_b(ctg)
    if metric_value is None:
        print("Metric = n/a (insufficient support)")
    else:
        print("Metric = {:f}".format(metric_value))
    print("ACC = {:f}".format(acc(ctg)))

In [None]:
# check with the Fairlearn metrics
from fairlearn.reductions import TruePositiveRateParity # EOP is TPR given the coding of classes 
from fairlearn.metrics import MetricFrame
from fairlearn.metrics import true_positive_rate

# true positive rate of the baseline predictions, overall and per (encoded) RAC1P group
summary_b = MetricFrame(metrics=true_positive_rate,
                          y_true=y_test,
                          y_pred=y_pred_b,
                          sensitive_features=X_test['RAC1P'])
# gap between the overall rate and each group's rate
summary_b.overall-summary_b.by_group

In [None]:
# Fairlearn algorithms and utils (https://github.com/fairlearn/fairlearn)
from fairlearn.postprocessing import ThresholdOptimizer

# fairness by post-processing: wraps the already fitted clf (prefit=True)
# under a true-positive-rate parity constraint
postprocess_est = ThresholdOptimizer(estimator=clf, constraints="true_positive_rate_parity", prefit=True, predict_method='predict')
#X_train = X_train.fillna(0) # fairlearn does not manage missing values
#X_test = X_test.fillna(0) # fairlearn does not manage missing values
postprocess_est.fit(X_train, y_train, sensitive_features=X_train['RAC1P'])
# fair-corrected predictions (random_state fixes the randomized predictions)
y_pred_a = postprocess_est.predict(X_test, sensitive_features=X_test['RAC1P'], random_state=42).astype(int)
# decode back to the original class labels and store next to pred_b
census_test['pred_a'] = encoders['class'].inverse_transform(y_pred_a)

In [None]:
from fairlearn.postprocessing import plot_threshold_optimizer

# visualise the fitted post-processing estimator
plot_threshold_optimizer(postprocess_est)

In [None]:
# To compute P(pred.good|true.good) we need the dd.DD object
def metric_a(ctg):
    """EOP gap of `ctg` measured against disc_a (looked up when called)."""
    return eop_mean(ctg, disc_a)

In [None]:
# discrimination in overall test set after correction (predictions pred_a)
# DD(filename or dataframe, unprotected item, negative decision, negative truth)
disc_a = dd.DD(census_test[disc_all+['pred_a']], unprotectedItem='RAC1P=White alone', 
               predBadItem='pred_a=False', trueBadItem='class=False', 
               codes=disc_b.codes) # extends the coding of items as disc_b 

In [None]:
# all protected vs unprotected, after the global correction
ctg = disc_a.ctg_any()
disc_a.print(ctg)
# NOTE(review): metric_a returns None for tables with little support, which
# the {:f} format cannot render
print("Metric = {:f}".format(metric_a(ctg)))
print("ACC = {:f}".format(acc(ctg)))

In [None]:
# each protected vs unprotected, after the global correction
for ctg in disc_a.ctg_global():
    disc_a.print(ctg)
    # metric_a returns None for tables with too little support; "{:f}" cannot
    # format None, so report those groups explicitly instead of crashing
    metric_value = metric_a(ctg)
    if metric_value is None:
        print("Metric = n/a (insufficient support)")
    else:
        print("Metric = {:f}".format(metric_value))
    print("ACC = {:f}".format(acc(ctg)))

In [None]:
# check with the Fairlearn metrics: true positive rate of the corrected
# predictions, overall and per (encoded) RAC1P group
summary_a = MetricFrame(metrics=true_positive_rate,
                          y_true=y_test,
                          y_pred=y_pred_a,
                          sensitive_features=X_test['RAC1P'])
# gap between the overall rate and each group's rate
summary_a.overall-summary_a.by_group

In [None]:
# EOP gap (overall TPR minus group TPR) per race, before and after post-processing
gap_before = summary_b.overall - summary_b.by_group
gap_after = summary_a.overall - summary_a.by_group
X_axis = np.arange(len(pretty_rac1p))
bar_width = 0.4
# two bars per race, side by side around the tick
plt.bar(X_axis - 0.2, gap_before, bar_width, label='EOP before')
plt.bar(X_axis + 0.2, gap_after, bar_width, label='EOP after')
plt.xticks(X_axis, pretty_rac1p, rotation=90)
plt.legend()
plt.ylabel("EOP", fontweight="bold")
plt.savefig('fig0.pdf', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# Fairlearn fails if there is a sensitive group without instances in a control group, e.g., no Other race in a STATE
# (kept disabled for that reason; flip the guard to reproduce the failure)
if False:
    summary_ac = MetricFrame(metrics=true_positive_rate,
                          y_true=y_test,
                          y_pred=y_pred_a,
                          sensitive_features=X_test['RAC1P'],
                          control_features=X_test['STATE'])

In [None]:
# Per (STATE, protected group) context: EOP before (xs) and after the global
# correction (ys), the accuracy loss (acs) and a description of the context (descs)
acs = []
xs = []
ys = []
descs = []
for s in census['STATE'].unique():
    for ctg_b in disc_b.ctg_global(['STATE='+s]):
        x = metric_b(ctg_b)
        # relativize ctg_b to disc_a
        ctg_a = disc_a.ctg_rel(ctg_b) 
        y = metric_a(ctg_a)
        # skip contexts where either metric is undefined (too little support)
        if x is None or y is None:
            continue
        xs.append(x)
        ys.append(y)
        # accuracy before minus accuracy after, i.e. the accuracy lost by the correction
        acs.append(acc(ctg_b)-acc(ctg_a))
        descs.append(disc_b.ctg_info(ctg_b))

In [None]:
# development only: list the points with an accuracy loss above 0.07, with
# their position in xs/ys (used to choose the annotated points of the scatter plot)
if False:
    for i in range(len(xs)):
        if acs[i]>0.07:
            print(i, xs[i], ys[i], ys[i]-xs[i], acs[i], descs[i])

In [None]:
# EOP before vs. after the global correction, one point per (STATE, RAC1P)
# context, coloured by accuracy loss
plt.scatter(xs, ys, c =acs, linewidths = .5, marker ="o", cmap="RdYlBu_r", s=20)
plt.xlabel("EOP before", fontweight="bold")
plt.ylabel("EOP after", fontweight="bold")
plt.xlim([-.6, 1])
plt.ylim([-.6, 1])
# red diagonal (EOP unchanged) and black axes through the origin
plt.axline((0, 0), (1, 1), linewidth=1.5, color='r')
plt.axline((0, 0), (0, 1), linewidth=1, color='black')
plt.axline((0, 0), (1, 0), linewidth=1, color='black')
arrow_properties = dict(color='green', arrowstyle="->", connectionstyle="angle3,angleA=90,angleB=0")
# Highlighted points: (position in xs/ys, label, text position).
# The positions are hard-coded from a previous run (see the development cell)
# and must be re-checked whenever the data or the model changes.
# Kept in a table and iterated with `point` so the builtin `id` is not shadowed.
annotated_points = [
    # 2 0.06819594678602509 0.29230386671639935 0.22410791993037427 ('STATE=AL', 'RAC1P=Asian alone')
    (2, "AL, Asian", (0.3, 0.6)),
    # 121 0.8698534098151689 0.3458799963580079 -0.523973413457161 ('STATE=NJ', 'RAC1P=Some Other Race alone')
    (121, "NJ, Other", (0.9, 0.6)),
    # 50 0.42983565107458915 -0.4400899002668914 -0.8699255513414805 ('STATE=ID', 'RAC1P=Some Other Race alone')
    (50, "ID, Other", (0.8, -0.3)),
    # 163 0.0 -0.5180231240081614 -0.5180231240081614 ('STATE=SD', 'RAC1P=American Indian alone')
    (163, "SD, Indian", (-0.14, -0.42)),
    # 80 -0.1890491901156537 0.21018338849228213 0.3992325786079358 ('STATE=MD', 'RAC1P=Asian alone')
    (80, "MD, Asian", (-0.2, 0.4)),
    # 42 -0.554163655808307 -0.13408966095326186 0.4200739948550451 ('STATE=GA', 'RAC1P=Asian alone')
    (42, "GA, Asian", (-0.32, -0.26)),
    # 7 0.7177777777777777 0.3356349206349207 -0.38214285714285706 0.08146639511201637 ('STATE=AK', 'RAC1P=Alaska Native alone')
    (7, "AK, Alaska", (0.85, 0.15)),
]
for point, label, text_xy in annotated_points:
    plt.annotate(label, xy=(xs[point], ys[point]), xycoords='data', xytext=text_xy, textcoords='data',
                 arrowprops=arrow_properties, horizontalalignment='right', verticalalignment='top')
plt.colorbar(label="accuracy loss", orientation="vertical", shrink=.7)
plt.savefig('fig1.pdf', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# Table with distributions of RAC1P by STATE in the training set
counts_by_state = pd.crosstab(X_train['STATE'], X_train['RAC1P'])
# number of training instances per STATE (kept for the cluster summary)
state_tot = counts_by_state.sum(axis=1)
# turn each row of counts into proportions summing to 1
ct = counts_by_state.div(state_tot, axis=0)
ct

In [None]:
# Cluster Table of distributions
from sklearn.cluster import KMeans

def drawSSEPlot(df, column_indices, ret_clus=5, n_clusters=8, max_iter=300, tol=1e-04, init='k-means++', n_init=10, algorithm='auto'):
    """Fit KMeans for 1..n_clusters clusters, plot the SSE curve, return one model.

    df             -- DataFrame holding the features
    column_indices -- positional indices of the columns of df to cluster on
    ret_clus       -- number of clusters of the fitted model to return
    n_clusters     -- largest number of clusters tried
    max_iter, tol, init, n_init -- forwarded to KMeans
    algorithm      -- forwarded to KMeans unless it is 'auto' (see below)

    Returns the KMeans model fitted with ret_clus clusters.
    Raises ValueError if ret_clus is not in 1..n_clusters (previously this
    surfaced as an UnboundLocalError after all the models had been fitted).
    """
    if not 1 <= ret_clus <= n_clusters:
        raise ValueError('ret_clus must be between 1 and n_clusters ({}), got {}'.format(n_clusters, ret_clus))
    # 'auto' was deprecated in scikit-learn 1.1 and is rejected from 1.3 on.
    # Leaving the argument out selects the library default, which is 'auto'
    # on old releases and its replacement on new ones.
    algorithm_kwargs = {} if algorithm == 'auto' else {'algorithm': algorithm}
    features = df.iloc[:, column_indices]
    cluster_counts = range(1, n_clusters+1)
    inertia_values = []
    ret = None
    for i in cluster_counts:
        km = KMeans(n_clusters=i, max_iter=max_iter, tol=tol, init=init, n_init=n_init, random_state=42, **algorithm_kwargs)
        km.fit(features)
        inertia_values.append(km.inertia_)
        if i == ret_clus:
            ret = km
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(cluster_counts, inertia_values, color='red')
    ax.set_xlabel('No. of Clusters', fontsize=15)
    ax.set_ylabel('SSE / Inertia', fontsize=15)
    ax.set_title('SSE / Inertia vs No. Of Clusters', fontsize=15)
    ax.grid()
    plt.show()
    return ret

# select final number of clusters based on SSE plot
n_clusters = 6
# cluster on every column of ct; tries 1..10 clusters and keeps the model with n_clusters
# NOTE(review): a later cell adds 'cluster' and 'tot' columns to ct, so
# re-running this cell afterwards would cluster on those columns too
km = drawSSEPlot(ct, range(len(ct.columns)), n_clusters=10, ret_clus=n_clusters)

In [None]:
# assign cluster to STATE
ct['cluster'] = km.labels_
ct['tot'] = state_tot
# Only the last expression of a cell is rendered, so the sorted table needs an
# explicit display() (available in every Jupyter/IPython kernel); the bare
# expression used before was silently discarded.
display(ct.sort_values(by=['cluster']))
# instances by cluster; 'tot' is the only remaining column, so a plain sum()
# is enough (the first positional argument of sum is numeric_only, not a column list)
ct[['cluster','tot']].groupby(['cluster']).sum()

In [None]:
# add cluster to train and test instances
# X_train / X_test hold the encoded STATE, which is also the index of ct
X_train_c = pd.merge(X_train, ct['cluster'], left_on='STATE', right_index=True)
X_test_c = pd.merge(X_test, ct['cluster'], left_on='STATE', right_index=True)
# census_test holds the original STATE labels; STATE2 is the encoded copy
census_test_c = pd.merge(census_test, ct['cluster'], left_on='STATE2', right_index=True)
# Placeholder for the cluster-specific predictions. It is filled with decoded
# class labels afterwards, so start from an empty object column: an integer
# placeholder makes pandas complain about (and in future versions reject)
# writing values of an incompatible dtype into it.
census_test_c['pred_c'] = None

In [None]:
# cluster specific ThresholdOptimizer post-processing: one optimizer is fitted
# on the training instances of each cluster and applied to the test instances
# of the same cluster
for i in range(n_clusters):
    # boolean mask over the training instances of cluster i
    # NOTE(review): the mask is built on X_train_c but applied to X_train and
    # y_train; this relies on the merge having kept the index of X_train -- confirm
    sub = X_train_c['cluster']==i
    postprocess_est = ThresholdOptimizer(estimator=clf, constraints="true_positive_rate_parity", prefit=True, predict_method='predict')
    postprocess_est.fit(X_train.loc[sub, pred_atts], y_train[sub], sensitive_features=X_train.loc[sub, 'RAC1P'])
    sub = X_test_c['cluster']==i
    y_pred_s = postprocess_est.predict(X_test_c.loc[sub, pred_atts], sensitive_features=X_test_c.loc[sub, 'RAC1P'], random_state=42).astype(int)
    # NOTE(review): the predictions are written positionally, which assumes
    # X_test_c and census_test_c list the rows of cluster i in the same order -- confirm
    sub = census_test_c['cluster']==i
    census_test_c.loc[sub, 'pred_c'] = encoders['class'].inverse_transform(y_pred_s)

In [None]:
# inspect the test table with the cluster and the cluster-specific prediction
census_test_c

In [None]:
# To compute P(pred.good|true.good) we need the dd.DD object
def metric_c(ctg):
    """EOP gap of `ctg` measured against disc_c (looked up when called)."""
    return eop_mean(ctg, disc_c)

In [None]:
# discrimination in overall test set after the cluster-specific correction (pred_c)
# DD(filename or dataframe, unprotected item, negative decision, negative truth)
disc_c = dd.DD(census_test_c[disc_all+['pred_c']], unprotectedItem='RAC1P=White alone', 
               predBadItem='pred_c=False', trueBadItem='class=False', 
               codes=disc_a.codes) # extends the coding of items as disc_a 
# all protected vs unprotected
ctg = disc_c.ctg_any()
disc_c.print(ctg)
# NOTE(review): metric_c returns None for tables with little support, which
# the {:f} format cannot render
print("Metric = {:f}".format(metric_c(ctg)))

In [None]:
# Per (STATE, protected group) context: EOP before (xs) and after the
# cluster-specific correction (ys), the accuracy loss (acs) and a description
# of the context (descs)
acs = []
xs = []
ys = []
descs = []
for s in census['STATE'].unique():
    for ctg_b in disc_b.ctg_global(['STATE='+s]):
        x = metric_b(ctg_b)
        # relativize ctg_b to disc_c
        ctg_c = disc_c.ctg_rel(ctg_b) 
        y = metric_c(ctg_c)
        # skip contexts where either metric is undefined (too little support)
        if x is None or y is None:
            continue
        xs.append(x)
        ys.append(y)
        # accuracy loss of THIS correction: compare with ctg_c, not with the
        # ctg_a left over from the last iteration of an earlier cell
        acs.append(acc(ctg_b)-acc(ctg_c))
        descs.append(disc_b.ctg_info(ctg_b))

In [None]:
# EOP before vs. after, one point per context, coloured by accuracy loss
axis_limits = [-.6, 1]
plt.scatter(xs, ys, c=acs, linewidths=.5, marker="o", cmap="RdYlBu_r", s=20)
plt.xlabel("EOP before", fontweight="bold")
plt.ylabel("EOP after", fontweight="bold")
plt.xlim(axis_limits)
plt.ylim(axis_limits)
# red diagonal: EOP unchanged by the correction
plt.axline((0, 0), (1, 1), linewidth=1.5, color='r')
# black vertical and horizontal lines through the origin
for second_point in [(0, 1), (1, 0)]:
    plt.axline((0, 0), second_point, linewidth=1, color='black')
plt.colorbar(label="accuracy loss", orientation="vertical", shrink=.7)
#plt.savefig('fig1.pdf', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# encode the cluster-specific predictions back to integers for Fairlearn
# NOTE(review): y_pred_c follows the row order of census_test_c while y_true
# and the sensitive feature follow X_test; this assumes the merge kept every
# row of the test set in its original order -- confirm
y_pred_c = encoders['class'].transform(census_test_c['pred_c'])
summary_c = MetricFrame(metrics=true_positive_rate,
                          y_true=y_test,
                          y_pred=y_pred_c,
                          sensitive_features=X_test['RAC1P'])
# gap between the overall rate and each group's rate
summary_c.overall-summary_c.by_group

In [None]:
# EOP gap per race: baseline, global correction, cluster-specific correction
X_axis = np.arange(len(pretty_rac1p))
bar_width = 0.3
# plt.bar centres each bar on its x position, so offsets of -width, 0 and
# +width centre the group of three on the tick (the previous offsets
# -0.45/-0.15/+0.15 shifted every group to the left of its label)
plt.bar(X_axis - bar_width, summary_b.overall-summary_b.by_group, bar_width, label='EOP before')
plt.bar(X_axis, summary_a.overall-summary_a.by_group, bar_width, label='EOP after')
plt.bar(X_axis + bar_width, summary_c.overall-summary_c.by_group, bar_width, label='EOP correction')
plt.xticks(X_axis, pretty_rac1p)
plt.xticks(rotation=90)
plt.legend(loc=4)
plt.ylabel("EOP", fontweight="bold")
#plt.savefig('fig0.pdf', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# keep only the test rows whose baseline score is below 0.05 or at least 0.5
# `adult_test` and its 'score' column do not exist in this notebook (NameError
# on a fresh run); the test table here is census_test and the baseline score
# is stored in its 'score_b' column
adult_test_r = census_test[(census_test['score_b']<0.05) | (census_test['score_b']>=0.5)]

In [None]:
# Equality of opportunity - FairLearn
# P(pred.good|true.good) - P(pred.good|protected,true.good)
def metric_r(ctg):
    """Equality-of-opportunity gap of `ctg` measured against disc_r.

    Same computation as eop_mean, with the dd.DD object fixed to the
    notebook-level disc_r (looked up when the function is called).
    Returns None when the table has too little support.
    """
    # at least 20 protected and some protected/unprotected negatives
    if ctg.a < 20 or ctg.Np()==0 or ctg.Nu()==0:
        return None
    # compute P(pred.good|true.good)
    # list(...) as in eop_mean: the context need not be a list, and adding a
    # list to e.g. a tuple raises TypeError
    ctx = list(ctg.ctx)
    trueGood = len(disc_r.itDB.cover(ctx+[disc_r.trueGood]))
    predtrueGood =  len(disc_r.itDB.cover(ctx+[disc_r.trueGood, disc_r.predGood])) # this line changes from metric_b
    # (debug print of the overall rate removed, as in eop_mean: it flooded the
    # output when the metric is evaluated for every context)
    # end
    return predtrueGood/trueGood - ctg.tnrp() 

In [None]:
# discrimination in the score-filtered test set
# DD(filename or dataframe, unprotected item, negative decision, negative truth)
# `all_atts` is not defined anywhere in this notebook (NameError on a fresh
# run); every other dd.DD call selects the columns disc_all + prediction
disc_r = dd.DD(adult_test_r[disc_all+['pred_b']], unprotectedItem='RAC1P=White alone', 
               predBadItem='pred_b=False', trueBadItem='class=False', na_values={'nan'}, 
               codes=disc_b.codes) # extends the coding of items as disc_b 
# all protected vs unprotected
ctg = disc_r.ctg_any()
disc_r.print(ctg)
print("Metric = {:f}".format(metric_r(ctg)))

In [None]:
# Per (STATE, protected group) context: EOP on the full test set (xs) and on
# the filtered test set (ys), the accuracy difference (acs) and a description
# of the context (descs)
acs = []
xs = []
ys = []
descs = []
# `adult` is not defined in this notebook; the table with STATE is census
for s in census['STATE'].unique():
    for ctg_b in disc_b.ctg_global(['STATE='+s]):
        x = metric_b(ctg_b)
        # relativize ctg_b to disc_r
        ctg_c = disc_r.ctg_rel(ctg_b) 
        y = metric_r(ctg_c)
        # skip contexts where either metric is undefined (too little support)
        if x is None or y is None:
            continue
        xs.append(x)
        ys.append(y)
        # compare with the table relativized in THIS loop (ctg_c), not with the
        # ctg_a left over from the last iteration of an earlier cell
        acs.append(acc(ctg_b)-acc(ctg_c))
        descs.append(disc_b.ctg_info(ctg_b))

In [None]:
# EOP before vs. after, one point per context, coloured by accuracy loss
axis_limits = [-.6, 1]
plt.scatter(xs, ys, c=acs, linewidths=.5, marker="o", cmap="RdYlBu_r", s=20)
plt.xlabel("EOP before", fontweight="bold")
plt.ylabel("EOP after", fontweight="bold")
plt.xlim(axis_limits)
plt.ylim(axis_limits)
# red diagonal: EOP unchanged
plt.axline((0, 0), (1, 1), linewidth=1.5, color='r')
# black vertical and horizontal lines through the origin
for second_point in [(0, 1), (1, 0)]:
    plt.axline((0, 0), second_point, linewidth=1, color='black')
plt.colorbar(label="accuracy loss", orientation="vertical", shrink=.7)
#plt.savefig('fig1.pdf', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# baseline predictions on the TRAINING set, decoded and stored in census_train
y_pred_bt = clf.predict(X_train)
census_train['pred_bt'] = encoders['class'].inverse_transform(y_pred_bt)
# discrimination in the training set, sharing the item coding of disc_b
disc_bt = dd.DD(census_train[disc_all+['pred_bt']], unprotectedItem='RAC1P=White alone', 
               predBadItem='pred_bt=False', trueBadItem='class=False',
               codes=disc_b.codes) # extends the coding of items as disc_b 

In [None]:
# To compute P(pred.good|true.good) we need the dd.DD object
def metric_bt(ctg):
    """EOP gap of `ctg` measured against disc_bt (looked up when called)."""
    return eop_mean(ctg, disc_bt)

In [None]:
# Extract contingency tables: 
# testCond is the metric defined above; the result is iterated below as
# (value, contingency table) pairs
# NOTE(review): meaning of the negative minSupp and of topk to be confirmed in dd.DD.extract
ctgs_bt = disc_bt.extract(testCond=metric_bt, minSupp=-50, topk=10000)

In [None]:
# sequential covering algorithm over the extracted contingency tables
# (the last argument, 100, is presumably the maximum number of tables to
# select -- confirm in dd.DD.cover_n)
covers, residuals, times, uncovered, ctg_cov, ctg_uncov = disc_bt.cover_n([ctg for _,ctg in ctgs_bt], metric_bt, 100)
# totals are derived from the per-table residuals and the uncovered instances
print('Total protected:', sum(residuals)+len(uncovered))
print('Total protected covered:', sum(residuals))
print('% covered: {:.2f}%'.format(100*sum(residuals)/(sum(residuals)+len(uncovered))))
# OR's of covering contexts and any protected
#disc_bt.print(ctg_cov)
#print("Metric = {:f}".format(metric_bt(ctg_cov)))

In [None]:
# Cover contingency tables: print each selected table with its metric, then
# the average metric weighted by the n1() size of each table
sum_rd = cnt_rd = 0
for i, (ctg, res) in enumerate(zip(covers, residuals)):
    print('-----\nCT', i, 'covered', res)
    disc_bt.print(ctg)
    value = metric_bt(ctg)
    print("Metric = {:f}".format(value))
    weight = ctg.n1()
    sum_rd += value*weight
    cnt_rd += weight
print('-----\nAverage metric = {:f}'.format(sum_rd/cnt_rd))

In [None]:
# switch to test set: union of the test instances (in disc_b) covered by the
# context of any of the contingency tables selected on the training set
all_covered = disc_b.itDB.cover_none()
for c in covers:
    all_covered |= disc_b.itDB.cover(c.ctx)

In [None]:
# number of test instances covered by at least one selected context
len(all_covered)

In [None]:
# selected = all - covered
# sorted() keeps the retained rows in their original test-set order; a bare
# set has no defined iteration order
# (assumes all_covered holds row positions of census_test -- confirm in dd)
sel = sorted(set(range(len(census_test))) - set(all_covered))
census_test_r = census_test.iloc[sel,:]
# fraction of the test rows that are retained
print('Coverage', len(sel)/len(census_test))

In [None]:
# discrimination in the retained (not covered) part of the test set, with the
# baseline predictions
disc_r = dd.DD(census_test_r[disc_all+['pred_b']], unprotectedItem='RAC1P=White alone', 
               predBadItem='pred_b=False', trueBadItem='class=False', 
               codes=disc_b.codes) # extends the coding of items as disc_b 

In [None]:
# To compute P(pred.good|true.good) we need the dd.DD object
def metric_r(ctg):
    """EOP gap of `ctg` measured against disc_r (looked up when called)."""
    return eop_mean(ctg, disc_r)

In [None]:
# each protected vs unprotected, on the retained part of the test set
for ctg in disc_r.ctg_global():
    disc_r.print(ctg)
    # metric_r returns None for tables with too little support; "{:f}" cannot
    # format None, so report those groups explicitly instead of crashing
    metric_value = metric_r(ctg)
    if metric_value is None:
        print("Metric = n/a (insufficient support)")
    else:
        print("Metric = {:f}".format(metric_value))
    print("ACC = {:f}".format(acc(ctg)))

In [None]:
# Per (STATE, protected group) context: EOP on the full test set (xs) and on
# the retained part (ys), the accuracy difference (acs) and a description of
# the context (descs)
acs = []
xs = []
ys = []
descs = []
for s in census['STATE'].unique():
    for ctg_b in disc_b.ctg_global(['STATE='+s]):
        x = metric_b(ctg_b)
        # relativize ctg_b to disc_r; contexts that cannot be relativized are
        # skipped (best effort), but KeyboardInterrupt/SystemExit are no
        # longer swallowed as they were by the bare except
        try:
            ctg_c = disc_r.ctg_rel(ctg_b)
        except Exception:
            continue
        y = metric_r(ctg_c)
        # skip contexts where either metric is undefined (too little support)
        if x is None or y is None:
            continue
        xs.append(x)
        ys.append(y)
        # compare with the table relativized in THIS loop (ctg_c), not with the
        # ctg_a left over from the last iteration of an earlier cell
        acs.append(acc(ctg_b)-acc(ctg_c))
        descs.append(disc_b.ctg_info(ctg_b))

In [None]:
# EOP before vs. after, one point per context, coloured by accuracy loss
axis_limits = [-.6, 1]
plt.scatter(xs, ys, c=acs, linewidths=.5, marker="o", cmap="RdYlBu_r", s=20)
plt.xlabel("EOP before", fontweight="bold")
plt.ylabel("EOP after", fontweight="bold")
plt.xlim(axis_limits)
plt.ylim(axis_limits)
# red diagonal: EOP unchanged
plt.axline((0, 0), (1, 1), linewidth=1.5, color='r')
# black vertical and horizontal lines through the origin
for second_point in [(0, 1), (1, 0)]:
    plt.axline((0, 0), second_point, linewidth=1, color='black')
plt.colorbar(label="accuracy loss", orientation="vertical", shrink=.7)
#plt.savefig('fig1.pdf', bbox_inches='tight', dpi=400)
plt.show()

In [None]:
# Fairlearn check on the retained part of the test set: predictions and truth
# are re-encoded from census_test_r, the sensitive feature is taken from the
# same row positions (sel) of X_test
y_pred_r = encoders['class'].transform(census_test_r['pred_b'])
y_test_r = encoders['class'].transform(census_test_r['class'])
X_test_r = X_test.iloc[sel,:]
summary_r = MetricFrame(metrics=true_positive_rate,
                          y_true=y_test_r,
                          y_pred=y_pred_r,
                          sensitive_features=X_test_r['RAC1P'])
# gap between the overall rate and each group's rate
summary_r.overall-summary_r.by_group

In [None]:
# EOP gap per race: baseline, global correction, retained part of the test set
X_axis = np.arange(len(pretty_rac1p))
bar_width = 0.3
# plt.bar centres each bar on its x position, so offsets of -width, 0 and
# +width centre the group of three on the tick (the previous offsets
# -0.45/-0.15/+0.15 shifted every group to the left of its label)
plt.bar(X_axis - bar_width, summary_b.overall-summary_b.by_group, bar_width, label='EOP before')
plt.bar(X_axis, summary_a.overall-summary_a.by_group, bar_width, label='EOP after')
plt.bar(X_axis + bar_width, summary_r.overall-summary_r.by_group, bar_width, label='EOP correction')
plt.xticks(X_axis, pretty_rac1p)
plt.xticks(rotation=90)
plt.legend(loc=4)
plt.ylabel("EOP", fontweight="bold")
#plt.savefig('fig0.pdf', bbox_inches='tight', dpi=400)
plt.show()