# Summary table for *COMPAS* data

Here we extract the summary measures for *COMPAS* data for four classifiers: (1) baseline, (2) project, (3) reduction, and (4) SenSR.
For each classifier we perform a gradient flow attack with step size $\frac{65}{1000\times i^{2/3}}$, where $i$ is the current step number. We perform 200 attack steps on each data point.

First, we load the summary data for all experiments and compile it into a pandas DataFrame.

In [32]:
import re
import pandas as pd
# Read the whole summary log into memory as text.
with open('all_summary.out', 'r') as f:
    data = f.read()
# One record per non-blank line. Filtering on content (instead of always
# discarding the last split element) keeps the final record even when the
# file does not end with a newline.
entries = [line for line in data.split('\n') if line.strip()]

In [33]:
# Turn each summary line into a record and collect the records in a DataFrame.
# NOTE(review): eval() executes whatever code the input lines contain. It is
# kept because the exact line format is not visible here; if every line is a
# plain Python literal, ast.literal_eval would be the safer choice -- confirm.
entries_dict = []
for e in entries:
    try:
        entries_dict.append(eval(e))
    except Exception:
        # Best-effort parsing: skip lines that do not evaluate to a record.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        continue
data = pd.DataFrame(entries_dict)
# Flag the runs whose p-value falls below the 0.05 level.
data['reject'] = data['pval'] < 0.05

In [44]:
# Keep only the runs for a single learning rate and give the fairness
# metrics short display names.
lr = 4e-3
short_names = {
    'average_odds_difference_gen': 'AOD-gen',
    'average_odds_difference_race': 'AOD-race',
    'statistical_parity_difference_gen': 'SPD-gen',
    'statistical_parity_difference_race': 'SPD-race',
    'equal_opportunity_difference_race': 'EOD-race',
    'equal_opportunity_difference_gen': 'EOD-gen',
}
data_lr = data.loc[data['lr'] == lr].rename(columns=short_names)
# Print every per-algorithm group for inspection.
for group in data_lr.groupby(['algo']):
    print(group)

('baseline',         algo            seed     lr  accuracy        lb  pval   bal_acc  \
1   baseline  (21440, 98448)  0.004  0.690341  1.687613   0.0  0.689772   
4   baseline  (51668, 64395)  0.004  0.682765  1.638763   0.0  0.680608   
17  baseline  (50057, 73349)  0.004  0.673295  1.796902   0.0  0.671552   
22  baseline  (20609, 49100)  0.004  0.662879  1.943238   0.0  0.661266   
24  baseline   (5192, 98047)  0.004  0.684659  1.695374   0.0  0.682843   
25  baseline  (98539, 77708)  0.004  0.678030  1.546239   0.0  0.677028   
32  baseline   (7751, 43757)  0.004  0.667614  1.732495   0.0  0.666520   
34  baseline  (31228, 36230)  0.004  0.668561  1.819662   0.0  0.666856   
37  baseline  (45413, 35730)  0.004  0.677083  1.780093   0.0  0.675576   
40  baseline  (97561, 21758)  0.004  0.699811  1.802799   0.0  0.699386   

    gap_rms_gen  mean_gap_gen  max_gap_gen   AOD-gen   EOD-gen   SPD-gen  \
1      0.273282      0.270220     0.311016  0.270220  0.311016  0.308418   
4      0.

We extract only the following measures: balanced accuracy, average odds difference for gender and for race, the lower bound (`lb`, reported as $T_n$), and the proportion of runs in which the test rejects (p-value below 0.05).

In [35]:
# Columns to summarise; each one is reduced to its mean and standard
# deviation within every algorithm group.
measure = ['bal_acc', 'AOD-gen', 'AOD-race', 'lb', 'reject']
agg_dict = {column: ['mean', 'std'] for column in measure}
result = data_lr.groupby(['algo'], as_index=False).agg(agg_dict)

In [36]:
# Algorithm keys; not referenced by any other cell visible in this notebook.
rows = [
    'sensr',
    'reduction',
    'baseline',
    'project',
]
# Display labels for the table rows. They are assigned by position, so the
# order must match the row order of `result`.
row_names = [
    'Baseline',
    'Project',
    'Reduction',
    'SenSR',
]
# Display headers, given in the same order as the entries of `measure`.
colnames = [
    'Balanced Acc',
    '$\\text{AOD}_{\\text{gen}}$',
    '$\\text{AOD}_{\\text{race}}$',
    '$T_n$',
    'Rejection Prop',
]


In [37]:
# Add one formatted display column to `result` for every entry of `measure`,
# named after the matching entry of `colnames`.
#
# Separator placed between mean and standard deviation. The backslash is
# escaped explicitly: a bare '\p' is not a valid escape sequence and raises a
# warning on recent Python versions. The resulting text is unchanged.
pm = '$\\pm$'

for i, c in enumerate(measure):
    mean_col = result[c]['mean']
    if c == 'reject':
        # The rejection proportion is shown as the mean only, one decimal.
        result[colnames[i]] = mean_col.apply("{:.1f}".format)
    else:
        # Every other measure is shown as "mean$\pm$std" with three decimals.
        std_col = result[c]['std']
        result[colnames[i]] = (
            mean_col.apply("{:.3f}".format) + pm + std_col.apply("{:.3f}".format)
        )
    # Bold-facing of the best value per column was already disabled
    # (its assignments were commented out), so the unused best-index and
    # best-value computations are omitted here.


In [38]:
# Map the positional row index of `result` (0, 1, 2, ...) to display labels.
ind = dict(enumerate(row_names))
# Build the final table once, after the mapping is complete. Doing this
# inside the loop repeated the same work on every iteration and left `res`
# undefined when `row_names` was empty.
res = result[colnames].rename(index=ind)
res

Unnamed: 0,Balanced Acc,$\text{AOD}_{\text{gen}}$,$\text{AOD}_{\text{race}}$,$T_n$,Rejection Prop
,,,,,
Baseline,0.677$\pm$0.012,0.228$\pm$0.029,0.252$\pm$0.035,1.744$\pm$0.110,1.0
Project,0.644$\pm$0.017,0.047$\pm$0.023,0.214$\pm$0.029,1.225$\pm$0.306,0.4
Reduction,0.655$\pm$0.013,-0.004$\pm$0.045,0.028$\pm$0.047,1.402$\pm$0.031,1.0
SenSR,0.642$\pm$0.020,0.060$\pm$0.023,0.225$\pm$0.028,1.185$\pm$0.354,0.1


In [31]:
# One left-aligned label column followed by a centred column per header.
column_spec = 'l' + 'c' * len(colnames)
# escape=False keeps the LaTeX markup in headers and cells intact.
latex_table = res.to_latex(escape=False, column_format=column_spec)
print(latex_table)

\begin{tabular}{lccccc}
\toprule
{} &     Balanced Acc & $\text{AOD}_{\text{gen}}$ & $\text{AOD}_{\text{race}}$ &            $T_n$ & Rejection Prop \\
{} \\
\midrule
Baseline  &  0.677$\pm$0.012 &           0.228$\pm$0.029 &            0.252$\pm$0.035 &  1.992$\pm$0.149 &            1.0 \\
Project   &  0.645$\pm$0.017 &           0.047$\pm$0.023 &            0.208$\pm$0.024 &  1.253$\pm$0.420 &            0.2 \\
Reduction &  0.655$\pm$0.013 &          -0.004$\pm$0.045 &            0.028$\pm$0.047 &  1.534$\pm$0.043 &            1.0 \\
SenSR     &  0.642$\pm$0.020 &           0.060$\pm$0.023 &            0.225$\pm$0.028 &  1.247$\pm$0.489 &            0.2 \\
\bottomrule
\end{tabular}



In [None]:
import numpy as np
# Load the stored seed array; no other cell visible in this notebook reads `a`.
seeds_file = 'seeds.npy'
a = np.load(seeds_file)