# Summary table for *COMPAS* data

Here we extract the summary measures for the *COMPAS* data for four classifiers: (1) baseline, (2) project, (3) reduction, and (4) SenSR.
For each classifier we perform a gradient flow attack with step size $\frac{65}{1000\times i^{2/3}}$, where $i$ is the current step number. We perform 200 attack steps on each data point.

First, we load the summary data for all experiments and compile it into a pandas DataFrame.

In [183]:
import re
import pandas as pd
# Read the raw summary file; each line is expected to hold one experiment record.
with open('all_summary.out', 'r') as f:
    data = f.read()
# splitlines() yields one entry per line whether or not the file ends with a
# newline. The previous `re.split('\n', data)[:-1]` always discarded the last
# element, which silently dropped the final record when the trailing newline
# was missing.
entries = data.splitlines()

In [184]:
# Turn every line of the summary file into a record and collect the records
# in a DataFrame.
#
# NOTE(security): eval() executes whatever expression the line contains, so
# this must only be run on a trusted `all_summary.out`. If every line is a
# plain Python literal, ast.literal_eval would be a safe replacement —
# TODO confirm the file format before switching.
entries_dict = []
n_skipped = 0
for e in entries:
    if not e.strip():
        # Blank lines carry no record.
        continue
    try:
        entries_dict.append(eval(e))
    except Exception:
        # Best-effort parsing: a malformed line is skipped. Catching Exception
        # (not a bare `except:`) lets KeyboardInterrupt / SystemExit propagate.
        n_skipped += 1
if n_skipped:
    print('Skipped {} unparseable line(s) out of {}'.format(n_skipped, len(entries)))
data = pd.DataFrame(entries_dict)
# Flag the runs whose p-value falls below the 0.05 threshold.
data['reject'] = data['pval'] < 0.05

In [185]:
# Keep only the runs recorded with the learning rate of interest and give the
# fairness metrics short column names for the summary table.
lr = 5e-3
short_names = {
    'average_odds_difference_gen': 'AOD-gen',
    'average_odds_difference_race': 'AOD-race',
    'statistical_parity_difference_gen': 'SPD-gen',
    'statistical_parity_difference_race': 'SPD-race',
    'equal_opportunity_difference_race': 'EOD-race',
    'equal_opportunity_difference_gen': 'EOD-gen',
}
data_lr = data.loc[data['lr'] == lr].rename(columns=short_names)
# Dump every (algorithm, sub-frame) pair for a quick visual check.
for group in data_lr.groupby(['algo']):
    print(group)

('baseline',         algo            seed     lr  accuracy        lb  pval   bal_acc  \
2   baseline  (50057, 73349)  0.005  0.673295  2.562466   0.0  0.671552   
9   baseline     (767, 1876)  0.005  0.665720  1.991305   0.0  0.665400   
15  baseline    (2367, 4534)  0.005  0.660985  1.996409   0.0  0.662044   
27  baseline  (21440, 98448)  0.005  0.690341  2.352636   0.0  0.689772   
32  baseline  (20609, 49100)  0.005  0.662879  2.812803   0.0  0.661266   
37  baseline   (7751, 43757)  0.005  0.667614  2.441824   0.0  0.666520   
38  baseline  (31228, 36230)  0.005  0.668561  2.543041   0.0  0.666856   
46  baseline  (97561, 21758)  0.005  0.699811  2.589077   0.0  0.699386   
48  baseline  (51668, 64395)  0.005  0.682765  2.235409   0.0  0.680608   
53  baseline   (5192, 98047)  0.005  0.684659  2.320282   0.0  0.682843   

    gap_rms_gen  mean_gap_gen  max_gap_gen   AOD-gen   EOD-gen   SPD-gen  \
2      0.213178      0.212599     0.228299  0.212599  0.228299  0.243473   
9      0.

We extract only the following measures: balanced accuracy, average odds difference for gender and for race, the lower bound, and the proportion of times the test is rejected.

In [186]:
# Columns to summarise; each one is reduced to its mean and standard deviation
# per algorithm.
measure = ['bal_acc', 'AOD-gen', 'AOD-race', 'lb', 'reject']
agg_dict = {name: ['mean', 'std'] for name in measure}
per_algo = data_lr.groupby(['algo'], as_index=False)
result = per_algo.agg(agg_dict)

In [187]:
# Algorithm keys. NOTE(review): not referenced by any other visible cell, and
# the order differs from `row_names` below.
rows = [
    'sensr',
    'reduction',
    'baseline',
    'project',
]
# Display labels for the table rows; a later cell assigns them to the rows of
# `result` by position — presumably the sorted order of the `algo` group keys.
row_names = [
    'Baseline',
    'Project',
    'Reduction',
    'SenSR',
]
# Display headers, one per entry of `measure` and in the same order.
colnames = [
    'Balanced Acc',
    '$\\text{AOD}_{\\text{gen}}$',
    '$\\text{AOD}_{\\text{race}}$',
    '$T_n$',
    'Rejection Prop',
]


In [188]:
# Add one display column per measure to `result`:
#   * 'reject'  -> mean only, one decimal (proportion of rejected tests)
#   * otherwise -> mean and std joined by a LaTeX plus-minus, three decimals each
# The original cell also located the "best" row of every measure in order to
# bold it, but every assignment that applied the bolding was commented out, so
# that dead computation is dropped and no cell is bolded.
pm = r'$\pm$'  # raw string: '\p' in a plain literal is an invalid escape sequence
for i, c in enumerate(measure):
    mean_col = result[c]['mean']
    if c == 'reject':
        result[colnames[i]] = mean_col.apply("{:.1f}".format)
    else:
        std_col = result[c]['std']
        result[colnames[i]] = mean_col.apply("{:.3f}".format) + pm + std_col.apply("{:.3f}".format)


In [189]:
# Label the rows of the formatted table with the display names.
# Labels are assigned by position (row 0 -> row_names[0], ...), so a count
# mismatch would attach names to the wrong algorithms; fail loudly instead.
if len(row_names) != len(result):
    raise ValueError(
        'expected {} result rows (one per entry of row_names), got {}'.format(
            len(row_names), len(result)))
ind = dict(enumerate(row_names))
# Build the table once; previously it was rebuilt on every loop iteration and
# left undefined when row_names was empty.
res = result[colnames].rename(index=ind)
res

Unnamed: 0,Balanced Acc,$\text{AOD}_{\text{gen}}$,$\text{AOD}_{\text{race}}$,$T_n$,Rejection Prop
,,,,,
Baseline,0.675$\pm$0.013,0.218$\pm$0.041,0.260$\pm$0.026,2.385$\pm$0.262,1.0
Project,0.641$\pm$0.017,0.039$\pm$0.029,0.227$\pm$0.021,1.161$\pm$0.145,0.2
Reduction,0.652$\pm$0.012,-0.014$\pm$0.054,0.037$\pm$0.039,1.763$\pm$0.069,1.0
SenSR,0.640$\pm$0.022,0.046$\pm$0.031,0.237$\pm$0.018,1.098$\pm$0.061,0.0


In [190]:
# Emit the table as LaTeX: a left-aligned label column followed by one centred
# column per measure. escape=False keeps the math markup in the cells intact.
latex_col_spec = 'l' + 'c' * len(colnames)
latex_table = res.to_latex(escape=False, column_format=latex_col_spec)
print(latex_table)

\begin{tabular}{lccccc}
\toprule
{} &     Balanced Acc & $\text{AOD}_{\text{gen}}$ & $\text{AOD}_{\text{race}}$ &            $T_n$ & Rejection Prop \\
{} \\
\midrule
Baseline  &  0.675$\pm$0.013 &           0.218$\pm$0.041 &            0.260$\pm$0.026 &  2.385$\pm$0.262 &            1.0 \\
Project   &  0.641$\pm$0.017 &           0.039$\pm$0.029 &            0.227$\pm$0.021 &  1.161$\pm$0.145 &            0.2 \\
Reduction &  0.652$\pm$0.012 &          -0.014$\pm$0.054 &            0.037$\pm$0.039 &  1.763$\pm$0.069 &            1.0 \\
SenSR     &  0.640$\pm$0.022 &           0.046$\pm$0.031 &            0.237$\pm$0.018 &  1.098$\pm$0.061 &            0.0 \\
\bottomrule
\end{tabular}



In [137]:
import numpy as np
# Load the stored seed array from disk and display it.
seeds_file = 'seeds.npy'
a = np.load(seeds_file)
a

array([[  767,  1876],
       [ 5192, 98047],
       [50057, 73349],
       [21440, 98448],
       [20609, 49100],
       [ 7751, 43757],
       [31228, 36230],
       [97561, 21758],
       [ 2367,  4534],
       [51668, 64395]])

In [134]:
# Overwrite the first seed pair with (767, 1876).
a[0, 0] = 767
a[0, 1] = 1876

In [135]:
# Echo the array to check the edit made to its first row.
a

array([[  767,  1876],
       [ 5192, 98047],
       [50057, 73349],
       [21440, 98448],
       [20609, 49100],
       [ 7751, 43757],
       [31228, 36230],
       [97561, 21758],
       [ 2367,  4534],
       [51668, 64395]])

In [136]:
# Write the seed array back to disk; this overwrites the existing 'seeds.npy'.
np.save(file='seeds.npy', arr=a)