# Summary table for *COMPAS* data

Here we extract the summary measures on the *COMPAS* data for four classifiers: (1) baseline, (2) project, (3) reduction, and (4) SenSR.
For each classifier we perform a gradient flow attack with step size $\frac{65}{1000\times i^{2/3}}$, where $i$ is the current step number. We perform 200 attack steps on each data point.

First, we load the summary data for all experiments and compile it into a pandas DataFrame.

In [77]:
import ast
import re

import pandas as pd
# Read the raw summary log; every line is parsed as one record further below.
with open('g_summary3.out', 'r') as f:
    data = f.read()
# splitlines() keeps the final record even when the file has no trailing
# newline; slicing off the last split element unconditionally would silently
# drop that record in that case.
entries = data.splitlines()

In [78]:
# Parse every log line into a record and collect the records in a DataFrame.
entries_dict = []
for e in entries:
    try:
        # literal_eval accepts Python literals only, so a stray or malicious
        # line in the log file cannot execute code the way eval() would.
        # NOTE(review): lines that eval() could only parse thanks to names
        # living in the kernel namespace are now skipped -- confirm the log
        # contains plain literals only.
        record = ast.literal_eval(e)
    except (ValueError, SyntaxError, TypeError):
        # Not a valid literal (e.g. a blank line): skip it, as before, but
        # without also swallowing unrelated errors such as KeyboardInterrupt.
        continue
    entries_dict.append(record)
data = pd.DataFrame(entries_dict)
# Flag the runs whose p-value is below the 0.05 threshold.
data['reject'] = data['pval'] < 0.05

In [79]:
# Restrict the summary to the runs recorded with the chosen `lr` value.
lr = 5e-3
data_lr = data[data['lr'] == lr]

# Replace the verbose fairness-metric column names with short labels.
short_names = {
    'average_odds_difference_gen': 'AOD-gen',
    'average_odds_difference_race': 'AOD-race',
    'statistical_parity_difference_gen': 'SPD-gen',
    'statistical_parity_difference_race': 'SPD-race',
    'equal_opportunity_difference_race': 'EOD-race',
    'equal_opportunity_difference_gen': 'EOD-gen',
}
data_lr = data_lr.rename(columns=short_names)

# Print each (group key, sub-frame) pair produced by grouping on 'algo'.
for x in data_lr.groupby(['algo']):
    print(x)

('baseline',         algo            seed     lr  accuracy        lb  pval   bal_acc  \
7   baseline  (51668, 64395)  0.005  0.682765  1.849457   0.0  0.680608   
10  baseline   (5192, 98047)  0.005  0.684659  1.919997   0.0  0.682843   
17  baseline  (50057, 73349)  0.005  0.673295  2.064184   0.0  0.671552   
20  baseline  (21440, 98448)  0.005  0.690341  1.921498   0.0  0.689772   
26  baseline  (20609, 49100)  0.005  0.662879  2.253079   0.0  0.661266   
28  baseline  (98539, 77708)  0.005  0.678030  1.717622   0.0  0.677028   
41  baseline    (2367, 4534)  0.005  0.660985  1.687470   0.0  0.662044   
45  baseline  (97561, 21758)  0.005  0.699811  2.072982   0.0  0.699386   

    gap_rms_gen  mean_gap_gen  max_gap_gen   AOD-gen   EOD-gen   SPD-gen  \
7      0.208451      0.208451     0.208775  0.208451  0.208775  0.267464   
10     0.284018      0.275974     0.343091  0.275974  0.343091  0.305556   
17     0.213178      0.212599     0.228299  0.212599  0.228299  0.243473   
20     

We extract only the following measures: balanced accuracy, the average odds difference for gender and for race, the lower bound, and the proportion of times the test is rejected.

In [70]:
# Columns to summarise; each one is aggregated to its mean and standard
# deviation within every 'algo' group.
measure = ['bal_acc', 'AOD-gen', 'AOD-race', 'lb', 'reject']
agg_dict = {key: ['mean', 'std'] for key in measure}
result = data_lr.groupby(['algo'], as_index=False).agg(agg_dict)

In [71]:
# Algorithm keys (not referenced by any other cell visible in this notebook).
rows = [
    'sensr',
    'reduction',
    'baseline',
    'project',
]
# Display labels for the table rows; a later cell assigns them to the rows of
# `result` by position.
row_names = [
    'Baseline',
    'Project',
    'Reduction',
    'SenSR',
]
# LaTeX column headers, in the same order as the entries of `measure`.
colnames = [
    'Balanced Acc',
    '$\\text{AOD}_{\\text{gen}}$',
    '$\\text{AOD}_{\\text{race}}$',
    '$T_n$',
    'Rejection Prop',
]


In [72]:
# Add one formatted display column per measure, named after the matching
# entry of `colnames`.
#
# The earlier version of this cell also computed the "best" row per measure
# and a bold-faced string for it, but never wrote that string back into
# `result`; that dead computation and the three copy-pasted branches are
# removed here. The columns written to `result` are unchanged.
pm = '$\\pm$'  # backslash escaped explicitly: '\p' is an invalid escape sequence
for i, c in enumerate(measure):
    mean_col = result[c]['mean']
    if c == 'reject':
        # Rejection proportion: mean only, one decimal place.
        result[colnames[i]] = mean_col.apply("{:.1f}".format)
    else:
        # Every other measure: "mean$\pm$std", three decimals each.
        std_col = result[c]['std']
        result[colnames[i]] = (
            mean_col.apply("{:.3f}".format) + pm + std_col.apply("{:.3f}".format)
        )


In [73]:
# Map the positional row index of `result` to the display labels and keep only
# the formatted columns. Built once, instead of re-creating `res` on every
# loop iteration, so `res` is also defined when `row_names` is empty.
# NOTE(review): the mapping is positional -- row i of `result` receives
# row_names[i] -- so it relies on the group order produced by the groupby on
# 'algo' matching the order of `row_names`; confirm if an algorithm is added
# or missing.
ind = dict(enumerate(row_names))
res = result[colnames].rename(index=ind)
res

Unnamed: 0,Balanced Acc,$\text{AOD}_{\text{gen}}$,$\text{AOD}_{\text{race}}$,$T_n$,Rejection Prop
,,,,,
Baseline,0.677$\pm$0.013,0.222$\pm$0.042,0.249$\pm$0.036,1.492$\pm$0.089,1.0
Project,0.643$\pm$0.006,0.053$\pm$0.018,0.221$\pm$0.023,1.093$\pm$0.077,0.0
Reduction,0.653$\pm$0.013,-0.011$\pm$0.051,0.033$\pm$0.042,1.281$\pm$0.018,1.0
SenSR,0.643$\pm$0.020,0.053$\pm$0.023,0.233$\pm$0.024,1.057$\pm$0.038,0.0


In [31]:
# Emit the table as LaTeX: one left-aligned label column followed by one
# centred column per header; escaping is off so the LaTeX markup in the cells
# and headers is passed through verbatim.
latex_column_format = 'l' + 'c' * len(colnames)
latex_table = res.to_latex(escape=False, column_format=latex_column_format)
print(latex_table)

\begin{tabular}{lccccc}
\toprule
{} &     Balanced Acc & $\text{AOD}_{\text{gen}}$ & $\text{AOD}_{\text{race}}$ &            $T_n$ & Rejection Prop \\
{} \\
\midrule
Baseline  &  0.677$\pm$0.012 &           0.228$\pm$0.029 &            0.252$\pm$0.035 &  1.992$\pm$0.149 &            1.0 \\
Project   &  0.645$\pm$0.017 &           0.047$\pm$0.023 &            0.208$\pm$0.024 &  1.253$\pm$0.420 &            0.2 \\
Reduction &  0.655$\pm$0.013 &          -0.004$\pm$0.045 &            0.028$\pm$0.047 &  1.534$\pm$0.043 &            1.0 \\
SenSR     &  0.642$\pm$0.020 &           0.060$\pm$0.023 &            0.225$\pm$0.028 &  1.247$\pm$0.489 &            0.2 \\
\bottomrule
\end{tabular}



In [51]:
import numpy as np
# Load the array stored in seeds.npy and display it.
a = np.load(file='seeds.npy')
a

array([[98539, 77708],
       [ 5192, 98047],
       [50057, 73349],
       [21440, 98448],
       [20609, 49100],
       [ 7751, 43757],
       [31228, 36230],
       [97561, 21758],
       [   23,    45],
       [51668, 64395]])

In [52]:
# Overwrite the first two entries of row 0 in place.
# NOTE(review): the outputs displayed around this cell show row 8 changing
# (23, 45 -> 2367, 4534) while row 0 stays the same, which does not match this
# assignment -- the cell appears to have been edited after it was run. Confirm
# the intended row and values before re-running, since a later cell saves `a`
# back to seeds.npy.
a[0, :2] = (73467, 52346)

In [53]:
# Display the array after the in-place edit above.
a

array([[98539, 77708],
       [ 5192, 98047],
       [50057, 73349],
       [21440, 98448],
       [20609, 49100],
       [ 7751, 43757],
       [31228, 36230],
       [97561, 21758],
       [ 2367,  4534],
       [51668, 64395]])

In [54]:
# Write the modified array back to seeds.npy, replacing the file that was loaded.
np.save(file='seeds.npy', arr=a)