# Experiments: Agarwal, Dudík, and Wu (ADW)

In [1]:
import AgarwalDudikWu as ADW

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import softmax
import collections
import warnings
warnings.filterwarnings('ignore')

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split
import xgboost as xgb

from FairReg import FairReg
from evaluation import get_stats, get_risk_unf_wrt_eps, compare_with_ADW, get_stats_ADW
from evaluation_measures import DP_unfairness, prob_unfairness, prob_unfairness_summary, DP_unfairness_summary
from plots import plot_distributions_compare, plot_predictions_compare, plot_distributions, plot_predictions, plot_risk_history,plot_unfairness_history,plot_unfairness_vs_risk, plot_risk_unf_compare, plot_risk_unf_wrt_eps, plot_time_compare
from data_prep import get_lawschool_data, get_communities_data, get_frequencies

import time

# Communities and Crime

### 1. Training data: 0.8

In [2]:
# Repetition count handed to get_stats_ADW (the log below counts runs as "k / 10").
num = 10

# Tolerance grid: one [eps, eps] pair for every exponent i, with eps = 2**-i.
i_list = [1, 2, 4, 8, 16]
eps_list = []
for i in i_list:
    eps_list.append([2 ** -i] * 2)

# ADW statistics on the Communities and Crime data.
# NOTE(review): the section heading says "Training data: 0.8" while
# TRAIN_SIZE=0.4 and UNLAB_SIZE=0.4 are passed here -- presumably both splits
# are used for training when partial_training is not set; confirm in
# evaluation.get_stats_ADW.
ADW_results_crime_1 = get_stats_ADW(
    dataset='communities',
    num=num,
    eps_list=eps_list,
    print_details=True,
    TRAIN_SIZE=0.4,
    UNLAB_SIZE=0.4,
    TEST_SIZE=0.2,
)

1 / 5  : collecting statistics for eps=[0.5, 0.5]
-----    1 / 10 : ADW: training completed; training time:  90.65354251861572
-----    2 / 10 : ADW: training completed; training time:  90.3375928401947
-----    3 / 10 : ADW: training completed; training time:  85.3242974281311
-----    4 / 10 : ADW: training completed; training time:  87.95451259613037
-----    5 / 10 : ADW: training completed; training time:  84.35767006874084
-----    6 / 10 : ADW: training completed; training time:  255.5059621334076
-----    7 / 10 : ADW: training completed; training time:  85.90270400047302
-----    8 / 10 : ADW: training completed; training time:  85.80736947059631
-----    9 / 10 : ADW: training completed; training time:  85.11192440986633
-----    10 / 10 : ADW: training completed; training time:  85.23613667488098
---------------------------------------------------------
2 / 5  : collecting statistics for eps=[0.25, 0.25]
-----    1 / 10 : ADW: training completed; training time:  170.34580516

In [3]:
# Display the dict returned by get_stats_ADW for the 'communities' run
# (keys shown in the output: risk, risk_std, unf, unf_std, training_time_hist).
ADW_results_crime_1

{'risk': [0.019184454289390647,
  0.02795554337699633,
  0.039888794614766646,
  0.05070958086303674,
  0.06372588798726009],
 'risk_std': [0.0010695475244603858,
  0.0016351900472801886,
  0.0026766224675669813,
  0.008033720163157756,
  0.008218994144191985],
 'unf': {0: [0.5043731166229097,
   0.28038286914636584,
   0.11969642422453788,
   0.06775097234440607,
   0.05867912230399286],
  1: [0.13535891947707485,
   0.07524652079327401,
   0.03212300202831032,
   0.018182369574856517,
   0.01574775167263707]},
 'unf_std': {0: [0.04323046723679461,
   0.04569857256615682,
   0.04612620564678026,
   0.021261165364623618,
   0.022278802516881174],
  1: [0.011601786734475199,
   0.012264153659926794,
   0.012378917809359678,
   0.005705871855042772,
   0.005978975755329088]},
 'training_time_hist': [90.65354251861572,
  90.3375928401947,
  85.3242974281311,
  87.95451259613037,
  84.35767006874084,
  255.5059621334076,
  85.90270400047302,
  85.80736947059631,
  85.11192440986633,
  85.2

### 2. Training data: 0.4

In [6]:
# Repetition count handed to get_stats_ADW (the log below counts runs as "k / 10").
num = 10

# Tolerance grid: one [eps, eps] pair for every exponent i, with eps = 2**-i.
i_list = [1, 2, 4, 8, 16]
eps_list = []
for i in i_list:
    eps_list.append([2 ** -i] * 2)

# ADW statistics on the Communities and Crime data with partial_training=True.
# NOTE(review): partial_training presumably restricts training to the labeled
# 0.4 split (logged training times are roughly half of the run without it);
# confirm in evaluation.get_stats_ADW.
ADW_results_crime_2 = get_stats_ADW(
    dataset='communities',
    num=num,
    eps_list=eps_list,
    print_details=True,
    TRAIN_SIZE=0.4,
    UNLAB_SIZE=0.4,
    TEST_SIZE=0.2,
    partial_training=True,
)

1 / 5  : collecting statistics for eps=[0.5, 0.5]
-----    1 / 10 : ADW: training completed; training time:  45.78951668739319
-----    2 / 10 : ADW: training completed; training time:  45.77052903175354
-----    3 / 10 : ADW: training completed; training time:  42.43022966384888
-----    4 / 10 : ADW: training completed; training time:  40.86864447593689
-----    5 / 10 : ADW: training completed; training time:  44.41554546356201
-----    6 / 10 : ADW: training completed; training time:  173.81574130058289
-----    7 / 10 : ADW: training completed; training time:  44.842381954193115
-----    8 / 10 : ADW: training completed; training time:  44.87671732902527
-----    9 / 10 : ADW: training completed; training time:  43.53170919418335
-----    10 / 10 : ADW: training completed; training time:  137.57763814926147
---------------------------------------------------------
2 / 5  : collecting statistics for eps=[0.25, 0.25]
-----    1 / 10 : ADW: training completed; training time:  65.6251

In [7]:
# Display the dict returned by get_stats_ADW for the 'communities' run with
# partial_training=True (keys shown in the output: risk, risk_std, unf,
# unf_std, training_time_hist).
ADW_results_crime_2

{'risk': [0.021629530159859495,
  0.031038802773151575,
  0.04227453747840563,
  0.055204694636743955,
  0.06920788325816157],
 'risk_std': [0.0017545504113830302,
  0.0026898902040118342,
  0.0023768048585030943,
  0.011967792485616368,
  0.008978318588231013],
 'unf': {0: [0.4827684501083479,
   0.28475646919146147,
   0.12464786301474609,
   0.07774279679041593,
   0.06432366435437738],
  1: [0.12956086200990805,
   0.0764202664922772,
   0.033451822662104266,
   0.02086388156675701,
   0.017262580849098097]},
 'unf_std': {0: [0.04482584766625331,
   0.045607628521770194,
   0.03917621243002715,
   0.03329644605937856,
   0.026906733704415056],
  1: [0.012029939948770801,
   0.012239746951529415,
   0.010513743910933742,
   0.008935787440855478,
   0.007220976457414857]},
 'training_time_hist': [45.78951668739319,
  45.77052903175354,
  42.43022966384888,
  40.86864447593689,
  44.41554546356201,
  173.81574130058289,
  44.842381954193115,
  44.87671732902527,
  43.53170919418335,
 

# Law School

### 1. Training data: 0.8

In [4]:
# Repetition count handed to get_stats_ADW (the log below counts runs as "k / 10").
num = 10

# Tolerance grid: one [eps, eps] pair for every exponent i, with eps = 2**-i.
i_list = [1, 2, 4, 8, 16]
eps_list = []
for i in i_list:
    eps_list.append([2 ** -i] * 2)

# ADW statistics on the Law School data.
# NOTE(review): the section heading says "Training data: 0.8" while
# TRAIN_SIZE=0.4 and UNLAB_SIZE=0.4 are passed here -- presumably both splits
# are used for training when partial_training is not set; confirm in
# evaluation.get_stats_ADW.
ADW_results_law_1 = get_stats_ADW(
    dataset='lawschool',
    num=num,
    eps_list=eps_list,
    print_details=True,
    TRAIN_SIZE=0.4,
    UNLAB_SIZE=0.4,
    TEST_SIZE=0.2,
)

1 / 5  : collecting statistics for eps=[0.5, 0.5]
-----    1 / 10 : ADW: training completed; training time:  89.0734031200409
-----    2 / 10 : ADW: training completed; training time:  90.20902442932129
-----    3 / 10 : ADW: training completed; training time:  88.48083758354187
-----    4 / 10 : ADW: training completed; training time:  87.88358736038208
-----    5 / 10 : ADW: training completed; training time:  90.92188882827759
-----    6 / 10 : ADW: training completed; training time:  90.13062167167664
-----    7 / 10 : ADW: training completed; training time:  89.00344038009644
-----    8 / 10 : ADW: training completed; training time:  85.92114162445068
-----    9 / 10 : ADW: training completed; training time:  89.10214471817017
-----    10 / 10 : ADW: training completed; training time:  96.45941734313965
---------------------------------------------------------
2 / 5  : collecting statistics for eps=[0.25, 0.25]
-----    1 / 10 : ADW: training completed; training time:  90.92153644

In [5]:
# Display the dict returned by get_stats_ADW for the 'lawschool' run
# (keys shown in the output: risk, risk_std, unf, unf_std, training_time_hist).
ADW_results_law_1

{'risk': [0.011493437499999998,
  0.011493437499999998,
  0.011968568623357798,
  0.01604015685898988,
  0.0182511778823257],
 'risk_std': [0.000640059887945066,
  0.000640059887945066,
  0.0005796406565443473,
  0.0016041381552198077,
  0.003068209111508659],
 'unf': {0: [0.2380625,
   0.2380625,
   0.09555004175130963,
   0.05283712079946525,
   0.05063270229675766],
  1: [0.0453452380952381,
   0.0453452380952381,
   0.01820000795263037,
   0.010064213485612391,
   0.009644324247001457]},
 'unf_std': {0: [0.05213522831301307,
   0.05213522831301307,
   0.03636354475086835,
   0.027491425276389585,
   0.02766602900628101],
  1: [0.009930519678669158,
   0.009930519678669158,
   0.006926389476355847,
   0.00523646195740753,
   0.005269719810720177]},
 'training_time_hist': [89.0734031200409,
  90.20902442932129,
  88.48083758354187,
  87.88358736038208,
  90.92188882827759,
  90.13062167167664,
  89.00344038009644,
  85.92114162445068,
  89.10214471817017,
  96.45941734313965,
  90.92

### 2. Training data: 0.4

In [8]:
# Repetition count handed to get_stats_ADW (the log below counts runs as "k / 10").
num = 10

# Tolerance grid: one [eps, eps] pair for every exponent i, with eps = 2**-i.
i_list = [1, 2, 4, 8, 16]
eps_list = []
for i in i_list:
    eps_list.append([2 ** -i] * 2)

# ADW statistics on the Law School data with partial_training=True.
# NOTE(review): partial_training presumably restricts training to the labeled
# 0.4 split (logged training times are roughly half of the run without it);
# confirm in evaluation.get_stats_ADW.
ADW_results_law_2 = get_stats_ADW(
    dataset='lawschool',
    num=num,
    eps_list=eps_list,
    print_details=True,
    TRAIN_SIZE=0.4,
    UNLAB_SIZE=0.4,
    TEST_SIZE=0.2,
    partial_training=True,
)

1 / 5  : collecting statistics for eps=[0.5, 0.5]
-----    1 / 10 : ADW: training completed; training time:  46.0025110244751
-----    2 / 10 : ADW: training completed; training time:  45.52815580368042
-----    3 / 10 : ADW: training completed; training time:  45.565890073776245
-----    4 / 10 : ADW: training completed; training time:  46.53616690635681
-----    5 / 10 : ADW: training completed; training time:  46.107701539993286
-----    6 / 10 : ADW: training completed; training time:  45.98188519477844
-----    7 / 10 : ADW: training completed; training time:  43.951051235198975
-----    8 / 10 : ADW: training completed; training time:  43.812763929367065
-----    9 / 10 : ADW: training completed; training time:  51.62688589096069
-----    10 / 10 : ADW: training completed; training time:  51.57475137710571
---------------------------------------------------------
2 / 5  : collecting statistics for eps=[0.25, 0.25]
-----    1 / 10 : ADW: training completed; training time:  83.1070

In [9]:
# Display the dict returned by get_stats_ADW for the 'lawschool' run with
# partial_training=True (keys shown in the output: risk, risk_std, unf,
# unf_std, training_time_hist).
ADW_results_law_2

{'risk': [0.011484375,
  0.011485965203423229,
  0.012110514358429199,
  0.01719773565425585,
  0.019356459963280375],
 'risk_std': [0.0006429424546567139,
  0.0006343229958237794,
  0.0007804613635147565,
  0.0034421679059363445,
  0.004774412376743943],
 'unf': {0: [0.23543750000000002,
   0.23222859111947375,
   0.1082600844620257,
   0.05462989580178614,
   0.050514938878219985],
  1: [0.044845238095238105,
   0.04423401735609024,
   0.02062096846895723,
   0.010405694438435493,
   0.009621893119660949]},
 'unf_std': {0: [0.0562625333259177,
   0.05998127154157687,
   0.04722991128376921,
   0.022983963637830958,
   0.01993146902425077],
  1: [0.010716673014460528,
   0.011425004103157515,
   0.008996173577860792,
   0.004377897835777358,
   0.0037964702903334506]},
 'training_time_hist': [46.0025110244751,
  45.52815580368042,
  45.565890073776245,
  46.53616690635681,
  46.107701539993286,
  45.98188519477844,
  43.951051235198975,
  43.812763929367065,
  51.62688589096069,
  51.

# Adult

### 1. Training data: 0.8

In [2]:
# Repetition count handed to get_stats_ADW (the log below counts runs as "k / 10").
num = 10

# Tolerance grid: one [eps, eps] pair for every exponent i, with eps = 2**-i.
i_list = [1, 2, 4, 8, 16]
eps_list = []
for i in i_list:
    eps_list.append([2 ** -i] * 2)

# ADW statistics on the Adult data.
# NOTE(review): the section heading says "Training data: 0.8" while
# TRAIN_SIZE=0.4 and UNLAB_SIZE=0.4 are passed here -- presumably both splits
# are used for training when partial_training is not set; confirm in
# evaluation.get_stats_ADW.
ADW_results_adult_1 = get_stats_ADW(
    dataset='adult',
    num=num,
    eps_list=eps_list,
    print_details=True,
    TRAIN_SIZE=0.4,
    UNLAB_SIZE=0.4,
    TEST_SIZE=0.2,
)

1 / 5  : collecting statistics for eps=[0.5, 0.5]
-----    1 / 10 : ADW: training completed; training time:  91.82991433143616
-----    2 / 10 : ADW: training completed; training time:  85.82939100265503
-----    3 / 10 : ADW: training completed; training time:  86.61291766166687
-----    4 / 10 : ADW: training completed; training time:  86.93243980407715
-----    5 / 10 : ADW: training completed; training time:  85.15382313728333
-----    6 / 10 : ADW: training completed; training time:  85.06965398788452
-----    7 / 10 : ADW: training completed; training time:  83.53353476524353
-----    8 / 10 : ADW: training completed; training time:  87.16162300109863
-----    9 / 10 : ADW: training completed; training time:  84.2731077671051
-----    10 / 10 : ADW: training completed; training time:  86.10765051841736
---------------------------------------------------------
2 / 5  : collecting statistics for eps=[0.25, 0.25]
-----    1 / 10 : ADW: training completed; training time:  88.15487623

In [3]:
# Display the dict returned by get_stats_ADW for the 'adult' run
# (keys shown in the output: risk, risk_std, unf, unf_std, training_time_hist).
ADW_results_adult_1

{'risk': [0.0157911125,
  0.0157911125,
  0.016620523364190055,
  0.017444390443293444,
  0.017528807203145445],
 'risk_std': [0.0007047860633953034,
  0.0007047860633953034,
  0.0007549579424452553,
  0.0008749679427236232,
  0.000882107025326489],
 'unf': {0: [0.14418065693430654,
   0.14418065693430654,
   0.07439122230047651,
   0.02882338315820538,
   0.027304570349608426],
  1: [0.07510551330798478,
   0.07510551330798478,
   0.03875132112230143,
   0.015014461949331401,
   0.01422329329998617]},
 'unf_std': {0: [0.03031881939400721,
   0.03031881939400721,
   0.025967906433382225,
   0.011993863687565616,
   0.009517720697696965],
  1: [0.01579345344858931,
   0.01579345344858931,
   0.013527008294195264,
   0.006247754088199571,
   0.0049579001353022905]},
 'training_time_hist': [91.82991433143616,
  85.82939100265503,
  86.61291766166687,
  86.93243980407715,
  85.15382313728333,
  85.06965398788452,
  83.53353476524353,
  87.16162300109863,
  84.2731077671051,
  86.1076505184

In [6]:
# Hard-coded snapshot that rebinds ADW_results_adult_1 to literal values.
# The numbers agree with the output displayed for the get_stats_ADW run on
# the 'adult' dataset, so the results are available without re-running the
# training.
# NOTE(review): presumably pasted from that displayed output -- confirm, and
# keep it in sync if the experiment is re-run.
# Each of 'risk', 'risk_std' and the per-key 'unf' / 'unf_std' lists has 5
# entries, presumably one per eps pair in eps_list -- confirm.
# NOTE(review): the meaning of the 0 / 1 keys under 'unf' and 'unf_std' is not
# visible here (presumably the sensitive-group label) -- confirm.
ADW_results_adult_1 = {'risk': [0.0157911125,
  0.0157911125,
  0.016620523364190055,
  0.017444390443293444,
  0.017528807203145445],
 'risk_std': [0.0007047860633953034,
  0.0007047860633953034,
  0.0007549579424452553,
  0.0008749679427236232,
  0.000882107025326489],
 'unf': {0: [0.14418065693430654,
   0.14418065693430654,
   0.07439122230047651,
   0.02882338315820538,
   0.027304570349608426],
  1: [0.07510551330798478,
   0.07510551330798478,
   0.03875132112230143,
   0.015014461949331401,
   0.01422329329998617]},
 'unf_std': {0: [0.03031881939400721,
   0.03031881939400721,
   0.025967906433382225,
   0.011993863687565616,
   0.009517720697696965],
  1: [0.01579345344858931,
   0.01579345344858931,
   0.013527008294195264,
   0.006247754088199571,
   0.0049579001353022905]},
 # 50 training times in seconds; the first entries match the logged runs in
 # order (10 runs for eps=0.5, then eps=0.25, ...).
 'training_time_hist': [91.82991433143616,
  85.82939100265503,
  86.61291766166687,
  86.93243980407715,
  85.15382313728333,
  85.06965398788452,
  83.53353476524353,
  87.16162300109863,
  84.2731077671051,
  86.10765051841736,
  88.15487623214722,
  85.9133551120758,
  85.28287124633789,
  84.46916842460632,
  88.21460127830505,
  82.55495882034302,
  85.46726560592651,
  83.37708067893982,
  88.80802273750305,
  84.73397922515869,
  170.4626967906952,
  223.0327126979828,
  225.30730724334717,
  300.83043241500854,
  170.67196249961853,
  224.1606719493866,
  310.61057209968567,
  253.7964596748352,
  353.22964811325073,
  280.432208776474,
  170.0092306137085,
  171.60504627227783,
  251.82041025161743,
  168.83445286750793,
  295.14736461639404,
  172.53079748153687,
  167.61365842819214,
  169.79629731178284,
  426.05347537994385,
  170.22248315811157,
  168.53930139541626,
  169.32921147346497,
  250.66810584068298,
  173.47692131996155,
  383.4329912662506,
  221.36682200431824,
  222.5968897342682,
  219.85395193099976,
  353.7152831554413,
  170.1185085773468]}

### 2. Training data: 0.4

In [4]:
# Repetition count handed to get_stats_ADW (the log below counts runs as "k / 10").
num = 10

# Tolerance grid: one [eps, eps] pair for every exponent i, with eps = 2**-i.
i_list = [1, 2, 4, 8, 16]
eps_list = []
for i in i_list:
    eps_list.append([2 ** -i] * 2)

# ADW statistics on the Adult data with partial_training=True.
# NOTE(review): partial_training presumably restricts training to the labeled
# 0.4 split (logged training times are roughly half of the run without it);
# confirm in evaluation.get_stats_ADW.
ADW_results_adult_2 = get_stats_ADW(
    dataset='adult',
    num=num,
    eps_list=eps_list,
    print_details=True,
    TRAIN_SIZE=0.4,
    UNLAB_SIZE=0.4,
    TEST_SIZE=0.2,
    partial_training=True,
)

1 / 5  : collecting statistics for eps=[0.5, 0.5]
-----    1 / 10 : ADW: training completed; training time:  44.40106272697449
-----    2 / 10 : ADW: training completed; training time:  42.65382480621338
-----    3 / 10 : ADW: training completed; training time:  42.53053283691406
-----    4 / 10 : ADW: training completed; training time:  41.88482141494751
-----    5 / 10 : ADW: training completed; training time:  43.82067275047302
-----    6 / 10 : ADW: training completed; training time:  43.26097893714905
-----    7 / 10 : ADW: training completed; training time:  43.328731060028076
-----    8 / 10 : ADW: training completed; training time:  41.00166606903076
-----    9 / 10 : ADW: training completed; training time:  45.867915391922
-----    10 / 10 : ADW: training completed; training time:  42.973122358322144
---------------------------------------------------------
2 / 5  : collecting statistics for eps=[0.25, 0.25]
-----    1 / 10 : ADW: training completed; training time:  40.8406741

In [5]:
# Display the dict returned by get_stats_ADW for the 'adult' run with
# partial_training=True (keys shown in the output: risk, risk_std, unf,
# unf_std, training_time_hist).
ADW_results_adult_2

{'risk': [0.015850425,
  0.015850425,
  0.016645845325225943,
  0.017359271248599557,
  0.017711461866596178],
 'risk_std': [0.0007794749879726733,
  0.0007794749879726733,
  0.0008770455008556636,
  0.0008878649136701988,
  0.0008406734370463405],
 'unf': {0: [0.14797810218978105,
   0.14797810218978105,
   0.0686853632593512,
   0.031573350288051485,
   0.028208970962824813],
  1: [0.07708365019011403,
   0.07708365019011403,
   0.035779067553350204,
   0.016446954332559098,
   0.01469440692740306]},
 'unf_std': {0: [0.03748464157049231,
   0.03748464157049231,
   0.024625914700221557,
   0.014931364608854664,
   0.01636482853550392],
  1: [0.01952622013367853,
   0.01952622013367853,
   0.012827947961712436,
   0.0077779351764756255,
   0.008524644522296709]},
 'training_time_hist': [44.40106272697449,
  42.65382480621338,
  42.53053283691406,
  41.88482141494751,
  43.82067275047302,
  43.26097893714905,
  43.328731060028076,
  41.00166606903076,
  45.867915391922,
  42.97312235832