In [1]:
# Import all utility functions
from utility import *
from sklearn.neighbors import KernelDensity
# Whether to enforce the monotone-likelihood assumption on P(Y|X).
# When True, the data-preparation cell flips the sign of every feature whose
# fitted coefficient is negative.
monotone = False

### Read and process data

- Build a KDE estimator over all features and learn $P(y|x)$ using all features
- Generate $y$ using $P(y|x)$


In [2]:
# Load the raw credit data and keep the group attribute, the label and 19 features.
df = pd.read_csv('index.csv')
df = df[['Sex & Marital Status','Creditability','Instalment per cent','Length of current employment','Value Savings/Stocks',
        'Account Balance', 'Duration of Credit (month)',
       'Payment Status of Previous Credit', 'Purpose', 'Credit Amount','Guarantors',
       'Duration in Current address', 'Most valuable available asset',
       'Age (years)', 'Concurrent Credits', 'Type of apartment',
       'No of Credits at this Bank', 'Occupation', 'No of dependents',
       'Telephone', 'Foreign Worker']]
# Rename in selection order: 'b' = Sex & Marital Status (group attribute),
# 'y' = Creditability (label), x1..x19 = the remaining features.
df.columns = ['b','y','x1','x2','x3'] + [f"x{i}" for i in range(4,20)]


from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Standardize every feature column on its own ('b' and 'y' are left untouched).
for col in df.columns[2:]:
    x_new = scaler.fit_transform(df[col].to_numpy().reshape(-1, 1)).reshape(-1)
    df[col] = x_new


# learn P(y|x) as ground truth
X = df[[i for i in df.columns if i not in ['b','y']]]
clf = SGDClassifier(loss = 'log_loss', max_iter=10000, tol=1e-6, penalty = 'l2',random_state=42)
y = df[['y']]
# fit() expects 1-D labels; handing it the (n, 1) frame makes sklearn emit a
# DataConversionWarning, so flatten explicitly.
clf.fit(X, y.to_numpy().ravel())
# Replace the observed labels with the fitted model's predictions.
df['y']=clf.predict(X)

# possibly enforce monotonic likelihood: negate every feature whose coefficient
# is negative (coefficient position i corresponds to column x{i+1}).
# NOTE(review): clf is not refit after the flip -- confirm this is intended.
idx = np.where(clf.coef_[0]<0)[0]
if monotone:
    for i in idx:
        df[f'x{i+1}'] = -df[f'x{i+1}']

In [3]:
# Fit one Gaussian KDE per group (male -> kde_a, female -> kde_b) over the
# feature columns x1..x19.
feature_cols = [f"x{i}" for i in range(1,20)]
df_m = df[df['b'].isin([1, 3, 4])]
df_f = df[df['b'].isin([2, 5])]
X = df_m[feature_cols]
kde_a = KernelDensity(kernel='gaussian').fit(X)
X = df_f[feature_cols]
kde_b = KernelDensity(kernel='gaussian').fit(X)

In [None]:
# Under the semi-synthetic environment, what are the qualification rates of both groups?
N=10000
for msg, kde in (('qualification rate of group a:', kde_a),
                 ('qualification rate of group b:', kde_b)):
    # Draw N examples from the group's KDE and report the mean label.
    x,y,x_full = generate_true_examples_K(N, clf, kde)
    print(msg, y.mean())

### Experiment begins

- Note: Running 100 trials may take a long time; to see the qualitative results, you can run 50 trials instead.
- Note: Slight quantitative differences may occur because different versions of sklearn implement the logistic classifier differently, but the qualitative results should remain the same.

In [5]:
# Shared experiment settings; the simulation cells pass these positionally
# as simulation(Q, N, n, T, alpha, bias, mag, tp, r, ...).
n, alpha, T = 50, 0.5, 15
N, tp = 2000, 3
# 10x10 diagonal matrix with 5 on the diagonal.
Q = 5 * np.eye(10)

Group $i$

In [None]:
# Group i (kde_a), bias 'up', ratio = 0.1
r, bias, mag = 0.1, 'up', 0.06
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True, small=True)

In [None]:
# Group i (kde_a), bias 'up', ratio = 0.05; both plots are produced with save=False
r, bias, mag = 0.05, 'up', 0.06
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=False)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=False, small=True)

In [None]:
# Group i (kde_a), bias 'up', ratio = 0
r, bias, mag = 0, 'up', 0.06
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Infinite horizon to verify the limit -- group i (kde_a), bias 'up', ratio = 0.
# NOTE(review): this rebinds the notebook-level T to 50, so every cell executed
# after this one also runs with T = 50 -- confirm that is intended.
T = 50
r, bias, mag = 0, 'up', 0.06
des = f"infinite_3D_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des,save=True,limit=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des,save=True,limit=True, small=True)

Group $j$

In [None]:
# Group j (kde_b), bias 'down', ratio = 0.1
# NOTE(review): neither plot call passes `save`, yet a later fairness cell reads
# results under this description string -- confirm the results get persisted.
r, bias, mag = 0.1, 'down', 0.06
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des,small=True)

In [None]:
# Group j (kde_b), bias 'down', ratio = 0.05
np.random.seed(42)
r = 0.05
bias = 'down'
mag = 0.06
# The description string must embed the actual `mag` (0.06): the fairness cell
# later reads "3D_setting_ratio0.05_biasdown_mag0.06". The previous hard-coded
# 0.05 produced a name that did not match the run's parameters.
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
At, Qt, At_sd, Qt_sd = simulation(Q,N,n,T,alpha,bias,mag,tp,r,sd=True,kde_x=kde_b,clf=clf)
plot_save_single(At, Qt, des,save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=False)

In [None]:
# Group j (kde_b), bias 'down', ratio = 0
r, bias, mag = 0, 'down', 0.06
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des,save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Infinite horizon to verify the limit -- group j (kde_b), bias 'down', ratio = 0.
np.random.seed(42)
# Set the long horizon here explicitly (same value as the group-i
# infinite-horizon cell) instead of relying on whatever T an earlier cell left
# behind; this keeps the cell correct when it is run on its own or out of order.
T = 50
r = 0
bias = 'down'
mag = 0.06
des = f"infinite_3D_setting_ratio{r}_bias{bias}_mag{mag}"
At, Qt, At_sd, Qt_sd = simulation(Q,N,n,T,alpha,bias,mag,tp,r,sd=True,kde_x=kde_b,clf=clf)
plot_save_single(At, Qt, des,save=True,limit=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des,save=True,limit=True, small=True)

Refined retraining process for $i$

In [None]:
# Refined retraining (refined=True), group i (kde_a), bias 'up', ratio = 0.1
r, bias, mag = 0.1, 'up', 0.06
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, refined=True, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Refined retraining (refined=True), group i (kde_a), bias 'up', ratio = 0.05
r, bias, mag = 0.05, 'up', 0.06
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, refined=True, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Refined retraining (refined=True), group i (kde_a), bias 'up', ratio = 0
r, bias, mag = 0, 'up', 0.06
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, refined=True, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

Refined retraining process for $j$

In [None]:
# Refined retraining (refined=True), group j (kde_b), bias 'down', ratio = 0.1
r, bias, mag = 0.1, 'down', 0.06
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, refined=True, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Refined retraining (refined=True), group j (kde_b), bias 'down', ratio = 0.05
r, bias, mag = 0.05, 'down', 0.06
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, refined=True, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Refined retraining (refined=True), group j (kde_b), bias 'down', ratio = 0
r, bias, mag = 0, 'down', 0.06
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, refined=True, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

Unfairness and bias

In [None]:
# Fairness comparison at ratio = 0.1
# NOTE(review): group i is read from the "3D_sampler" run while group j is read
# from the "3D_setting" run -- confirm this pairing is intended.
mag = 0.06

# read i (bias 'up')
r, bias = 0.1, 'up'
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
Ati_mean, Qti_mean, Ati_sd, Qti_sd = read_results(des)

# read j (bias 'down')
r, bias = 0.1, 'down'
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
Atj_mean, Qtj_mean, Atj_sd, Qtj_sd = read_results(des)

# Plot both groups together under a combined description string
des = f"3D_ratio{r}_mag{mag}"
plot_save_fairness(Ati_mean, Atj_mean, Qti_mean, Qtj_mean, des, True)
plot_save_fairness_err(
    Ati_mean, Atj_mean, Qti_mean, Qtj_mean,
    Ati_sd, Qti_sd, Atj_sd, Qtj_sd,
    des, True,
)

In [None]:
# Fairness comparison at ratio = 0.05
# NOTE(review): group i is read from the "3D_sampler" run while group j is read
# from the "3D_setting" run -- confirm this pairing is intended.
mag = 0.06

# read i (bias 'up')
r, bias = 0.05, 'up'
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
Ati_mean, Qti_mean, Ati_sd, Qti_sd = read_results(des)

# read j (bias 'down')
r, bias = 0.05, 'down'
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
Atj_mean, Qtj_mean, Atj_sd, Qtj_sd = read_results(des)

# Plot both groups together; only the error-bar variant is drawn in this cell
des = f"3D_ratio{r}_mag{mag}"
# plot_save_fairness(Ati_mean, Atj_mean, Qti_mean, Qtj_mean, des, True)
plot_save_fairness_err(
    Ati_mean, Atj_mean, Qti_mean, Qtj_mean,
    Ati_sd, Qti_sd, Atj_sd, Qtj_sd,
    des, True,
)

In [None]:
# Fairness comparison at ratio = 0
# NOTE(review): group i is read from the "3D_sampler" run while group j is read
# from the "3D_setting" run -- confirm this pairing is intended.
mag = 0.06

# read i (bias 'up')
r, bias = 0, 'up'
des = f"3D_sampler_ratio{r}_bias{bias}_mag{mag}"
Ati_mean, Qti_mean, Ati_sd, Qti_sd = read_results(des)

# read j (bias 'down')
r, bias = 0, 'down'
des = f"3D_setting_ratio{r}_bias{bias}_mag{mag}"
Atj_mean, Qtj_mean, Atj_sd, Qtj_sd = read_results(des)

# Plot both groups together under a combined description string
des = f"3D_ratio{r}_mag{mag}"
plot_save_fairness(Ati_mean, Atj_mean, Qti_mean, Qtj_mean, des, True)
plot_save_fairness_err(
    Ati_mean, Atj_mean, Qti_mean, Qtj_mean,
    Ati_sd, Qti_sd, Atj_sd, Qtj_sd,
    des, True,
)

Non-strategic setting

Group $i$

In [None]:
# Non-strategic run (strategic=False), group i (kde_a), bias 'up', ratio = 0.1
r, bias, mag = 0.1, 'up', 0.06
des = f"3D_nonstrat_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, strategic=False, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Non-strategic run (strategic=False), group i (kde_a), bias 'up', ratio = 0.05
r, bias, mag = 0.05, 'up', 0.06
des = f"3D_nonstrat_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, strategic=False, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Non-strategic run (strategic=False), group i (kde_a), bias 'up', ratio = 0
r, bias, mag = 0, 'up', 0.06
des = f"3D_nonstrat_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, strategic=False, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

Group $j$

In [None]:
# Non-strategic run (strategic=False), group j (kde_b), bias 'down', ratio = 0.1
r, bias, mag = 0.1, 'down', 0.06
des = f"3D_nonstrat_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, strategic=False, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Non-strategic run (strategic=False), group j (kde_b), bias 'down', ratio = 0.05
r, bias, mag = 0.05, 'down', 0.06
des = f"3D_nonstrat_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, strategic=False, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Non-strategic run (strategic=False), group j (kde_b), bias 'down', ratio = 0
r, bias, mag = 0, 'down', 0.06
des = f"3D_nonstrat_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, strategic=False, kde_x=kde_b, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

Noisy setting

Group $i$

In [None]:
# Noisy run (noise=0.1), group i (kde_a), bias 'up', ratio = 0.1
r, bias, mag = 0.1, 'up', 0.06
des = f"3D_noisy_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, noise=0.1, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Noisy run (noise=0.1), group i (kde_a), bias 'up', ratio = 0.05
r, bias, mag = 0.05, 'up', 0.06
des = f"3D_noisy_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, noise=0.1, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Noisy run (noise=0.1), group i (kde_a), bias 'up', ratio = 0
r, bias, mag = 0, 'up', 0.06
des = f"3D_noisy_setting_ratio{r}_bias{bias}_mag{mag}"
np.random.seed(42)  # reseed right before the stochastic run
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alpha, bias, mag, tp, r,
    sd=True, noise=0.1, kde_x=kde_a, clf=clf,
)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

Group $j$

In [None]:
# Noisy run (noise=0.1), group j, bias 'down', ratio = 0.1
np.random.seed(42)
r = 0.1
bias = 'down'
mag = 0.06
des = f"3D_noisy_setting_ratio{r}_bias{bias}_mag{mag}"
# Group j must sample from kde_b, as every other group-j cell does; this cell
# previously passed kde_a (group i's density).
At, Qt, At_sd, Qt_sd = simulation(Q,N,n,T,alpha,bias,mag,tp,r,sd=True,noise=0.1,kde_x=kde_b,clf=clf)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Noisy run (noise=0.1), group j, bias 'down', ratio = 0.05
np.random.seed(42)
r = 0.05
bias = 'down'
mag = 0.06
des = f"3D_noisy_setting_ratio{r}_bias{bias}_mag{mag}"
# Group j must sample from kde_b, as every other group-j cell does; this cell
# previously passed kde_a (group i's density).
At, Qt, At_sd, Qt_sd = simulation(Q,N,n,T,alpha,bias,mag,tp,r,sd=True,noise=0.1,kde_x=kde_b,clf=clf)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)

In [None]:
# Noisy run (noise=0.1), group j, bias 'down', ratio = 0
np.random.seed(42)
r = 0
bias = 'down'
mag = 0.06
des = f"3D_noisy_setting_ratio{r}_bias{bias}_mag{mag}"
# Group j must sample from kde_b, as every other group-j cell does; this cell
# previously passed kde_a (group i's density).
At, Qt, At_sd, Qt_sd = simulation(Q,N,n,T,alpha,bias,mag,tp,r,sd=True,noise=0.1,kde_x=kde_b,clf=clf)
plot_save_single(At, Qt, des, save=True)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, save=True,small=True)