In [None]:
import sys
import os
import re
import tqdm
import time
import sklearn
import numpy as np
import pandas as pd
import scipy
import copy
import random
import math
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from load_dataset import load, generate_random_dataset
from classifier import *
from utils import *
from metrics import *  # include fairness and corresponding derivatives
from scipy import stats
from scipy.stats import rankdata
from sklearn import metrics, preprocessing
from sklearn.model_selection import train_test_split
from operator import itemgetter
from torch.autograd import grad
import torch.nn as nn
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
from IPython.display import Markdown, display
# Seed the Python, NumPy and PyTorch global RNGs so runs are repeatable.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

In [None]:
# ignore all the warnings
# NOTE(review): this silences every warning category for the whole kernel,
# including deprecation warnings raised by the libraries imported above.
import warnings
warnings.filterwarnings('ignore') 

In [None]:
# Candidate seeds; only the first entry is selected in this cell.
val_seeds = [42, 99, 67, 2, 23]
val_seed = val_seeds[0]

seed = 1
# c and epoch_num mirror the literals passed to LogisticRegression(...) in the
# "Metrics: Initial state" cell; epoch_num is overridden in the loader cell for
# dataset 'hmda' with scenario < 7.
c = 0.003
epoch_num = 1000
# NOTE(review): `th` is not read by any cell visible here -- confirm its role.
th = 0.03

In [None]:
# Selects which dataset / scenario section below actually executes
# (each section is gated by an `indicator` flag built from these two values).
dataset = 'adult'
sens_attr = 'gender'
# NOTE(review): `subarea` is read by the loader cell when dataset == 'hmda'
# and scenario >= 7; enable one of the two lines below in that case.
# subarea = 'Calcasieu'
# subarea = 'Orleans'
scenario = 1

In [None]:
# Optional conditioning event used when grouping training rows:
# A names the attribute ('y' is the label) and A_val the value it must take.
# A = None disables conditioning.
A = 'y'
A_val = 1
# A = None
# A_val = None
# Validate the configuration before anything is derived from it.
assert A is None or (A == 'y' and A_val == 1), \
    "only A=None or (A='y', A_val=1) is supported"
# Metric id handed to computeFairness: 0 when unconditioned, 1 when A == 'y'
# (ids 0 and 1 are reported as statistical parity and TPR parity further down).
fair_metric = 0 if (A is None) else 1

In [None]:
# Name of the classifier family; matches the class instantiated as `clf` below.
clf_name = 'LogisticRegression'

## Preparation

**Load Dataset**

In [None]:
# Load the train/test split for the configured dataset.
if dataset == 'hmda':
    if scenario>=7:
        # NOTE(review): in the cells visible here `subarea` is only assigned in
        # commented-out lines, so this branch raises NameError unless one of
        # them is enabled first.
        X_train, X_test, y_train, y_test = load(dataset, subarea=subarea)
    else:
        # Overrides the epoch budget chosen in the configuration cell.
        epoch_num = 300
        X_train, X_test, y_train, y_test = load(dataset)
else:
    X_train, X_test, y_train, y_test = load(dataset)

**Parametric Model**

In [None]:
# Keep unscaled copies: later cells read raw column values (e.g. the sensitive
# attribute) from the *_orig frames while the model sees the scaled arrays.
X_train_orig = copy.deepcopy(X_train)
X_test_orig = copy.deepcopy(X_test)

# Scale data: the default regularization penalty is 'l2', and 'lbfgs' solvers support only l2 penalties.
# Regularization makes the predictor dependent on the scale of the features.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only, then reuse it for the test split.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Display (number of feature columns, total number of rows across train and test).
X_train.shape[1], len(X_train_orig)+len(X_test_orig)

**Loss function** (Log loss for logistic regression)

In [None]:
# Choose the model class by (un)commenting exactly one of the three lines.
# clf = NeuralNetwork(input_size=X_train.shape[-1])
clf = LogisticRegression(input_size=X_train.shape[-1])
# clf = SVM(input_size=X_train.shape[-1])
# Presumably the length of the flattened parameter vector -- depends on what
# convert_grad_to_ndarray (imported from the project modules) returns.
num_params = len(convert_grad_to_ndarray(list(clf.parameters())))
# Pick the torch loss matching the model class; loss_func is left undefined
# if clf is none of the three classes.
if isinstance(clf, LogisticRegression):
    loss_func = logistic_loss_torch
elif isinstance(clf, SVM):
    loss_func = svm_loss_torch
elif isinstance(clf, NeuralNetwork):
    loss_func = nn_loss_torch

In [None]:
# Display the class name of the selected model.
clf.__class__.__name__

**Metrics: Initial state**

In [None]:
# Baseline: fit on the full (unsampled) training set and report test metrics.
# NOTE(review): epoch_num and c are repeated as literals here instead of using
# the `epoch_num` / `c` variables from the configuration cell -- confirm intent.
clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=1000, c=0.003)

clf.fit(X_train, y_train)

y_pred_test = clf.predict_proba(X_test)
y_pred_train = clf.predict_proba(X_train)

# computeFairness metric ids: 0 = statistical parity, 1 = TPR parity,
# 2 = predictive parity (as labelled by the prints below).
spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
print("Initial statistical parity: ", spd_0)

tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
print("Initial TPR parity: ", tpr_parity_0)

predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
print("Initial predictive parity: ", predictive_parity_0)

loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)

accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Display the feature names of the unscaled training frame.
X_train_orig.columns

Correlation Ranking

In [None]:
# Pearson correlation between every test feature and the label, rounded to 2 decimals.
for col in X_test_orig.columns:
    corr_matrix = np.corrcoef(X_test_orig[col], y_test)
    corr_value = np.round(corr_matrix[0, 1], 2)
    print(f'corr. between {col} and label: {corr_value}')

In [None]:
# Pearson correlation between every test feature and the sensitive attribute,
# rounded to 2 decimals.
for col in X_test_orig.columns:
    corr_matrix = np.corrcoef(X_test_orig[col], X_test_orig[sens_attr])
    corr_value = np.round(corr_matrix[0, 1], 2)
    print(f'corr. between {col} and {sens_attr}: {corr_value}')

In [None]:
# Display the learned weights of the model's `lr` layer.
clf.lr.weight

### Definition

In [None]:
def train_clf(model, info_dict):
    """Train ``model`` in short rounds and keep the lowest-loss parameters.

    The model's ``epoch_num`` budget is split into rounds of 10 epochs. After
    each round ``model.compute_loss(info_dict)`` is evaluated and the current
    parameters are checkpointed whenever the loss improves. The best
    checkpoint is restored, ``model.adjust_threshold(info_dict)`` is called
    and the model is switched to eval mode.

    Args:
        model: a ``torch.nn.Module`` exposing ``epoch_num``, ``fit_info``,
            ``compute_loss`` and ``adjust_threshold``.
        info_dict: payload forwarded unchanged to those three methods.

    Returns:
        The same ``model`` object. Its ``epoch_num`` is restored to the value
        it had on entry, so the function can be called again on the same model.

    Side effects:
        Writes the best parameters to ``best_params/best-test.pth``.
    """
    val_iter = 10
    ckpt_path = 'best_params/best-test.pth'
    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    total_epochs = model.epoch_num
    num_rounds = total_epochs // val_iter
    best_loss = float('inf')
    checkpoint_written = False

    # fit_info trains for model.epoch_num epochs, so shrink it to one round
    # and put the caller's value back even if training raises.
    model.epoch_num = val_iter
    try:
        for _ in range(num_rounds):
            model.fit_info(info_dict)
            loss = model.compute_loss(info_dict)
            if loss < best_loss:
                torch.save(model.state_dict(), ckpt_path)
                best_loss = loss
                checkpoint_written = True
    finally:
        model.epoch_num = total_epochs

    # Only reload when this call wrote the file; otherwise a checkpoint left
    # over from an earlier run would silently replace the current weights.
    if checkpoint_written:
        model.load_state_dict(torch.load(ckpt_path))
    model.adjust_threshold(info_dict)
    model.eval()
    return model

In [None]:
def get_A_idx(x, y, A=None, A_val=None):
    """Return the index labels of the rows that satisfy ``A == A_val``.

    Args:
        x: feature DataFrame.
        y: label Series sharing ``x``'s index.
        A: ``None`` (no filter), ``'y'`` (filter on the label) or the name of
            a column of ``x``.
        A_val: value that attribute ``A`` must equal.

    Returns:
        A pandas Index: all of ``x.index`` when ``A`` is None, otherwise the
        labels of the matching rows.

    Raises:
        NotImplementedError: if ``A`` is neither None, ``'y'`` nor a column of ``x``.
    """
    if A is None:
        return x.index
    if A == 'y':
        label_mask = (y == A_val)
        return y[label_mask].index
    if A in x.columns:
        column_mask = (x[A] == A_val)
        return x[column_mask].index
    raise NotImplementedError

In [None]:
def get_attr(x, y, attr):
    """Return the label ``y`` when ``attr`` is ``'y'``, otherwise ``x[attr]``."""
    return y if attr == 'y' else x[attr]

In [None]:
def record_statistics(clf, record=True):
    """Print (and optionally record) fairness, accuracy and group-score bounds.

    Reads these notebook globals: ``X_train_sampled``, ``X_train_orig_sampled``,
    ``y_train_sampled``, ``X_test``, ``X_test_orig``, ``y_test``,
    ``fair_metric``, ``dataset``, ``sens_attr``, ``parent_1``, ``parent_2``,
    ``parent_3``, ``A`` and ``A_val``.

    Printed values:
        BF: negated ``computeFairness`` on the sampled training data.
        AF: negated ``computeFairness`` on the test data.
        Test Acc / Test F1: ``computeAccuracy`` / ``computeF1`` on the test data.
        Upper / Lower Bound: extreme differences between the mean scores of
            the ``sens_attr == 1`` groups and the ``sens_attr == 0`` groups,
            where a group is one non-empty combination of
            (parent_1, parent_2, parent_3) values restricted to the rows
            selected by ``get_A_idx``.

    Args:
        clf: fitted classifier exposing ``predict_proba``.
        record: when true, the values are also appended to the global lists
            ``BFs``, ``AFs``, ``accs``, ``f1s``, ``ubs`` and ``lbs``.

    Raises:
        ValueError: if no non-empty group exists for either value of ``sens_attr``.
    """
    # One forward pass per split; previously the same predictions were
    # recomputed for every metric and once more for every group.
    scores_train = clf.predict_proba(X_train_sampled)
    scores_test = clf.predict_proba(X_test)

    metric_val = computeFairness(scores_train, X_train_orig_sampled,
                                 y_train_sampled, fair_metric, dataset)
    if record:
        BFs.append(-metric_val)
    print("BF: ", -metric_val)
    metric_val = computeFairness(scores_test, X_test_orig, y_test,
                                 fair_metric, dataset)
    if record:
        AFs.append(-metric_val)
    print("AF: ", -metric_val)

    test_acc = computeAccuracy(y_test, scores_test)
    test_f1 = computeF1(y_test, scores_test)
    print("Test Acc: ", test_acc)
    print("Test F1: ", test_f1)
    if record:
        accs.append(test_acc)
        f1s.append(test_f1)

    # Columns and value grids are shared by both sensitive-attribute values.
    parent_cols = [get_attr(X_train_orig_sampled, y_train_sampled, name)
                   for name in (parent_1, parent_2, parent_3)]
    parent_vals = [np.sort(col.unique()) for col in parent_cols]
    sens_col = get_attr(X_train_orig_sampled, y_train_sampled, sens_attr)
    conditioned_idx = get_A_idx(X_train_orig_sampled, y_train_sampled, A, A_val)

    def _group_mean_scores(sens_val):
        """Mean training score of each non-empty parent-value group with sens_attr == sens_val."""
        means = []
        for v in parent_vals[0]:
            for u in parent_vals[1]:
                for w in parent_vals[2]:
                    mask = ((parent_cols[0] == v) & (parent_cols[1] == u) &
                            (parent_cols[2] == w) & (sens_col == sens_val))
                    idx = X_train_orig_sampled[mask].index.intersection(conditioned_idx)
                    if len(idx) > 0:
                        means.append(np.mean(scores_train[idx]))
        return means

    # sens_attr == 0 is treated as the protected side, == 1 as the privileged side.
    score_protected_groups = _group_mean_scores(0)
    score_privileged_groups = _group_mean_scores(1)
    if not score_protected_groups or not score_privileged_groups:
        raise ValueError(
            "no non-empty protected/privileged group found; cannot compute bounds")

    ub = np.max(score_privileged_groups)-np.min(score_protected_groups)
    lb = -np.max(score_protected_groups)+np.min(score_privileged_groups)

    print(f'Upper Bound: {ub}')
    print(f'Lower Bound: {lb}')

    if record:
        ubs.append(ub)
        lbs.append(lb)

## Inject Selection Bias

### HMDA scenario 1

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'hmda', scenario 1.
indicator = ((dataset=='hmda') and (scenario==1))

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (race, DI).
    p = np.ones_like(X_train_orig.race).astype(float)
    p[(X_train_orig.race == 1) & (X_train_orig.DI == 1)] = 0.3
    p[(X_train_orig.race == 0) & (X_train_orig.DI == 2)] = 0.1
    p[(X_train_orig.race == 0) & (X_train_orig.DI == 3)] = 0.2
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    # NOTE(review): epoch_num=1000 here while the other scenario sections in
    # view use 3000 -- confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=1000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    sens_attr = 'race'
    parent_1 = 'race'
    parent_2 = 'race'
    parent_3 = 'DI'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### HMDA scenario 2

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'hmda', scenario 2.
indicator = ((dataset=='hmda') and (scenario==2))

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (income_brackets, DI).
    p = np.ones_like(X_train_orig.race).astype(float)
    p[(X_train_orig.income_brackets == 1) & (X_train_orig.DI == 1)] = 0.1
    p[(X_train_orig.income_brackets == 1) & (X_train_orig.DI == 2)] = 0.1
    p[(X_train_orig.income_brackets == 0) & (X_train_orig.DI == 3)] = 0.2
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    sens_attr = 'race'
    parent_1 = 'income_brackets'
    parent_2 = 'income_brackets'
    parent_3 = 'DI'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### HMDA scenario 3

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'hmda', scenario 3.
indicator = ((dataset=='hmda') and (scenario==3))

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (label y, LV).
    p = np.ones_like(X_train_orig.race).astype(float)
    p[(y_train == 1) & (X_train_orig.LV == 1)] = 0.05
    p[(y_train == 0) & (X_train_orig.LV == 1)] = 0.2
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    # 'y' selects the label instead of a feature column (see get_attr).
    sens_attr = 'race'
    parent_1 = 'LV'
    parent_2 = 'LV'
    parent_3 = 'y'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### HMDA scenario 4

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'hmda', scenario 4.
indicator = ((dataset=='hmda') and (scenario==4))

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (race, DI).
    p = np.ones_like(X_train_orig.race).astype(float)
    p[(X_train_orig.race==0)&(X_train_orig.DI==1)] = 0.2
    p[(X_train_orig.race==0)&(X_train_orig.DI==2)] = 0.3
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    sens_attr = 'race'
    parent_1 = 'race'
    parent_2 = 'race'
    parent_3 = 'DI'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### HMDA scenario 5

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'hmda', scenario 5.
indicator = ((dataset=='hmda') and (scenario==5))

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (income_brackets, DI).
    p = np.ones_like(X_train_orig.race).astype(float)
    p[(X_train_orig.income_brackets == 1) & (X_train_orig.DI == 1)] = 0.1
    p[(X_train_orig.income_brackets == 0) & (X_train_orig.DI == 2)] = 0.1
    p[(X_train_orig.income_brackets == 1) & (X_train_orig.DI == 3)] = 0.2
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    sens_attr = 'race'
    parent_1 = 'income_brackets'
    parent_2 = 'income_brackets'
    parent_3 = 'DI'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### HMDA scenario 6

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'hmda', scenario 6.
indicator = ((dataset=='hmda') and (scenario==6))

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (label y, DI).
    p = np.ones_like(X_train_orig.race).astype(float)
    p[(y_train == 1) & (X_train_orig.DI == 1)] = 0.5
    p[(y_train == 1) & (X_train_orig.DI == 2)] = 0.5
    p[(y_train == 0) & (X_train_orig.DI == 3)] = 0.2
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    # 'y' selects the label instead of a feature column (see get_attr).
    sens_attr = 'race'
    parent_1 = 'DI'
    parent_2 = 'DI'
    parent_3 = 'y'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### Adult scenario 1

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'adult', scenario 1.
indicator = ((dataset=='adult') and (scenario==1))

**Pr(C=1|married,female)=1, Pr(C=1|not_married,female)=1, Pr(C=1|married,male)=0.11, Pr(C=1|not_married,male)=1**

In [None]:
# Keep-probability Pr(C=1 | relationship, gender) for each training data point:
# 0.11 where relationship and gender are both non-zero, 1.0 everywhere else.
if indicator:
    p = np.where(np.logical_and(X_train_orig.relationship, X_train_orig.gender), 0.11, 1.0)
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    sens_attr = 'gender'
    parent_1 = 'relationship'
    parent_2 = 'gender'
    parent_3 = 'gender'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### Adult scenario 2


In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'adult', scenario 2.
indicator = ((dataset=='adult') and (scenario==2))

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (relationship, education).
    p = np.ones_like(X_train_orig.gender).astype(float)
    p[(X_train_orig.relationship==1)&(X_train_orig.education==0)] = 0.2
    p[(X_train_orig.relationship==1)&(X_train_orig.education==1)] = 0.4
    p[(X_train_orig.relationship==1)&(X_train_orig.education==2)] = 0.6
    p[(X_train_orig.relationship==0)&(X_train_orig.education==1)] = 0.6
    p[(X_train_orig.relationship==0)&(X_train_orig.education==2)] = 0.4
    p[(X_train_orig.relationship==0)&(X_train_orig.education==3)] = 0.2
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    sens_attr = 'gender'
    parent_1 = 'relationship'
    parent_2 = 'relationship'
    parent_3 = 'education'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### Adult scenario 3

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'adult', scenario 3.
indicator = ((dataset=='adult') and (scenario==3))

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (relationship, label y).
    p = np.ones_like(X_train_orig.gender).astype(float)
    p[(X_train_orig.relationship==1)&(y_train==0)] = 0.2
    p[(X_train_orig.relationship==0)&(y_train==1)] = 0.2
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    # 'y' selects the label instead of a feature column (see get_attr).
    sens_attr = 'gender'
    parent_1 = 'relationship'
    parent_2 = 'relationship'
    parent_3 = 'y'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### Adult scenario 4

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'adult', scenario 4.
indicator = ((dataset=='adult') and (scenario==4))

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (relationship, gender).
    p = np.ones_like(X_train_orig.gender).astype(float)
    p[(X_train_orig.relationship==0)&(X_train_orig.gender==1)] = 0.5
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same subset is drawn on every run.
    np.random.seed(0)
    sample_bool = np.zeros(len(X_train))
    # One Bernoulli(p[idx]) draw per training row, in row order.
    for idx in range(len(X_train)):
        sample_bool[idx] = np.random.binomial(n=1, p=p[idx])  # 1.0 = keep the row, 0.0 = drop it

In [None]:
if indicator:
    # Positions of the rows whose Bernoulli draw came out 1.
    kept_pos = np.where(sample_bool)[0]
    # Select by position everywhere: X_train is a positional NumPy array, so the
    # pandas objects must use .iloc to stay aligned with it. Label-based
    # .loc / Series[...] only matched when the index happened to be 0..n-1.
    X_train_orig_sampled = X_train_orig.iloc[kept_pos].reset_index(drop=True)
    X_train_sampled = X_train[kept_pos]
    y_train_sampled = y_train.iloc[kept_pos].reset_index(drop=True)

In [None]:
if indicator:
    # Fresh classifier fitted on the biased (sub-sampled) training set only.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)  # scores on the full, unsampled training set

    # The *_0 names below overwrite the baselines from the "Initial state" cell.
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same checks on the sampled training data. This overwrites the test-set
    # values of spd_0 and accuracy_0 computed in the previous cell.
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Globals read by record_statistics: groups are formed over the value
    # combinations of (parent_1, parent_2, parent_3) and split by sens_attr.
    sens_attr = 'gender'
    parent_1 = 'gender'
    parent_2 = 'gender'
    parent_3 = 'relationship'

**Bound**

In [None]:
if indicator:
    # Print the metrics and bounds for this model; record=False skips appending to the history lists.
    record_statistics(clf, record=False)

### Adult scenario 5

In [None]:
# Gate flag: the `if indicator:` cells of this section run only for dataset 'adult', scenario 5.
indicator = ((dataset=='adult') and (scenario==5))

In [None]:
if indicator:
    # Per-row probability of keeping a training point; rows not matched below keep p = 1.
    # Here the probability depends on (relationship, hours).
    # NOTE(review): the ratios 0.154/0.658 and 0.240/1.52 are not explained
    # anywhere in the visible notebook -- document where they come from.
    p = np.ones_like(X_train_orig.gender).astype(float)
    p[(X_train_orig.relationship==0)&(X_train_orig.hours==0)] = 0.154/0.658
    p[(X_train_orig.relationship==1)&(X_train_orig.hours==1)] = 0.240/1.52
    # Histogram of the keep-probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups.
    sens_attr, parent_1, parent_2, parent_3 = 'gender', 'relationship', 'relationship', 'hours'

**Bound**

In [None]:
if indicator:
    # Report statistics for this scenario's baseline model (record_statistics is defined
    # outside this section). record=False presumably means "report only, do not append
    # to the result lists" — TODO confirm.
    record_statistics(clf, record=False)

### Adult scenario 6

In [None]:
# Gate for every cell of this scenario: only run when adult / scenario 6 is selected.
indicator = dataset == 'adult' and scenario == 6

In [None]:
if indicator:
    # Per-row selection probability: 1 by default, lowered for two (relationship, label) strata.
    p = np.ones_like(X_train_orig['gender'], dtype=float)
    p[(X_train_orig['relationship'] == 1) & (y_train == 1)] = 0.7
    p[(X_train_orig['relationship'] == 0) & (y_train == 0)] = 0.6
    # Histogram of the assigned probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    clf.fit(X_train_sampled, y_train_sampled.to_numpy(), use_sklearn=False)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups
    # ('y' refers to the label rather than a feature column).
    sens_attr, parent_1, parent_2, parent_3 = 'gender', 'relationship', 'relationship', 'y'

**Bound**

In [None]:
if indicator:
    # Report statistics for this scenario's baseline model (record_statistics is defined
    # outside this section). record=False presumably means "report only, do not append
    # to the result lists" — TODO confirm.
    record_statistics(clf, record=False)

### Law scenario 1

In [None]:
# Gate for every cell of this scenario: only run when law / scenario 1 is selected.
indicator = dataset == 'law' and scenario == 1

In [None]:
if indicator:
    # Count training rows in each (decile3, lsat) stratum separately for racetxt==1 and
    # racetxt==0, overlay the two bar series, and label each stratum with the count ratio.
    plt.figure(figsize=(4, 4))
    plt.ylabel('Num. of points')
    numerators = []
    denominators = []
    ratios = []
    for u in range(2):
        for v in range(2):
            in_stratum = (X_train_orig.decile3==u)&(X_train_orig.lsat==v)
            numerator = int(((X_train_orig.racetxt==1)&in_stratum).sum())
            denominator = int(((X_train_orig.racetxt==0)&in_stratum).sum())
            numerators.append(numerator)
            denominators.append(denominator)
            # An empty racetxt==0 stratum used to raise ZeroDivisionError; show NaN instead.
            ratio = numerator/denominator if denominator else float('nan')
            ratios.append(ratio)

    plt.bar(range(4), numerators, alpha=0.5, label='white')
    plt.bar(range(4), denominators, alpha=0.5, label='non-white')
    plt.xticks(range(4), [round(r, 2) for r in ratios])
    plt.legend()
    plt.xlabel('decile3+lsat (divided by race)')

In [None]:
# Pr(C=1 | race, decile3) for each training data point (lsat is not used by the rules below).
if indicator:
    # Every row is kept with probability 1 unless one of the two strata below applies.
    p = np.ones_like(X_train_orig['racetxt'], dtype=float)
    p[(X_train_orig['racetxt'] == 0) & (X_train_orig['decile3'] == 0)] = 0.2
    p[(X_train_orig['racetxt'] == 1) & (X_train_orig['decile3'] == 1)] = 0.5
    # Histogram of the assigned probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    # NOTE(review): labels are passed as a pandas Series with default fit options here,
    # whereas the adult scenarios pass `.to_numpy()` and `use_sklearn=False` — confirm.
    clf.fit(X_train_sampled, y_train_sampled)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups.
    sens_attr, parent_1, parent_2, parent_3 = 'racetxt', 'decile3', 'decile3', 'racetxt'

**Bound**

In [None]:
if indicator:
    # Report statistics for this scenario's baseline model (record_statistics is defined
    # outside this section). record=False presumably means "report only, do not append
    # to the result lists" — TODO confirm.
    record_statistics(clf, record=False)

### Law scenario 2

In [None]:
# Gate for every cell of this scenario: only run when law / scenario 2 is selected.
indicator = dataset == 'law' and scenario == 2

In [None]:
if indicator:
    # Per-row selection probability: 1 by default, lowered for two (lsat, decile3) strata.
    p = np.ones_like(X_train_orig['racetxt'], dtype=float)
    p[(X_train_orig['lsat'] == 1) & (X_train_orig['decile3'] == 0)] = 0.1
    p[(X_train_orig['lsat'] == 0) & (X_train_orig['decile3'] == 1)] = 0.3
    # Histogram of the assigned probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    # NOTE(review): labels are passed as a pandas Series with default fit options here,
    # whereas the adult scenarios pass `.to_numpy()` and `use_sklearn=False` — confirm.
    clf.fit(X_train_sampled, y_train_sampled)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups.
    sens_attr, parent_1, parent_2, parent_3 = 'racetxt', 'decile3', 'decile3', 'lsat'

**Bound**

In [None]:
if indicator:
    # Report statistics for this scenario's baseline model (record_statistics is defined
    # outside this section). record=False presumably means "report only, do not append
    # to the result lists" — TODO confirm.
    record_statistics(clf, record=False)

### Law scenario 3

In [None]:
# Gate for every cell of this scenario: only run when law / scenario 3 is selected.
indicator = dataset == 'law' and scenario == 3

In [None]:
if indicator:
    # Per-row selection probability: 1 by default, lowered for two (lsat, label) strata.
    p = np.ones_like(X_train_orig['racetxt'], dtype=float)
    p[(X_train_orig['lsat'] == 1) & (y_train == 1)] = 0.1
    p[(X_train_orig['lsat'] == 0) & (y_train == 0)] = 0.7
    # Histogram of the assigned probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    # NOTE(review): labels are passed as a pandas Series with default fit options here,
    # whereas the adult scenarios pass `.to_numpy()` and `use_sklearn=False` — confirm.
    clf.fit(X_train_sampled, y_train_sampled)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups
    # ('y' refers to the label rather than a feature column).
    sens_attr, parent_1, parent_2, parent_3 = 'racetxt', 'lsat', 'lsat', 'y'

**Bound**

In [None]:
if indicator:
    # "BF": negated statistical parity of the model on the biased (sampled) training rows.
    y_pred_train = clf.predict_proba(X_train_sampled)
    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("BF: ", -spd_0)
    # "AF": the same quantity on the full training set. y_pred_train / spd_0 keep these
    # full-train values after the cell, as before.
    y_pred_train = clf.predict_proba(X_train)
    spd_0 = computeFairness(y_pred_train, X_train_orig, y_train, 0, dataset)
    print("AF: ", -spd_0)

    # Scores on the sampled rows are computed once and reused for every group
    # (previously predict_proba was re-run for each group; assumes it is deterministic).
    sampled_scores = clf.predict_proba(X_train_sampled)

    # Partition the sampled rows by (parent_1, parent_2) stratum and sensitive value.
    # Only parent_1 and parent_2 are used here; parent_3 is not.
    # X_train_orig_sampled has a reset index, so its index labels double as row positions.
    parent_1_values = np.sort(X_train_orig_sampled[parent_1].unique())
    parent_2_values = np.sort(X_train_orig_sampled[parent_2].unique())
    protected_groups = []   # sensitive attr == 0
    privileged_groups = []  # sensitive attr == 1
    for v in parent_1_values:
        for u in parent_2_values:
            in_stratum = (X_train_orig_sampled[parent_1]==v) & (X_train_orig_sampled[parent_2]==u)
            for sens_val, groups in ((0, protected_groups), (1, privileged_groups)):
                idx = X_train_orig_sampled[in_stratum & (X_train_orig_sampled[sens_attr]==sens_val)].index
                if len(idx)>0:
                    groups.append(idx)

    # Mean predicted score per non-empty group.
    score_protected_groups = [np.mean(sampled_scores[idx]) for idx in protected_groups]
    score_privileged_groups = [np.mean(sampled_scores[idx]) for idx in privileged_groups]

    # Extreme differences between privileged and protected group means.
    print(f'Upper Bound: {np.max(score_privileged_groups)-np.min(score_protected_groups)}')
    print(f'Lower Bound: {-np.max(score_protected_groups)+np.min(score_privileged_groups)}')

### Law scenario 4

In [None]:
# Gate for every cell of this scenario: only run when law / scenario 4 is selected.
indicator = dataset == 'law' and scenario == 4

In [None]:
if indicator:
    # Per-row selection probability: 1 by default, lowered for two (racetxt, decile3, lsat) strata.
    p = np.ones_like(X_train_orig['racetxt'], dtype=float)
    p[(X_train_orig['racetxt'] == 1) & (X_train_orig['decile3'] == 0) & (X_train_orig['lsat'] == 0)] = 0.5
    p[(X_train_orig['racetxt'] == 0) & (X_train_orig['decile3'] == 0) & (X_train_orig['lsat'] == 1)] = 0.3
    # Histogram of the assigned probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    # NOTE(review): labels are passed as a pandas Series with default fit options here,
    # whereas the adult scenarios pass `.to_numpy()` and `use_sklearn=False` — confirm.
    clf.fit(X_train_sampled, y_train_sampled)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups.
    sens_attr, parent_1, parent_2, parent_3 = 'racetxt', 'decile3', 'lsat', 'racetxt'

**Bound**

In [None]:
if indicator:
    # Report statistics for this scenario's baseline model (record_statistics is defined
    # outside this section). record=False presumably means "report only, do not append
    # to the result lists" — TODO confirm.
    record_statistics(clf, record=False)

### Law scenario 5

In [None]:
# Gate for every cell of this scenario: only run when law / scenario 5 is selected.
indicator = dataset == 'law' and scenario == 5

In [None]:
if indicator:
    # Per-row selection probability: 1 by default, lowered for two (lsat, ugpa) strata.
    p = np.ones_like(X_train_orig['racetxt'], dtype=float)
    p[(X_train_orig['lsat'] == 0) & (X_train_orig['ugpa'] == 0)] = 0.7 / 5.71
    p[(X_train_orig['lsat'] == 1) & (X_train_orig['ugpa'] == 1)] = 5 / 11.48
    # Histogram of the assigned probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    # NOTE(review): labels are passed as a pandas Series with default fit options here,
    # whereas the adult scenarios pass `.to_numpy()` and `use_sklearn=False` — confirm.
    clf.fit(X_train_sampled, y_train_sampled)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups.
    sens_attr, parent_1, parent_2, parent_3 = 'racetxt', 'ugpa', 'ugpa', 'lsat'

**Bound**

In [None]:
if indicator:
    # Report statistics for this scenario's baseline model (record_statistics is defined
    # outside this section). record=False presumably means "report only, do not append
    # to the result lists" — TODO confirm.
    record_statistics(clf, record=False)

### Law scenario 6

In [None]:
# Gate for every cell of this scenario: only run when law / scenario 6 is selected.
indicator = dataset == 'law' and scenario == 6

In [None]:
if indicator:
    # Per-row selection probability: 1 by default, lowered for two (lsat, label) strata.
    p = np.ones_like(X_train_orig['racetxt'], dtype=float)
    p[(X_train_orig['lsat'] == 0) & (y_train == 1)] = 0.5
    p[(X_train_orig['lsat'] == 1) & (y_train == 0)] = 0.7
    # Histogram of the assigned probabilities as a quick sanity check.
    plt.hist(p, bins=10)
    plt.show()

**Sample based on the conditional probability**

In [None]:
if indicator:
    # Fixed seed so the same biased subsample is drawn on every run.
    np.random.seed(0)
    # One Bernoulli(p[row]) draw per training row, in row order: 1.0 keeps the row, 0.0 drops it.
    sample_bool = np.array(
        [np.random.binomial(n=1, p=p[row]) for row in range(len(X_train))],
        dtype=float,
    )

In [None]:
if indicator:
    # sample_bool is positional (filled by iterating range(len(X_train))), so rows must be
    # selected by position. The previous .loc / Series[...] lookups were label-based and
    # only correct when the index happened to be the default 0..n-1 RangeIndex.
    sampled_positions = np.flatnonzero(sample_bool)
    X_train_orig_sampled = X_train_orig.iloc[sampled_positions].reset_index(drop=True)
    X_train_sampled = X_train[sampled_positions]
    y_train_sampled = y_train.iloc[sampled_positions].reset_index(drop=True)

In [None]:
if indicator:
    # Baseline model for this scenario, fit only on the biased (sub-sampled) rows.
    # NOTE(review): epoch_num and c are hard-coded here instead of using the
    # config-cell variables `epoch_num` / `c` — confirm this is intentional.
    clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=3000, c=0.003)

    # NOTE(review): labels are passed as a pandas Series with default fit options here,
    # whereas the adult scenarios pass `.to_numpy()` and `use_sklearn=False` — confirm.
    clf.fit(X_train_sampled, y_train_sampled)

    # Predictions on the test set and on the full (unsampled) training set.
    y_pred_test = clf.predict_proba(X_test)
    y_pred_train = clf.predict_proba(X_train)

    # Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
    spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
    print("Initial TPR parity: ", tpr_parity_0)

    predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
    print("Initial predictive parity: ", predictive_parity_0)

    # Test-set loss and accuracy of the baseline.
    loss_0 = logistic_loss(y_test, y_pred_test)
    print("Initial loss: ", loss_0)

    accuracy_0 = computeAccuracy(y_test, y_pred_test)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Same metrics, now on the sampled training rows the model was fit on.
    # Note: this overwrites y_pred_train, spd_0 and accuracy_0 from the previous cell
    # (which held full-train predictions and test-set metrics).
    y_pred_train = clf.predict_proba(X_train_sampled)

    spd_0 = computeFairness(y_pred_train, X_train_orig_sampled, y_train_sampled, 0, dataset)
    print("Initial statistical parity: ", spd_0)

    accuracy_0 = computeAccuracy(y_train_sampled, y_pred_train)
    print("Initial accuracy: ", accuracy_0)

In [None]:
if indicator:
    # Sensitive attribute and the three "parent" attributes used later to build groups
    # ('y' refers to the label rather than a feature column).
    sens_attr, parent_1, parent_2, parent_3 = 'racetxt', 'lsat', 'lsat', 'y'

**Bound**

In [None]:
if indicator:
    # Report statistics for this scenario's baseline model (record_statistics is defined
    # outside this section). record=False presumably means "report only, do not append
    # to the result lists" — TODO confirm.
    record_statistics(clf, record=False)

## Regularizer

In [None]:
# Six independent, initially empty result accumulators for the regularizer experiments
# (presumably filled by record_statistics — TODO confirm).
BFs, AFs, ubs, lbs, accs, f1s = ([] for _ in range(6))

### Prepare Validation Set

In [None]:
# Hold out 25% of the sampled training rows as a validation set.
# Note: this rebinds X_train_orig_sampled / y_train_sampled to the remaining 75%, so
# re-running the cell without re-running the scenario cells splits an already-split set.
(X_train_orig_sampled, X_val_orig_sampled,
 y_train_sampled, y_val_sampled) = train_test_split(
    X_train_orig_sampled, y_train_sampled, test_size=0.25, random_state=val_seed)

In [None]:
from sklearn.preprocessing import StandardScaler

# Give the split frames and labels a fresh 0..n-1 index so index labels equal row
# positions in the scaled arrays below.
X_train_orig_sampled = X_train_orig_sampled.reset_index(drop=True)
X_val_orig_sampled = X_val_orig_sampled.reset_index(drop=True)
y_train_sampled = y_train_sampled.reset_index(drop=True)
y_val_sampled = y_val_sampled.reset_index(drop=True)

# Refit the scaler on the sampled training split only, then apply it to every other set.
sc = StandardScaler()
X_train_sampled = sc.fit_transform(X_train_orig_sampled)
X_val_sampled = sc.transform(X_val_orig_sampled)
X_test = sc.transform(X_test_orig)
X_train = sc.transform(X_train_orig)

### No Regularizer

In [None]:
# Overrides the config-cell epoch_num for the training runs that pass `epoch_num` below.
epoch_num = 300

In [None]:
# Train/validation arrays handed to train_clf for the unregularized run.
info_dict = {
    'x_train': X_train_sampled,
    'y_train': y_train_sampled.to_numpy(),
    'x_val': X_val_sampled,
    'y_val': y_val_sampled.to_numpy(),
}

In [None]:
# Unregularized baseline, using the config values `epoch_num` and `c`.
clf = LogisticRegression(input_size=X_train.shape[-1], epoch_num=epoch_num, c=c)

# train_clf returns the classifier to use from here on (rebinds clf).
clf = train_clf(clf, info_dict=info_dict)

# Predictions on the test set and on the full (unsampled) training set.
y_pred_test = clf.predict_proba(X_test)
y_pred_train = clf.predict_proba(X_train)

# Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
print("Initial statistical parity: ", spd_0)

tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
print("Initial TPR parity: ", tpr_parity_0)

predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
print("Initial predictive parity: ", predictive_parity_0)

# Test-set loss and accuracy.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)

accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)
# accs.append(accuracy_0)

In [None]:
# Called with the default `record` argument here (the scenario cells pass record=False);
# presumably this appends the unregularized model's statistics to the result lists — TODO confirm.
record_statistics(clf)

### Bound Regularizer

In [None]:
# Inputs for the bound-regularized model: threshold, balance factor, data arrays, and
# per-split [protected_groups, privileged_groups] lists of row-index groups.
info_dict = dict()
info_dict['th'] = th
info_dict['balance'] = 1
info_dict['x_train'] = X_train_sampled
info_dict['y_train'] = y_train_sampled.to_numpy()
info_dict['x_val'] = X_val_sampled
info_dict['y_val'] = y_val_sampled.to_numpy()

# Stratum values are enumerated from the TRAINING split for both splits, so train and
# validation groups are visited in the same (parent_1, parent_2, parent_3) order.
stratum_values = [
    np.sort(get_attr(X_train_orig_sampled, y_train_sampled, parent).unique())
    for parent in (parent_1, parent_2, parent_3)
]

for reg_key, X_split, y_split in (
    ('train_regularizer', X_train_orig_sampled, y_train_sampled),
    ('val_regularizer', X_val_orig_sampled, y_val_sampled),
):
    # Loop-invariant lookups hoisted out of the stratum loops
    # (assumes get_attr / get_A_idx are pure lookups — they were previously re-evaluated
    # on every iteration with identical arguments).
    col_1 = get_attr(X_split, y_split, parent_1)
    col_2 = get_attr(X_split, y_split, parent_2)
    col_3 = get_attr(X_split, y_split, parent_3)
    col_sens = get_attr(X_split, y_split, sens_attr)
    A_idx = get_A_idx(X_split, y_split, A, A_val)

    protected_groups = []   # sensitive attr == 0
    privileged_groups = []  # sensitive attr == 1
    for sens_val, groups in ((0, protected_groups), (1, privileged_groups)):
        for v in stratum_values[0]:
            for u in stratum_values[1]:
                for w in stratum_values[2]:
                    idx = X_split[(col_1==v) & (col_2==u) & (col_3==w) & (col_sens==sens_val)].index
                    # Restrict to the rows selected by (A, A_val).
                    idx = idx.intersection(A_idx)
                    # Empty strata are dropped, so group lists may be shorter than the
                    # number of value combinations.
                    if len(idx)>0:
                        groups.append(idx)
    info_dict[reg_key] = [protected_groups, privileged_groups]
# After the loop, protected_groups / privileged_groups hold the validation groups, as before.

In [None]:
# Bound-regularized model.
# NOTE(review): epoch_num is hard-coded to 100 here, unlike the other runs that pass
# the `epoch_num` variable — confirm this is intentional.
clf = LogisticRegression_Reg_Bound(input_size=X_train.shape[-1], epoch_num=100, c=c)

# train_clf returns the classifier to use from here on (rebinds clf).
clf = train_clf(clf, info_dict)

# Only test predictions are refreshed here; y_pred_train keeps its value from an earlier cell.
y_pred_test = clf.predict_proba(X_test)

# Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
print("Initial statistical parity: ", spd_0)

tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
print("Initial TPR parity: ", tpr_parity_0)

predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
print("Initial predictive parity: ", predictive_parity_0)

# Test-set loss and accuracy.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)

accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)
# accs.append(accuracy_0)

In [None]:
# Called with the default `record` argument; presumably appends the bound-regularized
# model's statistics to the result lists — TODO confirm.
record_statistics(clf)

### Metric Regularizer

In [None]:
# Inputs for the metric-regularized model: threshold 0, balance factor 1, data arrays,
# and per-split [protected_idx, privileged_idx] row indices.
info_dict = {
    'th': 0,
    'balance': 1,
    'x_train': X_train_sampled,
    'y_train': y_train_sampled.to_numpy(),
    'x_val': X_val_sampled,
    'y_val': y_val_sampled.to_numpy(),
}

for reg_key, X_split, y_split in (
    ('train_regularizer', X_train_orig_sampled, y_train_sampled),
    ('val_regularizer', X_val_orig_sampled, y_val_sampled),
):
    # Rows with sensitive attr 0 / 1, each restricted to the rows selected by (A, A_val).
    protected_idx = X_split[X_split[sens_attr]==0].index.intersection(
        get_A_idx(X_split, y_split, A, A_val))
    privileged_idx = X_split[X_split[sens_attr]==1].index.intersection(
        get_A_idx(X_split, y_split, A, A_val))
    info_dict[reg_key] = [protected_idx, privileged_idx]

In [None]:
# Metric-regularized model, using the config values `epoch_num` and `c`.
clf = LogisticRegression_Reg_Metric(input_size=X_train.shape[-1], epoch_num=epoch_num, c=c)

# train_clf returns the classifier to use from here on (rebinds clf).
clf = train_clf(clf, info_dict)

# Predictions on the test set and on the full (unsampled) training set.
y_pred_test = clf.predict_proba(X_test)
y_pred_train = clf.predict_proba(X_train)

# Test-set fairness; the 4th argument selects the metric (0/1/2, labelled by the prints).
spd_0 = computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)
print("Initial statistical parity: ", spd_0)

tpr_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 1, dataset)
print("Initial TPR parity: ", tpr_parity_0)

predictive_parity_0 = computeFairness(y_pred_test, X_test_orig, y_test, 2, dataset)
print("Initial predictive parity: ", predictive_parity_0)

# Test-set loss and accuracy.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)

accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Called with the default `record` argument; presumably appends the metric-regularized
# model's statistics to the result lists — TODO confirm.
record_statistics(clf)

### Weighted Metric Regularizer

In [None]:
# Inputs for the weighted-metric-regularized model: threshold 0, balance factor 6, data
# arrays, per-split [protected_groups, privileged_groups], and per-group weights.
info_dict = dict()
info_dict['th'] = 0
info_dict['balance'] = 6
info_dict['x_train'] = X_train_sampled
info_dict['y_train'] = y_train_sampled.to_numpy()
info_dict['x_val'] = X_val_sampled
info_dict['y_val'] = y_val_sampled.to_numpy()

# Stratum values are enumerated from the sampled TRAINING split everywhere below, so all
# group and weight lists are visited in the same (parent_1, parent_2, parent_3) order.
stratum_values = [
    np.sort(get_attr(X_train_orig_sampled, y_train_sampled, parent).unique())
    for parent in (parent_1, parent_2, parent_3)
]

for reg_key, X_split, y_split in (
    ('train_regularizer', X_train_orig_sampled, y_train_sampled),
    ('val_regularizer', X_val_orig_sampled, y_val_sampled),
):
    # Loop-invariant lookups hoisted out of the stratum loops
    # (assumes get_attr / get_A_idx are pure lookups — they were previously re-evaluated
    # on every iteration with identical arguments).
    col_1 = get_attr(X_split, y_split, parent_1)
    col_2 = get_attr(X_split, y_split, parent_2)
    col_3 = get_attr(X_split, y_split, parent_3)
    col_sens = get_attr(X_split, y_split, sens_attr)
    A_idx = get_A_idx(X_split, y_split, A, A_val)

    protected_groups = []   # sensitive attr == 0
    privileged_groups = []  # sensitive attr == 1
    for sens_val, groups in ((0, protected_groups), (1, privileged_groups)):
        for v in stratum_values[0]:
            for u in stratum_values[1]:
                for w in stratum_values[2]:
                    idx = X_split[(col_1==v) & (col_2==u) & (col_3==w) & (col_sens==sens_val)].index
                    # Restrict to the rows selected by (A, A_val).
                    idx = idx.intersection(A_idx)
                    # Empty strata are dropped.
                    if len(idx)>0:
                        groups.append(idx)
    info_dict[reg_key] = [protected_groups, privileged_groups]
# After the loop, protected_groups / privileged_groups hold the validation groups, as before.

# Per-group weights: the share of each stratum within its sensitive group, measured on
# the FULL (unsampled) training set restricted to the (A, A_val) rows.
# NOTE(review): a stratum is kept here when it is non-empty in the full training set,
# while the group lists above keep strata that are non-empty in the sampled split. If a
# stratum is emptied by sampling or by the train/val split, weights[s] and the group
# lists can differ in length and misalign — verify how the classifier pairs them.
full_col_1 = get_attr(X_train_orig, y_train, parent_1)
full_col_2 = get_attr(X_train_orig, y_train, parent_2)
full_col_3 = get_attr(X_train_orig, y_train, parent_3)
full_col_sens = get_attr(X_train_orig, y_train, sens_attr)
full_A_idx = get_A_idx(X_train_orig, y_train, A, A_val)

weights = [[], []]
for s in range(2):
    # All full-training rows of sensitive group s that are selected by (A, A_val).
    s_idx = X_train_orig[X_train_orig[sens_attr]==s].index.intersection(full_A_idx)
    for v in stratum_values[0]:
        for u in stratum_values[1]:
            for w in stratum_values[2]:
                idx = X_train_orig[(full_col_1==v) & (full_col_2==u) & (full_col_3==w) & (full_col_sens==s)].index
                idx = idx.intersection(full_A_idx)
                if len(idx)>0:
                    weights[s].append(len(idx)/len(s_idx))

# weights = [[0.636514, 0.363486], [0.589085, 0.410914]]
# weights = [[0.589058, 0.410942], [0.634008, 0.3659912]]
info_dict['weights'] = weights


In [None]:
# Train the weighted-metric-regularised logistic regression on the inputs gathered in info_dict.
# NOTE(review): epoch_num is hard-coded to 100 here, while the other training cells pass the
# notebook-level `epoch_num` — confirm this is intentional.
clf = train_clf(
    LogisticRegression_Reg_WeightedMetric(input_size=X_train.shape[-1], epoch_num=100, c=c),
    info_dict,
)

# Predicted probabilities on the test set and on the full training set.
y_pred_test, y_pred_train = clf.predict_proba(X_test), clf.predict_proba(X_train)

# computeFairness takes a metric id as its 4th argument; ids 0, 1, 2 are reported
# under the labels below.
fairness_values = []
for metric_id, metric_label in enumerate(("Initial statistical parity: ",
                                          "Initial TPR parity: ",
                                          "Initial predictive parity: ")):
    fairness_values.append(computeFairness(y_pred_test, X_test_orig, y_test, metric_id, dataset))
    print(metric_label, fairness_values[-1])
spd_0, tpr_parity_0, predictive_parity_0 = fairness_values

# Test-set loss and accuracy of the trained classifier.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)
accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Pass the classifier trained in the previous cell to record_statistics (defined outside
# this part of the notebook; presumably it stores this method's metrics — TODO confirm).
record_statistics(clf)

### Est. Weighted Metric Regularizer (limited info)

In [None]:
# Hyper-parameters and sampled train/validation splits handed to train_clf.
info_dict = {
    'th': 0.01,
    'balance': 2,
    'x_train': X_train_sampled,
    'y_train': y_train_sampled.to_numpy(),
    'x_val': X_val_sampled,
    'y_val': y_val_sampled.to_numpy(),
}
extl_info = {'data': X_train}

# Attribute columns (as returned by get_attr), fetched once per data set.
train_cols = [get_attr(X_train_orig_sampled, y_train_sampled, col)
              for col in (parent_1, parent_2, parent_3, sens_attr)]
val_cols = [get_attr(X_val_orig_sampled, y_val_sampled, col)
            for col in (parent_1, parent_2, parent_3, sens_attr)]
full_cols = [get_attr(X_train_orig, y_train, col)
             for col in (parent_1, parent_2, parent_3, sens_attr)]
tr_p1, tr_p2, tr_p3, tr_sens = train_cols
fu_p1, fu_p2, fu_p3, fu_sens = full_cols

# Row indices selected by get_A_idx for the sampled splits.
train_A_idx = get_A_idx(X_train_orig_sampled, y_train_sampled, A, A_val)
val_A_idx = get_A_idx(X_val_orig_sampled, y_val_sampled, A, A_val)

# Every (parent_1, parent_2, parent_3) combination, sorted, from the sampled training split.
grid = [(v, u, w)
        for v in np.sort(tr_p1.unique())
        for u in np.sort(tr_p2.unique())
        for w in np.sort(tr_p3.unique())]

# Non-empty protected (sens_attr == 0) and privileged (sens_attr == 1) groups, first for
# the sampled training split and then for the sampled validation split.
for reg_key, frame, (p1, p2, p3, sens), A_idx in (
        ('train_regularizer', X_train_orig_sampled, train_cols, train_A_idx),
        ('val_regularizer', X_val_orig_sampled, val_cols, val_A_idx)):
    protected_groups, privileged_groups = [], []
    for s, bucket in ((0, protected_groups), (1, privileged_groups)):
        for v, u, w in grid:
            idx = frame[(p1 == v) & (p2 == u) & (p3 == w) & (sens == s)].index
            idx = idx.intersection(A_idx)
            if len(idx) > 0:
                bucket.append(idx)
    info_dict[reg_key] = [protected_groups, privileged_groups]

# Estimated group sizes: the fraction of each sampled cell that falls inside get_A_idx,
# scaled by the size of the matching cell in the full training data.
idxs = [[], []]
for s in range(2):
    for v, u, w in grid:
        idx = X_train_orig_sampled[(tr_p1 == v) & (tr_p2 == u) & (tr_p3 == w) & (tr_sens == s)].index
        idx_unbiased = X_train_orig[(fu_p1 == v) & (fu_p2 == u) & (fu_p3 == w) & (fu_sens == s)].index
        idx_a = idx.intersection(train_A_idx)
        if len(idx_a) > 0:
            idxs[s].append(len(idx_a) / len(idx) * len(idx_unbiased))

# Normalise the estimates so the weights of each sensitive group sum to one.
weights = [[estimate / sum(group) for estimate in group] for group in idxs]
info_dict['weights'] = weights

In [None]:
# Train the weighted-metric-regularised logistic regression with the estimated weights
# stored in info_dict.
clf = train_clf(
    LogisticRegression_Reg_WeightedMetric(input_size=X_train.shape[-1], epoch_num=epoch_num, c=c),
    info_dict,
)

# Predicted probabilities on the test set and on the full training set.
y_pred_test, y_pred_train = clf.predict_proba(X_test), clf.predict_proba(X_train)

# computeFairness takes a metric id as its 4th argument; ids 0, 1, 2 are reported
# under the labels below.
fairness_values = []
for metric_id, metric_label in enumerate(("Initial statistical parity: ",
                                          "Initial TPR parity: ",
                                          "Initial predictive parity: ")):
    fairness_values.append(computeFairness(y_pred_test, X_test_orig, y_test, metric_id, dataset))
    print(metric_label, fairness_values[-1])
spd_0, tpr_parity_0, predictive_parity_0 = fairness_values

# Test-set loss and accuracy of the trained classifier.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)
accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Pass the classifier trained in the previous cell to record_statistics (defined outside
# this part of the notebook; presumably it stores this method's metrics — TODO confirm).
record_statistics(clf)

### Cov Regularizer

In [None]:
# Inputs for the covariance-regularised classifier: thresholds, the sensitive attribute
# of the sampled training rows, and the sampled train/validation splits.
info_dict = {
    'th': 0,
    'fair_metric': fair_metric,
    'balance': 8,
    'sens': X_train_orig_sampled[sens_attr].to_numpy(),
    'x_train': X_train_sampled,
    'y_train': y_train_sampled.to_numpy(),
    'x_val': X_val_sampled,
    'y_val': y_val_sampled.to_numpy(),
}

# For each split, keep the protected (sens_attr == 0) and privileged (sens_attr == 1)
# row indices that also appear in the index returned by get_A_idx.
for reg_key, frame, target in (('train_regularizer', X_train_orig_sampled, y_train_sampled),
                               ('val_regularizer', X_val_orig_sampled, y_val_sampled)):
    A_idx = get_A_idx(frame, target, A, A_val)
    protected_idx = frame[frame[sens_attr] == 0].index.intersection(A_idx)
    privileged_idx = frame[frame[sens_attr] == 1].index.intersection(A_idx)
    info_dict[reg_key] = [protected_idx, privileged_idx]

In [None]:
# Train the covariance-regularised logistic regression on the inputs gathered in info_dict.
clf = train_clf(
    LogisticRegression_Reg_Cov(input_size=X_train.shape[-1], epoch_num=epoch_num, c=c),
    info_dict,
)

# Predicted probabilities on the test set and on the full training set.
y_pred_test, y_pred_train = clf.predict_proba(X_test), clf.predict_proba(X_train)

# computeFairness takes a metric id as its 4th argument; ids 0, 1, 2 are reported
# under the labels below.
fairness_values = []
for metric_id, metric_label in enumerate(("Initial statistical parity: ",
                                          "Initial TPR parity: ",
                                          "Initial predictive parity: ")):
    fairness_values.append(computeFairness(y_pred_test, X_test_orig, y_test, metric_id, dataset))
    print(metric_label, fairness_values[-1])
spd_0, tpr_parity_0, predictive_parity_0 = fairness_values

# Test-set loss and accuracy of the trained classifier.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)
accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Pass the classifier trained in the previous cell to record_statistics (defined outside
# this part of the notebook; presumably it stores this method's metrics — TODO confirm).
record_statistics(clf)

### Adversarial Debiasing

In [None]:
# Constructor and fit settings for the adversarial-debiasing classifier, kept together so
# the learning rates and balance terms are easy to find.
ad_model_kwargs = dict(input_size=X_train.shape[-1], epoch_num=epoch_num, c=c,
                       learning_rate=0.01, adv_learning_rate=0.2)
# NOTE(review): load=True presumably restores a previously saved model — confirm in classifier.py.
ad_fit_kwargs = dict(th=0, balance=0.8, loss_balance=8, load=True)

clf = LogisticRegression_AD(**ad_model_kwargs)

# The sensitive attribute of the sampled training rows is the third fit() argument.
sens_train = X_train_orig_sampled[sens_attr]
spds, accs_clf, acc_advs = clf.fit(X_train_sampled, y_train_sampled, sens_train, **ad_fit_kwargs)

In [None]:
# Predicted probabilities of the current classifier on the test set and the full training set.
y_pred_test, y_pred_train = clf.predict_proba(X_test), clf.predict_proba(X_train)

# computeFairness takes a metric id as its 4th argument; ids 0, 1, 2 are reported
# under the labels below.
fairness_values = []
for metric_id, metric_label in enumerate(("Initial statistical parity: ",
                                          "Initial TPR parity: ",
                                          "Initial predictive parity: ")):
    fairness_values.append(computeFairness(y_pred_test, X_test_orig, y_test, metric_id, dataset))
    print(metric_label, fairness_values[-1])
spd_0, tpr_parity_0, predictive_parity_0 = fairness_values

# Test-set loss and accuracy.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)
accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Pass the adversarially trained classifier to record_statistics (defined outside this
# part of the notebook; presumably it stores this method's metrics — TODO confirm).
record_statistics(clf)

### IPW

In [None]:
# Training rows with label 1, together with their entries in sample_bool
# (used below as the target of the selection models).
is_positive = y_train == 1
positive_data, positive_source = X_train[is_positive], sample_bool[is_positive]

# The same pair for the whole training set.
all_data, all_source = X_train, sample_bool

In [None]:
# Fit the two selection models one after the other (construction and fitting stay
# interleaved, as before): first on all training rows, then on the positive-label rows.
# After each fit, print its training accuracy and F1.
selection_models = []
for n_features, feats, source in ((X_train.shape[-1], all_data, all_source),
                                  (positive_data.shape[-1], positive_data, positive_source)):
    selector = LogisticRegression(input_size=n_features)
    selector.fit(feats, source.ravel())
    print(computeAccuracy(source.ravel(), selector.predict_proba(feats)))
    print(computeF1(source.ravel(), selector.predict_proba(feats)))
    selection_models.append(selector)

Pr_C_given_X, Pr_C_given_YX = selection_models

In [None]:
# Per-row weight = Pr_C_given_X prediction divided by Pr_C_given_YX prediction,
# computed for the sampled training split and then for the sampled validation split.
train_numerator = Pr_C_given_X.predict_proba(X_train_sampled)
train_denominator = Pr_C_given_YX.predict_proba(X_train_sampled)
training_weights = train_numerator / train_denominator

val_numerator = Pr_C_given_X.predict_proba(X_val_sampled)
val_denominator = Pr_C_given_YX.predict_proba(X_val_sampled)
val_weights = val_numerator / val_denominator

In [None]:
# Inputs for the IPW classifier: sampled splits, the two fitted selection models used as
# numerator / denominator of the weights, and the threshold / balance settings.
info_dict = {
    'x_train': X_train_sampled,
    'y_train': y_train_sampled.to_numpy(),
    'x_val': X_val_sampled,
    'y_val': y_val_sampled.to_numpy(),
    'clf_numerator': Pr_C_given_X,
    'clf_denominator': Pr_C_given_YX,
    'th': th,
    'balance': 8,
}

# For each split, keep the protected (sens_attr == 0) and privileged (sens_attr == 1)
# row indices that also appear in the index returned by get_A_idx.
for reg_key, frame, target in (('train_regularizer', X_train_orig_sampled, y_train_sampled),
                               ('val_regularizer', X_val_orig_sampled, y_val_sampled)):
    A_idx = get_A_idx(frame, target, A, A_val)
    protected_idx = frame[frame[sens_attr] == 0].index.intersection(A_idx)
    privileged_idx = frame[frame[sens_attr] == 1].index.intersection(A_idx)
    info_dict[reg_key] = [protected_idx, privileged_idx]

In [None]:
# Build the IPW logistic regression and train it on the inputs gathered in info_dict.
clf = train_clf(
    LogisticRegression_IPW(input_size=X_train.shape[-1], epoch_num=epoch_num, c=c),
    info_dict=info_dict,
)

In [None]:
# Predicted probabilities of the current classifier on the test set and the full training set.
y_pred_test, y_pred_train = clf.predict_proba(X_test), clf.predict_proba(X_train)

# computeFairness takes a metric id as its 4th argument; ids 0, 1, 2 are reported
# under the labels below.
fairness_values = []
for metric_id, metric_label in enumerate(("Initial statistical parity: ",
                                          "Initial TPR parity: ",
                                          "Initial predictive parity: ")):
    fairness_values.append(computeFairness(y_pred_test, X_test_orig, y_test, metric_id, dataset))
    print(metric_label, fairness_values[-1])
spd_0, tpr_parity_0, predictive_parity_0 = fairness_values

# Test-set loss and accuracy.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)
accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Pass the IPW-trained classifier to record_statistics (defined outside this part of the
# notebook; presumably it stores this method's metrics — TODO confirm).
record_statistics(clf)

In [None]:
# Recompute the test-set predictions of the current clf and display computeFairness for
# metric id 0 (the value the evaluation cells label "statistical parity").
y_pred_test = clf.predict_proba(X_test)
computeFairness(y_pred_test, X_test_orig, y_test, 0, dataset)

In [None]:
# Histogram of training_weights (the Pr_C_given_X / Pr_C_given_YX prediction ratios
# on the sampled training split).
plt.hist(training_weights)

In [None]:
# Displayed as a pair: (fraction of the training set kept after sampling,
#                       number of training weights >= 1 divided by the number < 1).
sampled_fraction = len(X_train_sampled) / len(X_train)
n_weights_ge_one = len(training_weights[training_weights >= 1])
n_weights_lt_one = len(training_weights[training_weights < 1])
(sampled_fraction, n_weights_ge_one / n_weights_lt_one)

### Better Bound Regularizer

In [None]:
# Hyper-parameters and sampled train/validation splits handed to train_clf.
info_dict = {
    'th': th,
    'balance': 2,
    'x_train': X_train_sampled,
    'y_train': y_train_sampled.to_numpy(),
    'x_val': X_val_sampled,
    'y_val': y_val_sampled.to_numpy(),
}

# Attribute columns (as returned by get_attr), fetched once per data set.
train_cols = [get_attr(X_train_orig_sampled, y_train_sampled, col)
              for col in (parent_1, parent_2, parent_3, sens_attr)]
val_cols = [get_attr(X_val_orig_sampled, y_val_sampled, col)
            for col in (parent_1, parent_2, parent_3, sens_attr)]
fu_p1 = get_attr(X_train_orig, y_train, parent_1)
fu_sens = get_attr(X_train_orig, y_train, sens_attr)

# Row indices selected by get_A_idx for each data set.
train_A_idx = get_A_idx(X_train_orig_sampled, y_train_sampled, A, A_val)
val_A_idx = get_A_idx(X_val_orig_sampled, y_val_sampled, A, A_val)
full_A_idx = get_A_idx(X_train_orig, y_train, A, A_val)

# Sorted parent_1 values and every (parent_1, parent_2, parent_3) combination, both taken
# from the sampled training split and reused for validation and weights.
p1_levels = np.sort(train_cols[0].unique())
grid = [(v, u, w)
        for v in p1_levels
        for u in np.sort(train_cols[1].unique())
        for w in np.sort(train_cols[2].unique())]

# For each split build, per sensitive group:
#   * *_groups       - the non-empty (v, u, w) cells as row-index objects, and
#   * *_subgroup_idx - for each parent_1 value v, the positions in *_groups of its cells.
# A single loop handles both sensitive groups, so the protected and privileged maps cannot
# be mixed up. (The copy-pasted original stored the protected map twice under
# 'train_subgroup_idx'.)
# NOTE(review): subgroup lists are indexed directly by the parent_1 value v, which assumes
# those values are the integers 0..k-1 — confirm against the dataset encoding.
for sub_key, reg_key, frame, (p1, p2, p3, sens), A_idx in (
        ('train_subgroup_idx', 'train_regularizer', X_train_orig_sampled, train_cols, train_A_idx),
        ('val_subgroup_idx', 'val_regularizer', X_val_orig_sampled, val_cols, val_A_idx)):
    protected_subgroup_idx = [[] for _ in range(len(p1_levels))]
    privileged_subgroup_idx = [[] for _ in range(len(p1_levels))]
    protected_groups, privileged_groups = [], []
    for s, groups, subgroup_idx in ((0, protected_groups, protected_subgroup_idx),
                                    (1, privileged_groups, privileged_subgroup_idx)):
        for v, u, w in grid:
            idx = frame[(p1 == v) & (p2 == u) & (p3 == w) & (sens == s)].index
            idx = idx.intersection(A_idx)
            if len(idx) > 0:
                subgroup_idx[v].append(len(groups))
                groups.append(idx)
    info_dict[sub_key] = [protected_subgroup_idx, privileged_subgroup_idx]
    info_dict[reg_key] = [protected_groups, privileged_groups]

# Weights measured on the full training data: the share of each non-empty parent_1 value
# among all rows with sens_attr == s (both restricted via get_A_idx).
weights = [[], []]
for s in range(2):
    s_idx = X_train_orig[X_train_orig[sens_attr] == s].index.intersection(full_A_idx)
    for v in p1_levels:
        idx = X_train_orig[(fu_p1 == v) & (fu_sens == s)].index
        idx = idx.intersection(full_A_idx)
        if len(idx) > 0:
            weights[s].append(len(idx) / len(s_idx))
info_dict['weights'] = weights


In [None]:
# Train the tighter-bound-regularised logistic regression on the inputs gathered in info_dict.
clf = train_clf(
    LogisticRegression_Reg_Tighter_Bound(input_size=X_train.shape[-1], epoch_num=epoch_num, c=c),
    info_dict,
)

# Predicted probabilities on the test set.
y_pred_test = clf.predict_proba(X_test)

# computeFairness takes a metric id as its 4th argument; ids 0, 1, 2 are reported
# under the labels below.
fairness_values = []
for metric_id, metric_label in enumerate(("Initial statistical parity: ",
                                          "Initial TPR parity: ",
                                          "Initial predictive parity: ")):
    fairness_values.append(computeFairness(y_pred_test, X_test_orig, y_test, metric_id, dataset))
    print(metric_label, fairness_values[-1])
spd_0, tpr_parity_0, predictive_parity_0 = fairness_values

# Test-set loss and accuracy of the trained classifier.
loss_0 = logistic_loss(y_test, y_pred_test)
print("Initial loss: ", loss_0)
accuracy_0 = computeAccuracy(y_test, y_pred_test)
print("Initial accuracy: ", accuracy_0)

In [None]:
# Pass the classifier trained in the previous cell to record_statistics (defined outside
# this part of the notebook; presumably it stores this method's metrics — TODO confirm).
record_statistics(clf)

## Plot Results

In [None]:
# Display the result containers side by side. They are defined outside this part of the
# notebook (presumably filled by record_statistics — TODO confirm).
AFs, BFs, ubs, lbs, accs

In [None]:
# Internal method keys -> labels shown in figure legends.
# The LaTeX label is a raw string: '\e' is not a valid Python escape sequence, so the
# non-raw form triggers an invalid-escape warning on current interpreters.
name_mapping = {'Orig':'ORIG(SB)', 'Bound Reg.':'CRAB-MX', 'TightBound Reg.':'CRAB-MU',
                'Adv. Debias':'ZHANG', 'LFR':'ZEMEL', 'Reweighing':'KAMIRAN',
                'Cov. Reg.':'ZAFAR', 'IPW':'CORTES', 'AF Reg.':r'CRAB-M$\emptyset$',
                'Est.AF Reg.':'CRAB-MA', 'BF Reg.':'BF', 'Test': 'ORIG(NOSB)'}
plot_alphabet = 'abcdefghijkl'
def get_name(x):
    """Return the legend label for method key ``x``.

    ``x`` is looked up in ``name_mapping`` directly. If it is not a key there, the part
    before the first ``'-'`` is used instead, so that keys of the form
    ``'<method>-<threshold>'`` resolve to their method's label.

    Raises:
        KeyError: if neither ``x`` nor its prefix is in ``name_mapping``.
    """
    if x in name_mapping:
        return name_mapping[x]
    base_name = x.split('-')[0]
    return name_mapping[base_name]
# NOTE(review): presumably maps a scenario number to its panel position in the figures — confirm.
sc_mapping = {1:1, 4:2, 2:4, 5:3, 3:5, 6:6}

In [None]:
def single_scatter(ax, acc_mean, acc_std, spd_mean, spd_std, colors, methods, markers, title,
                   xlabel='Accuracy', ylabel='Statistical Parity', label_size=18, title_offset=-0.28,
                   title_size=16, legend_size=16, ticksize=12, markersize=75, eline_width=2):
    """Draw one accuracy-vs-fairness panel on ``ax``.

    For every position ``k`` a marker is placed at ``(acc_mean[k], spd_mean[k])`` with
    error bars ``acc_std[k]`` / ``spd_std[k]``, using ``colors[k]`` and ``markers[k]``
    and labelled with ``get_name(methods[k])``.

    Args:
        ax: Matplotlib axes to draw on.
        acc_mean, acc_std: x positions and x error-bar sizes, one per method.
        spd_mean, spd_std: y positions and y error-bar sizes, one per method.
        colors, methods, markers: per-method colour, method key and marker style.
        title: Panel title, placed at vertical offset ``title_offset``.
        xlabel, ylabel: Axis labels, drawn at ``label_size``.
        title_size, ticksize, markersize, eline_width: Font / marker / line sizes.
        legend_size: Accepted for compatibility; no legend is drawn here.
    """
    # One marker (drawn above, zorder 3) and one pair of error bars (zorder 2) per method.
    for k in range(len(acc_mean)):
        point = (acc_mean[k], spd_mean[k])
        ax.scatter(*point, color=colors[k], linewidth=0, zorder=3, edgecolor='black',
                   label=get_name(methods[k]), s=markersize, marker=markers[k])
        ax.errorbar(*point, xerr=acc_std[k], yerr=spd_std[k], color=colors[k],
                    ms=markersize, fmt='', elinewidth=eline_width, zorder=2)

    for set_label, text in ((ax.set_ylabel, ylabel), (ax.set_xlabel, xlabel)):
        set_label(text, fontsize=label_size)

    for spine in ax.spines.values():
        spine.set_linewidth(1)

    # Ticks and tick labels only on the bottom and left edges.
    ax.tick_params(top=False, right=False, labeltop=False, labelright=False)
    ax.xaxis.tick_bottom()
    ax.yaxis.tick_left()
    ax.set_axisbelow(True)

    # Tick marks are coloured white; the tick labels are then set back to black.
    ax.tick_params(colors='white', direction='out')
    for get_labels in (ax.get_xticklabels, ax.get_yticklabels):
        for tick in get_labels():
            tick.set_fontsize(ticksize)
            tick.set_color('black')

    ax.set_title(title, fontsize=title_size, y=title_offset)

    # Tick density and number formatting (2 decimals on y, 3 on x).
    ax.locator_params(axis='y', nbins=5)
    ax.locator_params(axis='x', nbins=4)
    ax.xaxis.set_major_locator(plt.MaxNLocator(3))
    ax.yaxis.set_major_formatter(FormatStrFormatter('%0.2f'))
    ax.xaxis.set_major_formatter(FormatStrFormatter('%0.3f'))

In [None]:
# LaTeX (mathtext) strings for two fairness quantities. The names suggest EO_STR is the
# one conditioned on Y and SPD_STR the unconditioned one — TODO confirm against the paper.
# Raw strings are required: '\d', '\O' and '\e' are not valid Python escape sequences, so
# the non-raw form triggers invalid-escape warnings on current interpreters.
EO_STR = r'$\digamma_{h, Y}(\Omega)$'
SPD_STR = r'$\digamma_{h, \emptyset}(\Omega)$'

In [None]:
def plot_line(method, ds, sc, dict_name, ax, ths, label, color, marker, markersize=12, eline_width=2, err=False):
    """Plot one method's threshold sweep as a connected line on ``ax``.

    The results file ``dict_name`` is JSON, indexed as
    ``d[ds]['sc<sc>'][str(seed)]['<method>-<th>']``. For every threshold in ``ths`` the
    absolute value of element 0 (plotted on the y axis) and element -2 (plotted on the
    x axis) are averaged over the seeds in the module-level ``val_seeds`` that contain
    that entry; seeds without the entry are skipped.

    Args:
        method: method-name prefix; result keys are ``f'{method}-{th}'``.
        ds: dataset key in the results file.
        sc: scenario number (looked up as ``f'sc{sc}'``).
        dict_name: path of the JSON results file.
        ax: Matplotlib axes to draw on.
        ths: thresholds, in the order the points should be connected.
        label: legend label, passed through ``get_name``.
        color, marker, markersize: line/marker styling.
        eline_width: error-bar line width (used only when ``err`` is true).
        err: if true, also draw x/y error bars (standard deviation over seeds) per point.

    Returns:
        None. The axes is modified in place.
    """
    with open(dict_name, 'r') as f:
        d = json.loads(f.read())
    runs = d[ds][f'sc{sc}']
    methods = [f'{method}-{th}' for th in ths]

    # Local accumulators: nothing is read from or written to module-level state here.
    afs = dict()
    test_accs = dict()
    for mt in methods:
        present = [runs[str(vs)][mt] for vs in val_seeds if mt in runs[str(vs)]]
        afs[mt] = [abs(res[0]) for res in present]
        test_accs[mt] = [res[-2] for res in present]

    af_mean = [np.mean(afs[mt]) for mt in methods]
    acc_mean = [np.mean(test_accs[mt]) for mt in methods]
    af_std = [np.std(afs[mt]) for mt in methods]
    acc_std = [np.std(test_accs[mt]) for mt in methods]

    ax.plot(acc_mean, af_mean, label=get_name(label), color=color, marker=marker, markersize=markersize)
    if err:
        for i in range(len(acc_mean)):
            ax.errorbar(acc_mean[i], af_mean[i], xerr=acc_std[i], yerr=af_std[i], color=color,
                        ms=markersize, fmt='', elinewidth=eline_width)


In [None]:
# Threshold sweeps per regulariser. Keys are '<dataset>-<scenario>'; each list holds the
# thresholds that are appended to the method name ('<method>-<threshold>') when results
# are looked up, in the order in which the points are connected on the plot.

# Bound
bound_th_dict = {
    'adult-1': [0.03, 0.15, 0.35, 0.43, 0.5],
    'adult-4': [0.01, 0.06, 0.22, 0.3, 0.5],
    'hmda-1': [0.2, 0.565, 0.69, 0.8, 1.0],
    'hmda-4': [0.15, 0.235, 0.3, 0.4, 1.0],
    'law-1': [0.05, 0.16, 0.24, 0.33, 0.5],
    'law-4': [0.02, 0.12, 0.2, 0.35, 0.5],
}

# Tight Bound
tightbound_th_dict = {
    'adult-2': [0.08, 0.13, 0.21, 0.28, 1.0],
    'adult-5': [0.05, 0.12, 0.16, 0.23, 1.0],
    'law-2': [0.05, 0.215, 0.265, 0.33, 1.0],
    'law-5': [0.05, 0.22, 0.31, 0.39, 1.0],
    'hmda-2': [0.01, 0.15, 0.28, 0.3, 1.0],
    'hmda-5': [0.03, 0.455, 0.5, 0.52, 1.0],
}

# Est
est_th_dict = {
    'adult-2': [0.01, 0.05, 0.10, 0.15, 0.25],
    'adult-3': [0.02, 0.08, 0.16, 0.24, 0.4],
    'adult-5': [0.01, 0.05, 0.09, 0.13, 0.25],
    'adult-6': [0.02, 0.08, 0.16, 0.24, 0.4],
    'law-2': [0.02, 0.08, 0.16, 0.24, 0.4],
    # NOTE(review): the first two values are not ascending, unlike every other entry;
    # possibly meant to be [0.01, 0.05, ...] - confirm against the stored result keys.
    'law-3': [0.05, 0.01, 0.09, 0.13, 0.25],
    'law-5': [0.02, 0.08, 0.16, 0.24, 0.4],
    'law-6': [0.02, 0.08, 0.16, 0.24, 0.4],
    'hmda-2': [0.01, 0.04, 0.07, 0.1, 0.2],
    'hmda-3': [0.02, 0.08, 0.16, 0.24, 0.4],
    'hmda-5': [0.01, 0.06, 0.11, 0.16, 0.3],
    'hmda-6': [0.01, 0.05, 0.09, 0.13, 0.25],
}

In [None]:
# Figure 'bounds-comparison.pdf': one row of six panels (scenarios 1 and 4, each on
# adult / law / hmda). Every panel shows the baseline methods as scatter points and the
# 'Bound Reg.' threshold sweep as a line.
fig, axs = plt.subplots(1, 6, dpi=400, figsize=(21, 4))
methods = ['Orig', 'Adv. Debias', 'LFR', 'Reweighing', 'Cov. Reg.', 'Test']
colors = ['blue', 'orange', 'violet', 'slategrey', 'olive', 'red']
markers = ['o', 'p', 'v', 's', '*', 'X']
# Panels carry no axis labels of their own; a shared y label is added on an overlay axes.
xlabel = None
ylabel = None

cnt = 0  # running panel index; also selects the panel letter from plot_alphabet
for m in ['EO']:
    dict_name = m + '_LR.dict'
    with open(dict_name, 'r') as f:
        d = json.loads(f.read())
    for sc in [1, 4]:
        for ds in ['adult', 'law', 'hmda']:
            ax = axs[cnt % 6]
            runs = d[ds][f'sc{sc}']
            # Per method: |element 0| and element -2 of each seed's result; seeds that lack
            # the method are skipped. Keep `afs` bound at module level before plot_line is
            # called, in case plot_line stores into a global of that name.
            afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds if mt in runs[str(vs)]]
                   for mt in methods}
            test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds if mt in runs[str(vs)]]
                         for mt in methods}
            af_mean = [np.mean(afs[mt]) for mt in methods]
            acc_mean = [np.mean(test_accs[mt]) for mt in methods]
            af_std = [np.std(afs[mt]) for mt in methods]
            acc_std = [np.std(test_accs[mt]) for mt in methods]

            # Line first, scatter second: the legend `order` indices below rely on this.
            ths = bound_th_dict[f'{ds}-{sc}']
            plot_line('Bound Reg.', ds, sc, dict_name, ax, ths, label='Bound Reg.',
                      color='blueviolet', marker='d', markersize=10.95, err=True)
            single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
                           f'({plot_alphabet[cnt]}) {ds.upper()}-G1-S{sc_mapping[sc]}', xlabel, ylabel,
                           label_size=10, title_size=20, legend_size=5, ticksize=16, markersize=120)

            # Shade the region below 0.03 green and the region above it red, keeping the x range.
            ax.set_ylim(0, 0.35)
            xleft, xright = ax.get_xlim()
            ax.fill_between((xleft, xright), 0.03, 1, color='red', alpha=0.1)
            ax.fill_between((xleft, xright), 0, 0.02999, color='green', alpha=0.1)
            ax.set_xlim(xleft, xright)
            ax.grid(color='grey', linestyle='-', alpha=0.3)
            ax.set_zorder(3)
            cnt += 1

# One shared legend above the panels, built from the last panel's artists.
lines, labels = axs[-1].get_legend_handles_labels()
order = [1, 6, 0, 3, 4, 2, 5]
lgd = fig.legend([lines[i] for i in order], [labels[i] for i in order], loc='upper center',
                 fontsize=30, ncol=len(labels), bbox_to_anchor=(0.5, 1.1), fancybox=False, framealpha=0.0,
                 prop={'size': 20})
# Legend.legendHandles was renamed to Legend.legend_handles in Matplotlib 3.7 and later
# removed; support both spellings.
legend_handles = getattr(lgd, 'legend_handles', None)
if legend_handles is None:
    legend_handles = lgd.legendHandles
for handle in legend_handles:
    handle._sizes = [120]

# Invisible full-figure axes that only carries the shared y label.
overlay = fig.add_subplot(111, frameon=False)
overlay.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
overlay.grid(False)
overlay.set_ylabel(f'{EO_STR}\n', fontsize=24)

fig.tight_layout()
fig.savefig('bounds-comparison.pdf', bbox_inches='tight')

In [None]:
# double row
# Figure 'est-comparison.pdf': 2 x 6 panels (scenarios 5, 2, 3, 6, each on adult / law / hmda).
# Every panel shows the 'AF Reg.' sweep plus, for scenarios 2 and 5, the 'TightBound Reg.'
# and 'Est.AF Reg.' sweeps, on top of the Orig / IPW / Test scatter points.
fig, axs = plt.subplots(2, 6, dpi=400, figsize=(21, 7))
methods = ['Orig', 'IPW', 'Test']
colors = ['blue', 'darkgreen', 'red']
markers = ['o', '^', 'X']
# Panels carry no axis labels of their own; a shared y label is added on an overlay axes.
xlabel = None
ylabel = None

cnt = 0  # running panel index; also selects the panel letter from plot_alphabet
for m in ['EO']:
    dict_name = m + '_LR.dict'
    with open(dict_name, 'r') as f:
        d = json.loads(f.read())
    for sc in [5, 2, 3, 6]:
        for ds in ['adult', 'law', 'hmda']:
            row_id, col_id = divmod(cnt, 6)
            ax = axs[row_id][col_id]
            runs = d[ds][f'sc{sc}']
            # Per method: |element 0| and element -2 of each seed's result; seeds that lack
            # the method are skipped. Keep `afs` bound at module level before plot_line is
            # called, in case plot_line stores into a global of that name.
            afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds if mt in runs[str(vs)]]
                   for mt in methods}
            test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds if mt in runs[str(vs)]]
                         for mt in methods}
            af_mean = [np.mean(afs[mt]) for mt in methods]
            acc_mean = [np.mean(test_accs[mt]) for mt in methods]
            af_std = [np.std(afs[mt]) for mt in methods]
            acc_std = [np.std(test_accs[mt]) for mt in methods]

            # Drawing order (TightBound, Est.AF, AF, then scatter) fixes the legend indices below.
            ths = est_th_dict[f'{ds}-{sc}']
            if sc in [2, 5]:
                plot_line('TightBound Reg.', ds, sc, dict_name, ax, tightbound_th_dict[f'{ds}-{sc}'],
                          label='TightBound Reg.', color='darkred', marker='p', markersize=8)
                plot_line('Est.AF Reg.', ds, sc, dict_name, ax, ths, label='Est.AF Reg.',
                          color='violet', marker='o', markersize=10.95)
            plot_line('AF Reg.', ds, sc, dict_name, ax, ths, label='AF Reg.',
                      color='orange', marker='v', markersize=10.95)
            single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
                           f'({plot_alphabet[cnt]}) {ds.upper()}-G{row_id+2}-S{(sc_mapping[sc]-1)%2+1}',
                           xlabel, ylabel, label_size=10,
                           title_size=20, legend_size=5, ticksize=16, markersize=120)
            ax.xaxis.set_major_locator(plt.MaxNLocator(3))

            # Shade the region below 0.03 green and the region above it red, keeping the x range.
            ax.set_ylim(0, 0.35)
            xleft, xright = ax.get_xlim()
            ax.fill_between((xleft, xright), 0.03, 1, color='red', alpha=0.1)
            ax.fill_between((xleft, xright), 0, 0.02999, color='green', alpha=0.1)
            ax.set_xlim(xleft, xright)
            ax.grid(color='grey', linestyle='-', alpha=0.3)
            ax.set_zorder(3)
            cnt += 1

# One shared legend above the panels, built from the first panel's artists.
lines, labels = axs[0][0].get_legend_handles_labels()
order = [3, 5, 2, 1, 0, 4]
lgd = fig.legend([lines[i] for i in order], [labels[i] for i in order], loc='upper center',
                 fontsize=30, ncol=len(labels), bbox_to_anchor=(0.5, 1.06), fancybox=False,
                 framealpha=0.0, prop={'size': 20})
# Legend.legendHandles was renamed to Legend.legend_handles in Matplotlib 3.7 and later
# removed; support both spellings.
legend_handles = getattr(lgd, 'legend_handles', None)
if legend_handles is None:
    legend_handles = lgd.legendHandles
for handle in legend_handles:
    handle._sizes = [120]

# Invisible full-figure axes that only carries the shared y label.
overlay = fig.add_subplot(111, frameon=False)
overlay.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
overlay.grid(False)
overlay.set_ylabel(f'{EO_STR}\n', fontsize=24)

fig.tight_layout()
fig.savefig('est-comparison.pdf', bbox_inches='tight')

threshold for bounds


unbiased model (trained on the test set)

In [None]:
# 1*2 plots for SPD
# Figure 'spd-comparison.pdf': adult, scenario 1, SPD results.
#   left  - baseline methods (scatter) plus the 'Bound Reg.' sweep
#   right - Orig / IPW / Test (scatter) plus the TightBound / Est.AF / AF sweeps
fig, axs = plt.subplots(1, 2, dpi=400, figsize=(8, 4.5))

# Thresholds for this figure only. They live under figure-specific names so that the
# multi-dataset bound_th_dict / tightbound_th_dict / est_th_dict are not overwritten.
spd_bound_ths = [0.05, 0.14, 0.3, 0.35, 1.0]
spd_tightbound_ths = [0.02, 0.08, 0.12, 0.17, 1.0]
spd_est_ths = [0.01, 0.06, 0.11, 0.16, 1.0]

m = 'SPD'
ds, sc = 'adult', 1
# Defined here so the cell does not depend on values left behind by earlier cells.
xlabel = None
ylabel = None

dict_name = m + '_LR.dict'
with open(dict_name, 'r') as f:
    d = json.loads(f.read())
runs = d[ds][f'sc{sc}']

# --- (a) W/O EI: scatter first, then the Bound Reg. line (legend indices rely on this order).
ax = axs[0]
methods = ['Orig', 'Adv. Debias', 'LFR', 'Reweighing', 'Cov. Reg.', 'Test']
colors = ['blue', 'orange', 'violet', 'slategrey', 'olive', 'red']
markers = ['o', 'p', 'v', 's', '*', 'X']
# Keep `afs` bound at module level before plot_line is called, in case plot_line stores
# into a global of that name.
afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
af_mean = [np.mean(afs[mt]) for mt in methods]
acc_mean = [np.mean(test_accs[mt]) for mt in methods]
af_std = [np.std(afs[mt]) for mt in methods]
acc_std = [np.std(test_accs[mt]) for mt in methods]
single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
               f'(a) W/O EI', xlabel, ylabel, label_size=10,
               title_size=20, legend_size=5, ticksize=16, markersize=80, title_offset=-0.25)
plot_line('Bound Reg.', ds, sc, dict_name, ax, spd_bound_ths, label='Bound Reg.',
          color='blueviolet', marker='d', markersize=9)
ax.set_ylim(0, 0.25)

# --- (b) W/ EI: the three sweeps first, then the scatter points.
ax = axs[1]
methods = ['Orig', 'IPW', 'Test']
colors = ['blue', 'darkgreen', 'red']
markers = ['o', '^', 'X']
afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
af_mean = [np.mean(afs[mt]) for mt in methods]
acc_mean = [np.mean(test_accs[mt]) for mt in methods]
af_std = [np.std(afs[mt]) for mt in methods]
acc_std = [np.std(test_accs[mt]) for mt in methods]
plot_line('TightBound Reg.', ds, sc, dict_name, ax, spd_tightbound_ths,
          label='TightBound Reg.', color='darkred', marker='p', markersize=9)
plot_line('Est.AF Reg.', ds, sc, dict_name, ax, spd_est_ths, label='Est.AF Reg.',
          color='violet', marker='o', markersize=9)
plot_line('AF Reg.', ds, sc, dict_name, ax, spd_est_ths, label='AF Reg.',
          color='orange', marker='v', markersize=9)
single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
               f'(b) W/ EI', xlabel, ylabel, label_size=10,
               title_size=20, legend_size=5, ticksize=16, markersize=80, title_offset=-0.25)
ax.set_ylim(0, 0.25)

# Shade the region below 0.03 green and the region above it red, keeping each panel's x range.
for ax in axs:
    xleft, xright = ax.get_xlim()
    ax.fill_between((xleft, xright), 0.03, 1, color='red', alpha=0.1)
    ax.fill_between((xleft, xright), 0, 0.02999, color='green', alpha=0.1)
    ax.set_xlim(xleft, xright)
    ax.grid(color='grey', linestyle='-', alpha=0.3)
    ax.set_zorder(3)

# Combined legend to the right of the figure: artists of the left panel followed by those
# of the right panel; `order` picks and arranges the entries that are shown.
lines0, labels0 = axs[0].get_legend_handles_labels()
lines1, labels1 = axs[1].get_legend_handles_labels()
lines, labels = lines0+lines1, labels0+labels1
order = [0, 5, 6, 2, 3, 1, 4, 9, 8, 7, 11]
lgd = fig.legend([lines[i] for i in order], [labels[i] for i in order], loc='center',
                 fontsize=30, ncol=1, bbox_to_anchor=(1.15, 0.55), fancybox=False, framealpha=0.0,
                 prop={'size': 18})
# Legend.legendHandles was renamed to Legend.legend_handles in Matplotlib 3.7 and later
# removed; support both spellings.
legend_handles = getattr(lgd, 'legend_handles', None)
if legend_handles is None:
    legend_handles = lgd.legendHandles
for handle in legend_handles:
    handle._sizes = [80]

# Invisible full-figure axes that only carries the shared y label.
ax = fig.add_subplot(111, frameon=False)
ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
ax.grid(False)
ax.set_ylabel(SPD_STR, fontsize=24)
ax.xaxis.set_label_coords(0.5, -0.28)
ax.yaxis.set_label_coords(-0.12, 0.5)
fig.tight_layout()
fig.savefig('spd-comparison.pdf', bbox_inches='tight')

In [None]:
# 2*2 plots for SVM/NN
# Figure 'svm-nn-comparison.pdf': adult, scenario 1, EO results.
#   row 0 - SVM results file, row 1 - NN results file
#   column 0 - baselines + 'Bound Reg.' sweep, column 1 - Orig / IPW / Test + the other sweeps
fig, axs = plt.subplots(2, 2, dpi=400, figsize=(8, 8))

m = 'EO'
ds, sc = 'adult', 1
# Defined here so the cell does not depend on values left behind by earlier cells.
xlabel = None
ylabel = None

# Thresholds for this figure only, kept under figure-specific names so that the
# multi-dataset bound_th_dict / tightbound_th_dict / est_th_dict are not overwritten.
svm_bound_ths = [0.05, 0.14, 0.3, 0.35, 1.0]
svm_tightbound_ths = [0.02, 0.09, 0.15, 0.22, 1.0]
svm_est_ths = [0.02, 0.08, 0.14, 0.2, 1.0]
nn_bound_ths = [0.07, 0.15, 0.3, 0.35, 1.0]
nn_tightbound_ths = [0.01, 0.03, 0.07, 0.09, 1.0]
nn_est_ths = [0.01, 0.04, 0.07, 0.1, 1.0]

# ---------------------------------------------------------------- SVM (top row)
dict_name = m + '_SVM.dict'
with open(dict_name, 'r') as f:
    d = json.loads(f.read())
runs = d[ds][f'sc{sc}']

# (a) scatter first, then the Bound Reg. line (legend indices rely on this order).
ax = axs[0][0]
methods = ['Orig', 'Adv. Debias', 'LFR', 'Reweighing', 'Cov. Reg.', 'Test']
colors = ['blue', 'orange', 'violet', 'slategrey', 'olive', 'red']
markers = ['o', 'p', 'v', 's', '*', 'X']
# Keep `afs` bound at module level before plot_line is called, in case plot_line stores
# into a global of that name.
afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
af_mean = [np.mean(afs[mt]) for mt in methods]
acc_mean = [np.mean(test_accs[mt]) for mt in methods]
af_std = [np.std(afs[mt]) for mt in methods]
acc_std = [np.std(test_accs[mt]) for mt in methods]
single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
               '(a) SVM W/O EI', xlabel, ylabel, label_size=10,
               title_size=16, legend_size=5, ticksize=14, markersize=80, title_offset=-0.22)
plot_line('Bound Reg.', ds, sc, dict_name, ax, svm_bound_ths, label='Bound Reg.',
          color='blueviolet', marker='d', markersize=9)
ax.set_ylim(0, 0.25)

# (b) the three sweeps first, then the scatter points.
ax = axs[0][1]
methods = ['Orig', 'IPW', 'Test']
colors = ['blue', 'darkgreen', 'red']
markers = ['o', '^', 'X']
afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
af_mean = [np.mean(afs[mt]) for mt in methods]
acc_mean = [np.mean(test_accs[mt]) for mt in methods]
af_std = [np.std(afs[mt]) for mt in methods]
acc_std = [np.std(test_accs[mt]) for mt in methods]
plot_line('TightBound Reg.', ds, sc, dict_name, ax, svm_tightbound_ths,
          label='TightBound Reg.', color='darkred', marker='p', markersize=8)
plot_line('Est.AF Reg.', ds, sc, dict_name, ax, svm_est_ths, label='Est.AF Reg.',
          color='violet', marker='o', markersize=9)
plot_line('AF Reg.', ds, sc, dict_name, ax, svm_est_ths, label='AF Reg.',
          color='orange', marker='v', markersize=9)
single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
               f'(b) SVM W/ EI', xlabel, ylabel, label_size=10,
               title_size=16, legend_size=5, ticksize=14, markersize=80, title_offset=-0.22)
ax.set_ylim(0, 0.25)

# ---------------------------------------------------------------- NN (bottom row)
dict_name = m + '_NN.dict'
with open(dict_name, 'r') as f:
    d = json.loads(f.read())
runs = d[ds][f'sc{sc}']

# (c) Bound Reg. line first, then the scatter points (no 'Cov. Reg.' entry for NN).
ax = axs[1][0]
methods = ['Orig', 'Adv. Debias', 'LFR', 'Reweighing', 'Test']
colors = ['blue', 'orange', 'violet', 'slategrey', 'red']
markers = ['o', 'p', 'v', 's', 'X']
afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
af_mean = [np.mean(afs[mt]) for mt in methods]
acc_mean = [np.mean(test_accs[mt]) for mt in methods]
af_std = [np.std(afs[mt]) for mt in methods]
acc_std = [np.std(test_accs[mt]) for mt in methods]
plot_line('Bound Reg.', ds, sc, dict_name, ax, nn_bound_ths, label='Bound Reg.',
          color='blueviolet', marker='d', markersize=9)
single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
               f'(c) NN W/O EI', xlabel, ylabel, label_size=10,
               title_size=16, legend_size=5, ticksize=14, markersize=80, title_offset=-0.22)
ax.set_ylim(0, 0.18)

# (d) the three sweeps first, then the scatter points.
ax = axs[1][1]
methods = ['Orig', 'IPW', 'Test']
colors = ['blue', 'darkgreen', 'red']
markers = ['o', '^', 'X']
afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
af_mean = [np.mean(afs[mt]) for mt in methods]
acc_mean = [np.mean(test_accs[mt]) for mt in methods]
af_std = [np.std(afs[mt]) for mt in methods]
acc_std = [np.std(test_accs[mt]) for mt in methods]
plot_line('TightBound Reg.', ds, sc, dict_name, ax, nn_tightbound_ths,
          label='TightBound Reg.', color='darkred', marker='p', markersize=9)
plot_line('Est.AF Reg.', ds, sc, dict_name, ax, nn_est_ths, label='Est.AF Reg.',
          color='violet', marker='o', markersize=9)
plot_line('AF Reg.', ds, sc, dict_name, ax, nn_est_ths, label='AF Reg.',
          color='orange', marker='v', markersize=9)
single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
               f'(d) NN W/ EI', xlabel, ylabel, label_size=10,
               title_size=16, legend_size=5, ticksize=14, markersize=80, title_offset=-0.22)
ax.set_ylim(0, 0.18)

# Shade the region below 0.03 green and the region above it red, keeping each panel's x range.
for ax in axs.flat:
    xleft, xright = ax.get_xlim()
    ax.fill_between((xleft, xright), 0.03, 1, color='red', alpha=0.1)
    ax.fill_between((xleft, xright), 0, 0.02999, color='green', alpha=0.1)
    ax.set_xlim(xleft, xright)
    ax.grid(color='grey', linestyle='-', alpha=0.3)
    ax.set_zorder(3)

# Invisible full-figure axes that only carries the shared y label.
ax = fig.add_subplot(111, frameon=False)

# Combined legend to the right of the figure, built from the two SVM panels; `order`
# picks and arranges the entries that are shown.
lines0, labels0 = axs[0][0].get_legend_handles_labels()
lines1, labels1 = axs[0][1].get_legend_handles_labels()
lines, labels = lines0+lines1, labels0+labels1
order = [0, 5, 6, 3, 4, 2, 1, 7, 8, 9, 11]
lgd = fig.legend([lines[i] for i in order], [labels[i] for i in order], loc='center',
                 fontsize=30, ncol=1, bbox_to_anchor=(1.12, 0.6), fancybox=False, framealpha=0.0,
                 prop={'size': 14}, labelspacing=1.5)
# Legend.legendHandles was renamed to Legend.legend_handles in Matplotlib 3.7 and later
# removed; support both spellings.
legend_handles = getattr(lgd, 'legend_handles', None)
if legend_handles is None:
    legend_handles = lgd.legendHandles
for handle in legend_handles:
    handle._sizes = [80]

ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
ax.grid(False)
ax.set_ylabel(EO_STR, fontsize=20)
ax.yaxis.set_label_coords(-0.12, 0.5)
fig.tight_layout()
fig.savefig('svm-nn-comparison.pdf', bbox_inches='tight')

In [None]:
def plot_line_double(method, ds, sc, dict_name, axs, ths, offset, label, color, marker, markersize=12):
    """Draw one method's results as grouped bars on two stacked axes.

    The results file ``dict_name`` is JSON, indexed as
    ``d[ds]['sc<sc>'][str(seed)]['<method>-<th>']``. For every value in ``ths`` the
    absolute value of element 0 (bars on ``axs[0]``) and element -2 (bars on ``axs[1]``)
    are averaged over the seeds in the module-level ``val_seeds`` that contain the entry;
    the standard deviation over those seeds is shown as the bar's error bar.

    Args:
        method: method-name prefix; result keys are ``f'{method}-{th}'``.
        ds: dataset key in the results file.
        sc: scenario number (looked up as ``f'sc{sc}'``).
        dict_name: path of the JSON results file.
        axs: sequence of axes; index 0 and 1 receive the bars, all of them get tick styling.
        ths: x positions of the bar groups (also used as tick positions); list or array.
        offset: horizontal shift of this method's bars within each group.
        label: legend label, passed through ``get_name``.
        color: bar colour.
        marker, markersize: accepted for call compatibility; not used for bar plots.

    Returns:
        None. The axes are modified in place.
    """
    with open(dict_name, 'r') as f:
        d = json.loads(f.read())
    runs = d[ds][f'sc{sc}']
    methods = [f'{method}-{th}' for th in ths]

    afs = dict()
    test_accs = dict()
    for mt in methods:
        present = [runs[str(vs)][mt] for vs in val_seeds if mt in runs[str(vs)]]
        afs[mt] = [abs(res[0]) for res in present]
        test_accs[mt] = [res[-2] for res in present]
    af_mean = np.array([np.mean(afs[mt]) for mt in methods])
    acc_mean = np.array([np.mean(test_accs[mt]) for mt in methods])
    af_std = np.array([np.std(afs[mt]) for mt in methods])
    acc_std = np.array([np.std(test_accs[mt]) for mt in methods])

    # np.asarray lets callers pass a plain list as well as an ndarray.
    bar_x = np.asarray(ths) + offset
    axs[0].bar(bar_x, af_mean, 4, label=get_name(label), yerr=af_std, color=color)
    axs[1].bar(bar_x, acc_mean, 4, label=get_name(label), yerr=acc_std, color=color)

    for j in range(2):
        axs[j].set_xlim(min(ths)-10, max(ths)+10)
        axs[j].set_xticks(ths)

    # Raw string: keeps the TeX backslashes literal without invalid escape sequences.
    axs[0].set_ylabel(r'$\digamma_{h, Y}(\Omega)$', fontsize=32)
    axs[1].set_ylabel('F1-Score', fontsize=32)
    axs[1].set_ylim(0.55, 0.65)
    axs[0].yaxis.set_major_locator(plt.MaxNLocator(6))
    axs[1].yaxis.set_major_locator(plt.MaxNLocator(6))

    # Tick marks are drawn in white; the tick labels are then recoloured black.
    for ax in axs:
        ax.tick_params(colors='white', direction='out')
        for tick in ax.get_xticklabels():
            tick.set_fontsize(24)
            tick.set_color('black')
        for tick in ax.get_yticklabels():
            tick.set_color('black')
            tick.set_fontsize(24)
    for j in range(2):
        axs[j].yaxis.set_label_coords(-0.1, 0.5)
    

In [None]:
# single
# Figure 'subsample-comparison.pdf': adult, scenario 1, EO results as grouped bars over the
# sample rate. plot_line_double draws on both stacked axes, one call per method.
fig, axs = plt.subplots(2, 1, dpi=400, figsize=(15, 8))

m = 'EO'
ds, sc = 'adult', 1
dict_name = m + '_LR.dict'
# Bar-group positions; must be an ndarray because an offset is added to it per method.
sample_rates = np.array([20, 40, 60, 80, 100])

# (result-key prefix, horizontal offset within a group, legend label, colour, marker)
bar_series = [
    ('AF Reg.-0.01', -6, 'AF Reg.', 'orange', 'v'),
    ('Est.AF Reg.-0.01', -2, 'Est.AF Reg.', 'violet', 'o'),
    ('TightBound Reg.-0.01', 2, 'TightBound Reg.', 'darkred', 'p'),
    ('IPW', 6, 'IPW', 'darkgreen', 's'),
]
for series_key, series_offset, series_label, series_color, series_marker in bar_series:
    plot_line_double(series_key, ds, sc, dict_name, axs, sample_rates, series_offset,
                     label=series_label, color=series_color, marker=series_marker, markersize=8)

# One shared legend above the panels, built from the top panel's artists.
lines, labels = axs[0].get_legend_handles_labels()
order = [0, 1, 2, 3]
lgd = fig.legend([lines[i] for i in order], [labels[i] for i in order], loc='upper center',
                 fontsize=30, ncol=len(labels), bbox_to_anchor=(0.5, 1.1), fancybox=False,
                 framealpha=0.0, prop={'size': 28})
# Legend.legendHandles was renamed to Legend.legend_handles in Matplotlib 3.7 and later
# removed; support both spellings.
legend_handles = getattr(lgd, 'legend_handles', None)
if legend_handles is None:
    legend_handles = lgd.legendHandles
for handle in legend_handles:
    handle._sizes = [120]

# Invisible full-figure axes that only carries the shared x label.
ax = fig.add_subplot(111, frameon=False)
ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
ax.grid(False)
ax.set_xlabel('Sample Rate (%)', fontsize=32)
ax.xaxis.set_label_coords(0.5, -0.09)

fig.tight_layout()
fig.savefig('subsample-comparison.pdf', bbox_inches='tight')

In [None]:
# 1*2 plots for SPD
# Figure 'real-ratio-comparison.pdf': hmda SPD results, scenario 7 (titled Calcasieu) on the
# left and scenario 8 (titled Orleans) on the right, scatter points only.
fig, axs = plt.subplots(1, 2, dpi=400, figsize=(8, 4.5))

m = 'SPD'
ds = 'hmda'
# Defined here so the cell does not depend on values left behind by earlier cells.
xlabel = None
ylabel = None

dict_name = m + '_LR.dict'
with open(dict_name, 'r') as f:
    d = json.loads(f.read())

# (scenario, area shown in the title, methods, colours, markers) per panel. Each method keeps
# the same colour and marker in both panels, because the single legend below is built from
# the left panel only.
panels = [
    (7, 'Calcasieu',
     ['Orig', 'Test', 'AF Reg.-0.0', 'TightBound Reg.-0.0'],
     ['blue', 'red', 'orange', 'darkred'],
     ['o', 'X', 'v', 'p']),
    (8, 'Orleans',
     ['Orig', 'AF Reg.-0.0', 'TightBound Reg.-0.0', 'Test'],
     ['blue', 'orange', 'darkred', 'red'],
     ['o', 'v', 'p', 'X']),
]
for panel_idx, (sc, area, methods, colors, markers) in enumerate(panels):
    ax = axs[panel_idx]
    runs = d[ds][f'sc{sc}']
    afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
    test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
    af_mean = [np.mean(afs[mt]) for mt in methods]
    acc_mean = [np.mean(test_accs[mt]) for mt in methods]
    af_std = [np.std(afs[mt]) for mt in methods]
    acc_std = [np.std(test_accs[mt]) for mt in methods]
    single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
                   f'({plot_alphabet[panel_idx]}) {ds.upper()}-G1-{area}', xlabel, ylabel, label_size=10,
                   title_size=16, legend_size=5, ticksize=14, markersize=80, title_offset=-0.25)
    ax.set_ylim(0, 0.15)

    # Shade the region below 0.03 green and the region above it red, keeping the x range.
    xleft, xright = ax.get_xlim()
    ax.fill_between((xleft, xright), 0.03, 1, color='red', alpha=0.1)
    ax.fill_between((xleft, xright), 0, 0.02999, color='green', alpha=0.1)
    ax.set_xlim(xleft, xright)
    ax.grid(color='grey', linestyle='-', alpha=0.3)
    ax.set_zorder(3)

# One shared legend above the panels, built from the left panel's artists.
lines, labels = axs[0].get_legend_handles_labels()
order = [0, 1, 2, 3]
lgd = fig.legend([lines[i] for i in order], [labels[i] for i in order], loc='upper center',
                 fontsize=30, ncol=len(lines), bbox_to_anchor=(0.5, 1.07), fancybox=False, framealpha=0.0,
                 prop={'size': 16}, columnspacing=0.5)
# Legend.legendHandles was renamed to Legend.legend_handles in Matplotlib 3.7 and later
# removed; support both spellings.
legend_handles = getattr(lgd, 'legend_handles', None)
if legend_handles is None:
    legend_handles = lgd.legendHandles
for handle in legend_handles:
    handle._sizes = [80]

# Invisible full-figure axes that only carries the shared y label.
ax = fig.add_subplot(111, frameon=False)
ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
ax.grid(False)
ax.set_ylabel(SPD_STR, fontsize=20)
ax.yaxis.set_label_coords(-0.12, 0.5)

fig.tight_layout()
fig.savefig('real-ratio-comparison.pdf', bbox_inches='tight')

In [None]:
# 1*2 plots for the synthetic dataset
# Figure 'syn-comparison.pdf': dataset 'syn', scenario 1; EO results on the left and SPD
# results on the right. Each panel shows the baseline scatter points and the 'Bound Reg.' sweep.
fig, axs = plt.subplots(1, 2, dpi=400, figsize=(8, 4.1))

methods = ['Orig', 'Test', 'Adv. Debias', 'LFR', 'Reweighing']
colors = ['blue', 'red', 'orange', 'violet', 'slategrey']
markers = ['o', 'X', 'p', 'v', 's']

ds, sc = 'syn', 1
# Defined here so the cell does not depend on values left behind by earlier cells.
xlabel = None
ylabel = None

# (metric prefix of the results file, panel title, 'Bound Reg.' thresholds) per panel.
# The thresholds are kept local so that the multi-dataset bound_th_dict is not overwritten.
panels = [
    ('EO', f'(a) Syn - {EO_STR}', [0.0, 0.05, 0.15, 0.2, 1.0]),
    ('SPD', f'(b) Syn - {SPD_STR}', [0.05, 0.7, 0.8, 0.85, 1.0]),
]
for ax, (m, panel_title, syn_bound_ths) in zip(axs, panels):
    dict_name = m + '_LR.dict'
    with open(dict_name, 'r') as f:
        d = json.loads(f.read())
    runs = d[ds][f'sc{sc}']
    # Keep `afs` bound at module level before plot_line is called, in case plot_line stores
    # into a global of that name.
    afs = {mt: [abs(runs[str(vs)][mt][0]) for vs in val_seeds] for mt in methods}
    test_accs = {mt: [runs[str(vs)][mt][-2] for vs in val_seeds] for mt in methods}
    af_mean = [np.mean(afs[mt]) for mt in methods]
    acc_mean = [np.mean(test_accs[mt]) for mt in methods]
    af_std = [np.std(afs[mt]) for mt in methods]
    acc_std = [np.std(test_accs[mt]) for mt in methods]

    # Scatter first, line second: the legend `order` indices below rely on this.
    single_scatter(ax, acc_mean, acc_std, af_mean, af_std, colors, methods, markers,
                   panel_title, xlabel, ylabel, label_size=10, title_offset=-0.20,
                   title_size=16, legend_size=5, ticksize=14, markersize=80)
    plot_line('Bound Reg.', ds, sc, dict_name, ax, syn_bound_ths, label='Bound Reg.',
              color='blueviolet', marker='d', markersize=9, err=True)

    # Shade the region below 0.03 green and the region above it red, keeping the x range.
    ax.set_ylim(0, 0.3)
    xleft, xright = ax.get_xlim()
    ax.fill_between((xleft, xright), 0.03, 1, color='red', alpha=0.1)
    ax.fill_between((xleft, xright), 0, 0.02999, color='green', alpha=0.1)
    ax.set_xlim(xleft, xright)
    ax.grid(color='grey', linestyle='-', alpha=0.3)
    ax.set_zorder(3)

# Invisible full-figure axes that only carries the shared y label.
ax = fig.add_subplot(111, frameon=False)

# One shared legend above the panels, built from the left panel's artists.
lines, labels = axs[0].get_legend_handles_labels()
order = [0, 3, 1, 4, 5, 2]
lgd = fig.legend([lines[i] for i in order], [labels[i] for i in order], loc='upper center',
                 fontsize=30, ncol=3, bbox_to_anchor=(0.5, 1.12), fancybox=False, framealpha=0.0,
                 prop={'size': 14})
# Legend.legendHandles was renamed to Legend.legend_handles in Matplotlib 3.7 and later
# removed; support both spellings.
legend_handles = getattr(lgd, 'legend_handles', None)
if legend_handles is None:
    legend_handles = lgd.legendHandles
for handle in legend_handles:
    handle._sizes = [80]

ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
ax.grid(False)
ax.set_ylabel('Discrimination', fontsize=20)
ax.yaxis.set_label_coords(-0.15, 0.5)
fig.tight_layout()
fig.savefig('syn-comparison.pdf', bbox_inches='tight')