In [1]:
import torch
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
from copy import deepcopy


from model import LR
from data import FairnessDataset, SyntheticDataset, GermanDataset
from ei_model_dev import FairBatch, Covariance
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import *

In [2]:
dataset = SyntheticDataset(seed=0)
# dataset = GermanDataset()

In [3]:
def append_res(d, acc, ei):
    d['accuracy'].append(acc)
    d['loss'].append(1-acc)
    d['ei_disparity'].append(ei)
        
def get_res(d, hp):
    res = {'alpha': [],'alpha_prime': [], 'lambda': [], 'delta': [],'delta_prime': [], 'accuracy_mean': [], 'accuracy_std': [], 'loss_mean': [], 'loss_std': [], 'ei_disparity_mean': [], 'ei_disparity_std': []}
    res['alpha'].append(hp['alpha'])
    res['alpha_prime'].append(hp['alpha_prime'])
    res['lambda'].append(hp['lambda'])
    res['delta'].append(hp['delta'])
    res['delta_prime'].append(hp['delta_prime'])
    res['accuracy_mean'].append(np.mean(d['accuracy']))
    res['accuracy_std'].append(np.std(d['accuracy']))
    res['loss_mean'].append(np.mean(d['loss']))
    res['loss_std'].append(np.std(d['loss']))
    res['ei_disparity_mean'].append(np.mean(d['ei_disparity']))
    res['ei_disparity_std'].append(np.std(d['ei_disparity']))
    
    return res

In [4]:
def model_runner(ei_model, dataset, hp, seeds):
    tau = 0.5
    pga_term = hp['pga_term']
    metrics = {'accuracy': [], 'loss': [], 'ei_disparity': []}
    metrics_r = {'accuracy': [], 'loss': [], 'ei_disparity': []}
    
    if hp['optimal_effort']:
        effort = Optimal_Effort(hp['delta'])
    else:
        effort = PGD_Effort(hp['delta'])
    
    for seed in seeds:
        train_tensors, val_tensors, test_tensors = dataset.tensor(fold=seed, z_blind=hp['z_blind'])
        train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
        val_dataset = FairnessDataset(*val_tensors, dataset.imp_feats)
        test_dataset = FairnessDataset(*test_tensors, dataset.imp_feats)
        
        model = LR(num_features=train_dataset.X.shape[1])
        ei_m = ei_model(model, effort, pga_term, tau)
        
        ei_m.train(
            train_dataset, 
            lamb=hp['lambda'],
            lr=hp['learning_rate'],
            alpha=hp['alpha'],
            batch_size=len(train_dataset)
            )
        
        Y_hat, Y_hat_max = ei_m.predict(test_dataset, hp['alpha'])
        test_acc, test_ei = model_performance(test_dataset.Y.detach().numpy(), test_dataset.Z.detach().numpy(), Y_hat, Y_hat_max, tau)
        append_res(metrics, test_acc, test_ei)
        
        Y_hat_r, Y_hat_max_r = ei_m.predict_r(test_dataset, hp['alpha_prime'], hp['delta_prime'])
        test_acc_r, test_ei_r = model_performance(test_dataset.Y.detach().numpy(), test_dataset.Z.detach().numpy(), Y_hat_r, Y_hat_max_r, tau)
        append_res(metrics_r, test_acc_r, test_ei_r)

        models = [ei_m.model, ei_m.model_adv, ei_m.model_adv_r]
        t1, t2, t3, t4 = None, None, None, None
        for i, model in enumerate(models):
            for module in model.layers:
                if hasattr(module, 'weight'):
                    weight = module.weight.data.numpy()
                    bias = module.bias.data.numpy()
                    theta = np.hstack((weight[0], bias))
                    if i == 0: t1 = theta
                    if i == 1: t2 = theta
                    if i == 2: t3 = theta        
        
    results = get_res(metrics, hp)
    results_r = get_res(metrics_r, hp)
    
    return results, results_r, t1, t2, t3

In [5]:
def run_tradeoff(ei_model, dataset, hyper_params, seeds):
    hp = hyper_params.copy()
    results = pd.DataFrame()
    results_r = pd.DataFrame()
    models = {'lambda': [], 'alpha': [],'alpha_prime': [], 'theta': [], 'theta_adv': [], 'theta_adv_r': []}
    
    for alpha in hyper_params['alpha']:
        for lamb in hyper_params['lambda']:
            for delta in hyper_params['delta']:
                for alpha_prime in hyper_params['alpha_prime']:
                    hp['alpha'] = alpha
                    hp['lambda'] = lamb
                    hp['delta'] = delta
                    hp['alpha_prime'] = alpha_prime
                    
                    result, result_r, m1, m2, m3 = model_runner(ei_model, dataset, hp, seeds)
                    results = pd.concat((results, pd.DataFrame(result)))
                    results_r = pd.concat((results_r, pd.DataFrame(result_r)))
                    models['lambda'].append(lamb)
                    models['alpha'].append(alpha)
                    models['alpha_prime'].append(alpha_prime)
                    models['theta'].append(m1)
                    models['theta_adv'].append(m2)
                    models['theta_adv_r'].append(m3)
                    print()
    
    return results, results_r, models

In [6]:
# torch.manual_seed(0)
# Hyperparameters
hyper_params = {}
hyper_params['delta_prime'] = [0.5]
hyper_params['delta'] = [0.]
hyper_params['alpha'] = [0., 0.1, 0.5]
hyper_params['alpha_prime'] = [0., 0.1, 1]
hyper_params['learning_rate'] = 0.01
hyper_params['pga_term'] = np.divide(1e-5,2)
hyper_params['z_blind'] = False
hyper_params['optimal_effort'] = True # True only for Synthetic Data

# 5-Fold Cross Validation
seeds = list(range(2))

# EI Proxy (set ei_proxy to FairBatch or Covariance)
ei_proxy = 'FairBatch'

if ei_proxy == 'FairBatch':
    ei_model = FairBatch
    # hyper_params['lambda'] = np.linspace(0., 0.25, 10) # FairBatch lambdas
    # hyper_params['lambda'] = [0.88] # lambda value that minimizes ei for FairBatch
    hyper_params['lambda'] = [0.] # lambda value that minimizes ei for FairBatch
elif ei_proxy == 'Covariance':
    ei_model = Covariance
    # hyper_params['lambda'] = 1-np.geomspace(0.001,0.999, 20) # Covariance lambdas
    # hyper_params['lambda'] = [0.991144] # lambda value that minimizes ei for Covariance
    hyper_params['lambda'] = [0., 1.] # lambda value that minimizes ei for Covariance

# Run tradeoffs
results, results_r, models = run_tradeoff(ei_model, dataset, hyper_params, seeds)
results['alpha'] = results['alpha'].round(2)
results_r['alpha'] = results_r['alpha'].round(2)

results['alpha_prime'] = results['alpha_prime'].round(2)
results_r['alpha_prime'] = results_r['alpha_prime'].round(2)
# results['alpha'] = results['alpha'].astype(str)

Training [alpha=0.00; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:08<00:00, 12.39epochs/s]
Training [alpha=0.00; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:08<00:00, 12.04epochs/s]





Training [alpha=0.00; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:08<00:00, 12.04epochs/s]
Training [alpha=0.00; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:08<00:00, 12.09epochs/s]





Training [alpha=0.00; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:08<00:00, 12.17epochs/s]
Training [alpha=0.00; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:08<00:00, 11.94epochs/s]





Training [alpha=0.10; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:09<00:00, 10.61epochs/s]
Training [alpha=0.10; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:10<00:00,  9.99epochs/s]





Training [alpha=0.10; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:09<00:00, 10.21epochs/s]
Training [alpha=0.10; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:09<00:00, 10.12epochs/s]





Training [alpha=0.10; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:09<00:00, 10.46epochs/s]
Training [alpha=0.10; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:10<00:00,  9.62epochs/s]





Training [alpha=0.50; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:12<00:00,  7.77epochs/s]
Training [alpha=0.50; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:13<00:00,  7.19epochs/s]





Training [alpha=0.50; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:14<00:00,  6.91epochs/s]
Training [alpha=0.50; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:16<00:00,  6.08epochs/s]





Training [alpha=0.50; lambda=0.00; delta=0.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:14<00:00,  6.85epochs/s]
Training [alpha=0.50; lambda=0.00; delta=0.50]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:18<00:00,  5.51epochs/s]





In [7]:
dt = pd.DataFrame(models)
dt = dt.applymap(lambda x: np.round(x, 2))
dt['L-inf(theta-theta_adv)'] = (dt.theta - dt.theta_adv).apply(lambda x: np.linalg.norm(x, np.inf))
dt[dt['lambda']==1.][['lambda', 'alpha', 'theta', 'theta_adv', 'L-inf(theta-theta_adv)']].set_index('lambda').sort_index()
dt

  dt = dt.applymap(lambda x: np.round(x, 2))


Unnamed: 0,lambda,alpha,theta,theta_adv,theta_adv_r,L-inf(theta-theta_adv)
0,0.0,0.0,"[0.61, 1.01, 0.18, -0.32]","[0.61, 1.01, 0.18, -0.32]","[0.61, 1.01, 0.18, -0.32]",0.0
1,0.0,0.1,"[0.31, 0.51, 0.58, -0.67]","[0.21, 0.41, 0.68, -0.77]","[0.21, 0.41, 0.68, -0.77]",0.1
2,0.0,1.5,"[0.98, 1.3, 0.18, -0.36]","[-0.52, -0.2, 1.68, -1.86]","[-0.0, -0.0, 1.68, -1.86]",1.5
3,0.0,2.0,"[0.48, 0.73, 0.23, -0.31]","[-0.15, -1.27, 2.23, -2.31]","[0.0, -0.0, 2.23, -2.31]",2.0


In [7]:
dt = pd.DataFrame(models)
dt = dt.applymap(lambda x: np.round(x, 2))
dt['L-inf(theta-theta_adv)'] = (dt.theta - dt.theta_adv).apply(lambda x: np.linalg.norm(x, np.inf))
dt[dt['lambda']==1.][['lambda', 'alpha', 'alpha_prime', 'theta', 'theta_adv', 'L-inf(theta-theta_adv)']].set_index('lambda').sort_index()
dt

  dt = dt.applymap(lambda x: np.round(x, 2))


Unnamed: 0,lambda,alpha,alpha_prime,theta,theta_adv,theta_adv_r,L-inf(theta-theta_adv)
0,0.0,0.0,0.0,"[0.87, 1.04, 0.8, -0.9]","[0.87, 1.04, 0.8, -0.9]","[0.87, 1.04, 0.8, -0.9]",0.0
1,0.0,0.0,0.1,"[0.55, 0.76, 0.49, -0.56]","[0.55, 0.76, 0.49, -0.56]","[0.45, 0.66, 0.59, -0.66]",0.0
2,0.0,0.0,1.0,"[1.0, 0.84, 0.26, -0.65]","[1.0, 0.84, 0.26, -0.65]","[0.0, 0.0, 1.26, -1.65]",0.0
3,0.0,0.1,0.0,"[1.2, 0.37, 0.26, -0.66]","[1.3, 0.27, 0.36, -0.76]","[1.2, 0.37, 0.26, -0.66]",0.1
4,0.0,0.1,0.1,"[0.64, 0.42, 0.45, -0.56]","[0.54, 0.32, 0.55, -0.66]","[0.54, 0.32, 0.55, -0.66]",0.1
5,0.0,0.1,1.0,"[0.47, 0.55, 0.73, -0.81]","[0.37, 0.45, 0.83, -0.91]","[-0.0, 0.0, 1.73, -1.81]",0.1
6,0.0,0.5,0.0,"[1.22, 1.07, 0.02, -0.25]","[1.59, 0.57, 0.52, -0.75]","[1.22, 1.07, 0.02, -0.25]",0.5
7,0.0,0.5,0.1,"[0.34, 1.31, 0.67, -0.74]","[-0.16, 0.81, 1.17, -1.24]","[0.24, 1.21, 0.77, -0.84]",0.5
8,0.0,0.5,1.0,"[0.77, 1.04, 0.45, -0.73]","[0.27, 0.54, 0.95, -1.23]","[-0.0, 0.04, 1.45, -1.73]",0.5


In [8]:
print('EI (train alpha = 0)')
results[['alpha', 'lambda', 'delta', 'loss_mean', 'loss_std', 'ei_disparity_mean', 'ei_disparity_std']].sort_values(['lambda', 'alpha']).set_index(['lambda', 'alpha'])

EI (train alpha = 0)


Unnamed: 0_level_0,Unnamed: 1_level_0,delta,loss_mean,loss_std,ei_disparity_mean,ei_disparity_std
lambda,alpha,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,0.0,0.5,0.22525,0.00725,0.077881,0.001989
0.0,0.1,0.5,0.22975,0.011,0.346596,0.106597
0.0,1.5,0.5,0.223125,0.002875,0.506662,0.144265
0.0,2.0,0.5,0.223375,0.007375,0.71247,0.04367


In [12]:
print('EI (train alpha = 0)')
results[['alpha', 'alpha_prime', 'lambda', 'delta', 'delta_prime', 'loss_mean', 'loss_std', 'ei_disparity_mean', 'ei_disparity_std']].sort_values(['lambda', 'alpha', 'alpha_prime']).set_index(['lambda', 'alpha', 'alpha_prime'])

EI (train alpha = 0)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,delta,delta_prime,loss_mean,loss_std,ei_disparity_mean,ei_disparity_std
lambda,alpha,alpha_prime,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,0.0,0.0,0.0,[0.5],0.231375,0.004625,0.105355,0.105355
0.0,0.0,0.1,0.0,[0.5],0.22525,0.002,0.095254,0.095254
0.0,0.0,1.0,0.0,[0.5],0.23475,0.0075,0.039337,0.039337
0.0,0.1,0.0,0.0,[0.5],0.262875,0.018375,0.083056,0.07211
0.0,0.1,0.1,0.0,[0.5],0.23225,0.00225,0.191829,0.178219
0.0,0.1,1.0,0.0,[0.5],0.23025,0.0075,0.253156,0.240555
0.0,0.5,0.0,0.0,[0.5],0.23025,0.00575,0.118263,0.104547
0.0,0.5,0.1,0.0,[0.5],0.23325,0.0115,0.253343,0.247645
0.0,0.5,1.0,0.0,[0.5],0.237625,0.011375,0.175129,0.129936


In [22]:
print('REI (train alpha != 0)')
results_r[['alpha', 'lambda', 'delta', 'loss_mean', 'loss_std', 'ei_disparity_mean', 'ei_disparity_std']].sort_values(['lambda', 'alpha']).set_index(['lambda', 'alpha'])

REI (train alpha != 0)


Unnamed: 0_level_0,Unnamed: 1_level_0,delta,loss_mean,loss_std,ei_disparity_mean,ei_disparity_std
lambda,alpha,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,0.0,0.5,0.22525,0.00725,0.077881,0.001989
0.0,0.1,0.5,0.22975,0.011,0.346596,0.106597
0.0,1.5,0.5,0.223125,0.002875,0.0,0.0
0.0,2.0,0.5,0.223375,0.007375,0.0,0.0


In [9]:
print('REI (train alpha != 0)')
results_r[['alpha','alpha_prime', 'lambda', 'delta', 'delta_prime', 'loss_mean', 'loss_std', 'ei_disparity_mean', 'ei_disparity_std']].sort_values(['lambda', 'alpha']).set_index(['lambda', 'alpha', 'alpha_prime'])

REI (train alpha != 0)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,delta,delta_prime,loss_mean,loss_std,ei_disparity_mean,ei_disparity_std
lambda,alpha,alpha_prime,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,0.0,0.0,0.0,[0.5],0.22525,0.00725,0.077881,0.001989
0.0,0.0,0.1,0.0,[0.5],0.22975,0.011,0.346596,0.106597
0.0,0.0,1.0,0.0,[0.5],0.223125,0.002875,0.03842,0.03842
0.0,0.1,0.0,0.0,[0.5],0.223375,0.007375,0.173257,0.068446
0.0,0.1,0.1,0.0,[0.5],0.237875,0.018625,0.19465,0.047135
0.0,0.1,1.0,0.0,[0.5],0.232125,0.009875,0.0,0.0
0.0,0.5,0.0,0.0,[0.5],0.232375,0.002125,0.110738,0.09224
0.0,0.5,0.1,0.0,[0.5],0.240625,0.022125,0.190451,0.013651
0.0,0.5,1.0,0.0,[0.5],0.22725,0.0075,0.004717,0.004717


In [None]:
# Uncomment this to save the results
# results.to_pickle(f'robust_ei_optimal_lambda_tradeoff_{ei_proxy.lower()}_synthetic_5crossval.pkl')

In [None]:
# Compute the pareto frontier
results_pareto = pd.DataFrame()
for alpha in results['alpha'].unique():
    test_results_alpha = results[results['alpha'] == alpha]
    mask = pareto_frontier(test_results_alpha['loss_mean'], test_results_alpha['ei_disparity_mean'])
    results_alpha_pareto = test_results_alpha.iloc[mask]
    results_pareto = pd.concat((results_pareto, results_alpha_pareto.sort_values('ei_disparity_mean')))

In [None]:
fig = px.line(results, x='ei_disparity_mean', y='loss_mean', color='alpha', hover_data='lambda', markers=True)
fig.add_annotation(dict(font=dict(color='black',size=10),
                                        x=0.9,
                                        y=0.99,
                                        showarrow=False,
                                        text='dataset=synthetic',
                                        textangle=0,
                                        xanchor='left',
                                        xref="paper",
                                        yref="paper"))
fig.update_layout(title=dict(text='Fairness vs Loss Tradeoff', x=0.5))
fig.update_traces(marker=dict(size=3))
fig.show()