In [1]:
import torch
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
from copy import deepcopy


from model import LR
from data import SyntheticDataset, FairnessDataset
from ei_model_dev import FairBatch, Covariance
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import *

In [4]:
# Scratch check: the distinct values in a small 0/1 tensor after converting it to NumPy.
set(torch.tensor([0,0,1,0]).numpy())

{0, 1}

In [3]:
# Build the synthetic dataset with a fixed seed; this object is passed to run_tradeoff.
dataset = SyntheticDataset(seed=0)

In [4]:
def append_res(d, acc, ei):
    """Record one run's scores in the metrics dict ``d`` (mutated in place).

    Appends ``acc`` to ``d['accuracy']``, ``1 - acc`` to ``d['loss']`` and
    ``ei`` to ``d['ei_disparity']``. Returns ``None``.
    """
    new_entries = (
        ('accuracy', acc),
        ('loss', 1 - acc),
        ('ei_disparity', ei),
    )
    for metric_name, value in new_entries:
        d[metric_name].append(value)
        
def get_res(d, hp):
    """Summarise per-run metrics into a one-row, column-oriented dict.

    Parameters
    ----------
    d : dict
        Holds the lists ``d['accuracy']``, ``d['loss']`` and
        ``d['ei_disparity']`` (one entry per run).
    hp : dict
        Provides the scalar values ``hp['alpha']``, ``hp['lambda']`` and
        ``hp['delta']`` that identify the configuration.

    Returns
    -------
    dict
        Every value is a single-element list: the three hyper-parameters,
        followed by ``np.mean`` / ``np.std`` of each metric list.
    """
    summary = {
        'alpha': [hp['alpha']],
        'lambda': [hp['lambda']],
        'delta': [hp['delta']],
        'accuracy_mean': [np.mean(d['accuracy'])],
        'accuracy_std': [np.std(d['accuracy'])],
        'loss_mean': [np.mean(d['loss'])],
        'loss_std': [np.std(d['loss'])],
        'ei_disparity_mean': [np.mean(d['ei_disparity'])],
        'ei_disparity_std': [np.std(d['ei_disparity'])],
    }
    return summary

In [5]:
def model_runner(ei_model, dataset, hp, seeds):
    """Train and evaluate ``ei_model`` once per fold and summarise the test metrics.

    Parameters
    ----------
    ei_model : callable
        Model wrapper, invoked as ``ei_model(model, effort, pga_iter, tau)``;
        the returned object must offer ``train`` and ``predict``.
    dataset : object
        Must provide ``tensor(fold=..., z_blind=...)`` returning a
        ``(train, val, test)`` triple of tensor tuples, plus ``imp_feats``.
    hp : dict
        Scalar hyper-parameters. Keys read here: ``'pga_iter'``,
        ``'optimal_effort'``, ``'delta'``, ``'z_blind'``, ``'lambda'``,
        ``'learning_rate'`` and ``'alpha'``.
    seeds : iterable
        Fold identifiers; one model is trained and tested per entry.

    Returns
    -------
    dict
        The summary produced by ``get_res`` over all folds.
    """
    tau = 0.5  # passed to both the model wrapper and model_performance
    pga_iter = hp['pga_iter']
    metrics = {'accuracy': [], 'loss': [], 'ei_disparity': []}

    # Pick the effort implementation once; the same instance is reused for every fold.
    effort_cls = Optimal_Effort if hp['optimal_effort'] else PGD_Effort
    effort = effort_cls(hp['delta'])

    for seed in seeds:
        # The validation split is returned by the dataset but not used in this runner.
        train_tensors, _val_tensors, test_tensors = dataset.tensor(fold=seed, z_blind=hp['z_blind'])
        train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
        test_dataset = FairnessDataset(*test_tensors, dataset.imp_feats)

        model = LR(num_features=train_dataset.X.shape[1])
        ei_m = ei_model(model, effort, pga_iter, tau)

        # Training always uses alpha=0.; hp['alpha'] is only applied at prediction time.
        ei_m.train(
            train_dataset,
            lamb=hp['lambda'],
            lr=hp['learning_rate'],
            alpha=0.
            )

        Y_hat, Y_hat_max = ei_m.predict(test_dataset, hp['alpha'])
        test_acc, test_ei = model_performance(
            test_dataset.Y.detach().numpy(),
            test_dataset.Z.detach().numpy(),
            Y_hat,
            Y_hat_max,
            tau,
        )
        append_res(metrics, test_acc, test_ei)

    return get_res(metrics, hp)

In [6]:
def run_tradeoff(ei_model, dataset, hyper_params, seeds):
    """Run ``model_runner`` for every (alpha, lambda, delta) combination.

    Parameters
    ----------
    ei_model : callable
        Forwarded unchanged to ``model_runner``.
    dataset : object
        Forwarded unchanged to ``model_runner``.
    hyper_params : dict
        ``'alpha'``, ``'lambda'`` and ``'delta'`` must be iterables of values
        to sweep; all other entries are forwarded as-is. The dict itself is
        not modified.
    seeds : iterable
        Forwarded unchanged to ``model_runner``.

    Returns
    -------
    pandas.DataFrame
        One row per combination, in sweep order (alpha outermost, delta
        innermost). Each row keeps the index of its single-row frame, so the
        index is not unique. Empty if any swept list is empty.
    """
    frames = []

    for alpha in hyper_params['alpha']:
        for lamb in hyper_params['lambda']:
            for delta in hyper_params['delta']:
                # Fresh dict per combination: swept lists replaced by scalars.
                hp = {**hyper_params, 'alpha': alpha, 'lambda': lamb, 'delta': delta}

                result = model_runner(ei_model, dataset, hp, seeds)
                frames.append(pd.DataFrame(result))
                print()

    # Concatenate once instead of re-copying a growing frame on every iteration.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [7]:
# Hyperparameters
# 'alpha', 'lambda' and 'delta' are lists that run_tradeoff sweeps over;
# the remaining entries are scalars forwarded to every run.
hyper_params = {
    'delta': [0.5],
    'alpha': [0., 0.1, 1.5],
    'learning_rate': 0.01,
    'pga_iter': 2,
    'z_blind': False,
    'optimal_effort': True,  # True only for Synthetic Data
}

# 5-Fold Cross Validation
seeds = list(range(5))

# EI Proxy (set ei_proxy to FairBatch or Covariance)
ei_proxy = 'Covariance'

if ei_proxy == 'FairBatch':
    ei_model = FairBatch
    hyper_params['lambda'] = np.linspace(0., 0.25, 10) # FairBatch lambdas
    # hyper_params['lambda'] = [0.88] # lambda value that minimizes ei for FairBatch
    # NOTE(review): 0.88 lies outside the 0-0.25 grid above -- confirm the intended value.
elif ei_proxy == 'Covariance':
    ei_model = Covariance
    # hyper_params['lambda'] = 1-np.geomspace(0.001,0.999, 20) # Covariance lambdas
    hyper_params['lambda'] = [0.] # lambda value that minimizes ei for Covariance
else:
    # Fail loudly instead of running with an undefined (or stale) ei_model / lambda grid.
    raise ValueError(
        f"Unknown ei_proxy {ei_proxy!r}; expected 'FairBatch' or 'Covariance'"
    )

# Run tradeoffs
results = run_tradeoff(ei_model, dataset, hyper_params, seeds)
# Store alpha as strings; the column is later used as the `color` grouping in the plot.
results['alpha'] = results['alpha'].astype(str)

Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.05epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.67epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.65epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 18.06epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 18.08epochs/s]





Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.02epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.24epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.31epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.44epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.93epochs/s]





Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.84epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.41epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.80epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.97epochs/s]
Training [alpha=0.0000; lambda=0.0000; delta=0.5000]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:05<00:00, 17.99epochs/s]







In [8]:
# Summary table: hyper-parameters next to the mean/std of loss and EI disparity.
results.loc[:, [
    'alpha', 'lambda', 'delta',
    'loss_mean', 'loss_std',
    'ei_disparity_mean', 'ei_disparity_std',
]]

Unnamed: 0,alpha,lambda,delta,loss_mean,loss_std,ei_disparity_mean,ei_disparity_std
0,0.0,0.0,0.5,0.2132,0.002581,0.131804,0.012986
0,0.1,0.0,0.5,0.21345,0.002477,0.134903,0.013098
0,1.5,0.0,0.5,0.21325,0.002603,0.135201,0.013667


In [None]:
# Save the aggregated results to a pickle file named after the chosen EI proxy
# (comment this line out to skip saving).
results.to_pickle(f'nonrobust_ei_optimal_lambda_tradeoff_{ei_proxy.lower()}_synthetic_5crossval.pkl')

In [None]:
# Compute the pareto frontier
# For each alpha, keep only the rows selected by pareto_frontier over
# (loss_mean, ei_disparity_mean), ordered by increasing EI disparity.
pareto_frames = []
for alpha in results['alpha'].unique():
    test_results_alpha = results[results['alpha'] == alpha]
    mask = pareto_frontier(test_results_alpha['loss_mean'], test_results_alpha['ei_disparity_mean'])
    # Positional selection: rows in `results` share index labels, so .iloc is used.
    results_alpha_pareto = test_results_alpha.iloc[mask]
    pareto_frames.append(results_alpha_pareto.sort_values('ei_disparity_mean'))

# Concatenate once; fall back to an empty frame when there is nothing to combine.
results_pareto = pd.concat(pareto_frames) if pareto_frames else pd.DataFrame()

In [None]:
# Fairness/loss trade-off: one line per alpha, with lambda shown on hover.
# NOTE(review): this plots the unfiltered `results`, although `results_pareto`
# is computed in the previous cell -- confirm which frame is intended.
fig = px.line(
    results,
    x='ei_disparity_mean',
    y='loss_mean',
    color='alpha',
    hover_data=['lambda'],  # list form; a bare string is not accepted by every plotly version
    markers=True,
)
# Small dataset label, positioned in paper (figure-relative) coordinates.
fig.add_annotation(dict(font=dict(color='black',size=10),
                                        x=0.9,
                                        y=0.99,
                                        showarrow=False,
                                        text='dataset=synthetic',
                                        textangle=0,
                                        xanchor='left',
                                        xref="paper",
                                        yref="paper"))
fig.update_layout(title=dict(text='Fairness vs Loss Tradeoff', x=0.5))
fig.update_traces(marker=dict(size=3))
fig.show()