In [1]:
import torch
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
from copy import deepcopy


from model import LR
from data import SyntheticDataset, FairnessDataset
from ei_model_dev import FairBatch
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import *

In [2]:
# Synthetic dataset instance (seed=0); passed to the experiment runner further down.
dataset = SyntheticDataset(seed=0)

In [3]:
def append_res(d, acc, ei):
    """Record one run's metrics in the accumulator ``d``.

    Appends ``acc`` to ``d['accuracy']`` and ``ei`` to ``d['ei_disparity']``
    in place. Nothing is returned.
    """
    for key, value in (('accuracy', acc), ('ei_disparity', ei)):
        d[key].append(value)
        
def get_res(d, id, hp):
    """Summarise accumulated metrics as a one-row, column-oriented dict.

    Args:
        d: dict holding the per-run sequences 'accuracy' and 'ei_disparity'.
        id: label stored in the 'id' column (callers in this notebook pass
            'train', 'val' or 'test').
        hp: dict providing the scalar 'alpha', 'lambda' and 'delta' values
            that are copied into the row.

    Returns:
        dict mapping every column name to a single-element list, suitable
        for ``pd.DataFrame``. Means and standard deviations are computed
        with ``np.mean`` / ``np.std``.
    """
    return {
        'id': [id],
        'alpha': [hp['alpha']],
        'lambda': [hp['lambda']],
        'delta': [hp['delta']],
        'accuracy_mean': [np.mean(d['accuracy'])],
        'accuracy_std': [np.std(d['accuracy'])],
        'ei_disparity_mean': [np.mean(d['ei_disparity'])],
        'ei_disparity_std': [np.std(d['ei_disparity'])],
    }

def get_model(models):
    """Average the layer parameters found across ``models``.

    Every entry of ``model.layers`` that has a ``weight`` attribute
    contributes ``weight.data``; every entry that has a ``bias`` attribute
    contributes ``bias.data``. Each collection is concatenated with
    ``torch.cat`` and then averaged over dim 0.

    Args:
        models: iterable of objects exposing an iterable ``layers`` attribute.
            Presumably the trained LR models returned by the runner -- TODO confirm.

    Returns:
        ``(weights, bias)``: the two averaged tensors.
    """
    modules = [module for model in models for module in model.layers]
    weight_parts = [module.weight.data for module in modules if hasattr(module, 'weight')]
    bias_parts = [module.bias.data for module in modules if hasattr(module, 'bias')]
    return torch.cat(weight_parts).mean(dim=0), torch.cat(bias_parts).mean(dim=0)

In [4]:
def lr_fb_model_runner(dataset, hp, seeds):
    """Train and evaluate one FairBatch-wrapped LR model per seed for a fixed ``hp``.

    Args:
        dataset: object exposing ``tensor(fold=..., z_blind=...)``,
            ``imp_feats`` and ``sensitive_attrs``.
        hp: dict of scalar hyper-parameters. Reads 'optimal_effort', 'delta',
            'z_blind', 'lambda', 'learning_rate' and 'alpha'.
        seeds: iterable of values passed as ``fold`` to ``dataset.tensor``.

    Returns:
        ``(res_train, res_val, res_test, ei_models)``: the three ``get_res``
        summaries (aggregated over ``seeds``) and the list of underlying
        trained models, one per seed.
    """
    tau = 0.5  # third positional argument of FairBatch
    split_names = ('train', 'val', 'test')
    metrics = {name: {'alpha': [], 'accuracy': [], 'ei_disparity': []} for name in split_names}
    ei_models = []

    # A single effort object is built up front and shared by every seed.
    effort_cls = Optimal_Effort if hp['optimal_effort'] else PGD_Effort
    effort = effort_cls(hp['delta'])

    for seed in seeds:
        train_tensors, val_tensors, test_tensors = dataset.tensor(fold=seed, z_blind=hp['z_blind'])
        splits = {
            name: FairnessDataset(*tensors, dataset.imp_feats)
            for name, tensors in zip(split_names, (train_tensors, val_tensors, test_tensors))
        }

        model = LR(num_features=splits['train'].X.shape[1])
        ei_model = FairBatch(model, effort, tau)
        ei_model.train(
            splits['train'],
            sensitive_attrs=dataset.sensitive_attrs,
            lamb=hp['lambda'],
            lr=hp['learning_rate'],
            alpha=hp['alpha'],
        )

        # Evaluate every split first (train, val, test order), then record.
        scores = {name: ei_model.evaluate(split, hp['alpha']) for name, split in splits.items()}
        for name, (acc, ei) in scores.items():
            append_res(metrics[name], acc, ei)

        ei_models.append(ei_model.model)

    res_train, res_val, res_test = (get_res(metrics[name], name, hp) for name in split_names)
    return res_train, res_val, res_test, ei_models

In [14]:
def fb_tradeoff(dataset, hyper_params, seeds):
    """Run ``lr_fb_model_runner`` over the alpha x lambda x delta grid.

    Args:
        dataset: forwarded unchanged to ``lr_fb_model_runner``.
        hyper_params: dict whose 'alpha', 'lambda' and 'delta' entries are
            iterables of candidate values. All other entries are forwarded
            as they are.
        seeds: forwarded unchanged to ``lr_fb_model_runner``.

    Returns:
        ``(results, ei_models)``: a DataFrame holding the train/val/test
        summary rows of every grid point, in grid order, and a flat list of
        all models returned by the runner. ``results`` is an empty DataFrame
        when the grid is empty.
    """
    # Shallow copy: the caller's dict keeps its lists of candidates while the
    # three grid keys are overwritten with scalars for each run.
    hp = hyper_params.copy()
    frames = []
    ei_models = []

    for alpha in hyper_params['alpha']:
        for lamb in hyper_params['lambda']:
            for delta in hyper_params['delta']:
                hp['alpha'] = alpha
                hp['lambda'] = lamb
                hp['delta'] = delta

                train_res, val_res, test_res, models = lr_fb_model_runner(dataset, hp, seeds)
                frames.extend(pd.DataFrame(res) for res in (train_res, val_res, test_res))
                ei_models.extend(models)
        # Emit a blank line after each alpha group.
        print()

    # Concatenate once at the end instead of re-copying the accumulated frame
    # on every grid point. Row order and index values match repeated concat.
    results = pd.concat(frames) if frames else pd.DataFrame()
    return results, ei_models

In [6]:
# Experiment grid: 'delta', 'alpha' and 'lambda' are swept by fb_tradeoff;
# the remaining entries are fixed settings forwarded to every run.
hyper_params = {
    'learning_rate': 0.01,
    'delta': [1.],
    'alpha': [0., 0.1, 1., 3.],
    'lambda': np.linspace(0, 1., 11).round(3),
    'z_blind': False,
    'optimal_effort': False,
}
seeds = list(range(1))

results, ei_models = fb_tradeoff(dataset, hyper_params, seeds)

# Derived column: classification error.
results['loss_mean'] = 1 - results['accuracy_mean']
# alpha is stored as text; presumably so the plot treats it as a discrete
# category -- TODO confirm.
results['alpha'] = results['alpha'].astype(str)

Training [alpha=0.00; lambda=0.00; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:12<00:00,  8.01epochs/s]
Training [alpha=0.00; lambda=0.50; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:12<00:00,  8.05epochs/s]
Training [alpha=0.00; lambda=1.00; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:09<00:00, 11.01epochs/s]





Training [alpha=0.10; lambda=0.00; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [01:36<00:00,  1.03epochs/s]
Training [alpha=0.10; lambda=0.50; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [01:35<00:00,  1.05epochs/s]
Training [alpha=0.10; lambda=1.00; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:30<00:00,  3.26epochs/s]





Training [alpha=1.00; lambda=0.00; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [01:40<00:00,  1.01s/epochs]
Training [alpha=1.00; lambda=0.50; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [01:41<00:00,  1.02s/epochs]
Training [alpha=1.00; lambda=1.00; delta=1.00]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:23<00:00,  4.19epochs/s]





In [7]:
# Show the aggregated metrics for every split and hyper-parameter combination.
results

Unnamed: 0,id,alpha,lambda,delta,accuracy_mean,accuracy_std,ei_disparity_mean,ei_disparity_std,loss_mean
0,train,0.0,0.0,1.0,0.784531,0.0,0.070793,0.0,0.215469
0,val,0.0,0.0,1.0,0.793438,0.0,0.07779,0.0,0.206562
0,test,0.0,0.0,1.0,0.78325,0.0,0.075344,0.0,0.21675
0,train,0.0,0.5,1.0,0.742266,0.0,0.002744,0.0,0.257734
0,val,0.0,0.5,1.0,0.748125,0.0,0.003736,0.0,0.251875
0,test,0.0,0.5,1.0,0.738,0.0,0.020123,0.0,0.262
0,train,0.0,1.0,1.0,0.381016,0.0,0.0,0.0,0.618984
0,val,0.0,1.0,1.0,0.365,0.0,0.0,0.0,0.635
0,test,0.0,1.0,1.0,0.39275,0.0,0.0,0.0,0.60725
0,train,0.1,0.0,1.0,0.784844,0.0,0.068893,0.0,0.215156


In [8]:
# One frame per evaluation split, selected by the value of the 'id' column.
train_results, val_results, test_results = (
    results[results['id'] == split] for split in ('train', 'val', 'test')
)

In [9]:
# Show only the rows whose 'id' is 'test'.
test_results

Unnamed: 0,id,alpha,lambda,delta,accuracy_mean,accuracy_std,ei_disparity_mean,ei_disparity_std,loss_mean
0,test,0.0,0.0,1.0,0.78325,0.0,0.075344,0.0,0.21675
0,test,0.0,0.5,1.0,0.738,0.0,0.020123,0.0,0.262
0,test,0.0,1.0,1.0,0.39275,0.0,0.0,0.0,0.60725
0,test,0.1,0.0,1.0,0.78325,0.0,0.073361,0.0,0.21675
0,test,0.1,0.5,1.0,0.78325,0.0,0.073361,0.0,0.21675
0,test,0.1,1.0,1.0,0.60725,0.0,0.0,0.0,0.39275
0,test,1.0,0.0,1.0,0.78325,0.0,0.073361,0.0,0.21675
0,test,1.0,0.5,1.0,0.78325,0.0,0.073361,0.0,0.21675
0,test,1.0,1.0,1.0,0.60725,0.0,0.0,0.0,0.39275


In [11]:
# For each alpha, keep the test rows selected by pareto_frontier on
# (loss_mean, ei_disparity_mean), sorted by EI disparity.
# pareto_frontier is not defined in this notebook (presumably it comes from
# `from ei_utils import *`); its return value is used as a positional
# selector for .iloc.
pareto_frames = []
for alpha in test_results['alpha'].unique():
    test_results_alpha = test_results[test_results['alpha'] == alpha]
    mask = pareto_frontier(test_results_alpha['loss_mean'], test_results_alpha['ei_disparity_mean'])
    results_alpha_pareto = test_results_alpha.iloc[mask]
    pareto_frames.append(results_alpha_pareto.sort_values('ei_disparity_mean'))

# Concatenate once instead of growing a DataFrame inside the loop; fall back
# to an empty frame when there is nothing to concatenate.
test_results_pareto = pd.concat(pareto_frames) if pareto_frames else pd.DataFrame()

In [13]:
# Loss vs. EI-disparity on the test split: one line per alpha, one animation
# frame per delta, lambda shown on hover.
# NOTE(review): this plots `test_results`; the `test_results_pareto` frame
# built in the preceding cell is not used here -- confirm which is intended.
px.line(
    test_results,
    x='ei_disparity_mean',
    y='loss_mean',
    color='alpha',
    hover_data='lambda',
    markers=True,
    animation_frame='delta',
)