In [1]:
import torch
import numpy as np
import pandas as pd
import plotly.express as px
from copy import deepcopy


from model import LR
from data import SyntheticDataset, FairnessDataset
from ei_model_dev import FairBatch
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import pareto_frontier

In [2]:
# Synthetic dataset built with a fixed seed; it is the `dataset` argument
# handed to fb_tradeoff further down.
dataset = SyntheticDataset(seed=0)

In [3]:
def append_res(d, alpha, acc, ei):
    """Record one evaluation in the metric accumulator ``d``.

    ``d`` must already hold list values under the keys ``'alpha'``,
    ``'accuracy'`` and ``'ei_disparity'``. ``alpha``, ``acc`` and ``ei`` are
    appended to those lists, in that order. The dict is mutated in place and
    nothing is returned.
    """
    for key, value in (('alpha', alpha), ('accuracy', acc), ('ei_disparity', ei)):
        d[key].append(value)
        
def get_res(d, id, hp):
    """Summarise accumulated metrics per alpha value.

    Args:
        d: Accumulator with equally long lists under ``'alpha'``,
            ``'accuracy'`` and ``'ei_disparity'`` (as filled by
            ``append_res``).
        id: Tag copied into every output row (callers in this notebook pass
            ``'train'``, ``'val'`` or ``'test'``).
        hp: Hyperparameters; ``hp['alpha']`` is iterated, ``hp['lambda']`` and
            ``hp['delta']`` are copied into every row as-is.

    Returns:
        Dict of equally long lists, one entry per element of ``hp['alpha']``,
        holding the tag, lambda, delta, alpha, and the ``np.mean`` / ``np.std``
        of accuracy and EI disparity over the rows of ``d`` with that alpha.
    """
    metrics = pd.DataFrame(d)
    columns = (
        'id', 'lambda', 'delta', 'alpha',
        'accuracy_mean', 'accuracy_std',
        'ei_disparity_mean', 'ei_disparity_std',
    )
    summary = {column: [] for column in columns}

    for alpha_value in hp['alpha']:
        rows = metrics[metrics['alpha'] == alpha_value]
        accuracy, disparity = rows['accuracy'], rows['ei_disparity']
        record = (
            id, hp['lambda'], hp['delta'], alpha_value,
            np.mean(accuracy), np.std(accuracy),
            np.mean(disparity), np.std(disparity),
        )
        # `columns` and `record` are aligned position by position.
        for column, value in zip(columns, record):
            summary[column].append(value)

    return summary

In [4]:
def lr_fb_model_runner(dataset, hp, seeds):
    """Train and evaluate a FairBatch-wrapped LR model for every seed and alpha.

    Args:
        dataset: Object providing ``tensor(fold=..., z_blind=...)``,
            ``imp_feats`` and ``sensitive_attrs``.
        hp: Hyperparameters. Reads ``'optimal_effort'``, ``'delta'``,
            ``'z_blind'``, ``'alpha'`` (iterable), ``'lambda'`` and
            ``'learning_rate'``.
        seeds: Iterable of values passed as ``fold`` to ``dataset.tensor``.

    Returns:
        ``(res_train, res_val, res_test, ei_models)``: the ``get_res``
        summaries tagged ``'train'``, ``'val'`` and ``'test'``, and a list
        with ``ei_model.model`` appended once per seed (after the last alpha
        has been trained).
    """
    metric_keys = ('alpha', 'accuracy', 'ei_disparity')
    train_metrics = {key: [] for key in metric_keys}
    val_metrics = {key: [] for key in metric_keys}
    test_metrics = {key: [] for key in metric_keys}
    ei_models = []

    # One effort object is shared by every seed and alpha.
    effort_cls = Optimal_Effort if hp['optimal_effort'] else PGD_Effort
    effort = effort_cls(hp['delta'])

    for seed in seeds:
        train_tensors, val_tensors, test_tensors = dataset.tensor(fold=seed, z_blind=hp['z_blind'])
        train_dataset, val_dataset, test_dataset = (
            FairnessDataset(*tensors, dataset.imp_feats)
            for tensors in (train_tensors, val_tensors, test_tensors)
        )
        splits = (
            (train_metrics, train_dataset),
            (val_metrics, val_dataset),
            (test_metrics, test_dataset),
        )

        # NOTE(review): the model is built once per seed and then trained for
        # every alpha in turn, so unless FairBatch.train re-initialises the
        # weights, later alphas start from the previous alpha's solution.
        # Confirm this warm start is intended.
        model = LR(num_features=train_dataset.X.shape[1])
        ei_model = FairBatch(model, effort)

        for alpha in hp['alpha']:
            ei_model.train(
                train_dataset,
                sensitive_attrs=dataset.sensitive_attrs,
                lamb=hp['lambda'],
                lr=hp['learning_rate'],
                alpha=alpha,
            )

            # Evaluate on train, val, test (in that order) and log each result.
            for split_metrics, split_dataset in splits:
                acc, ei = ei_model.eval(split_dataset, alpha, dataset.sensitive_attrs)
                append_res(split_metrics, alpha, acc, ei)

        ei_models.append(ei_model.model)

    return (
        get_res(train_metrics, 'train', hp),
        get_res(val_metrics, 'val', hp),
        get_res(test_metrics, 'test', hp),
        ei_models,
    )

In [5]:
def fb_tradeoff(dataset, hp_test, seeds):
    """Run ``lr_fb_model_runner`` for every (lambda, delta) combination.

    Args:
        dataset: Forwarded unchanged to ``lr_fb_model_runner``.
        hp_test: Hyperparameter dict in which ``'lambda'`` and ``'delta'`` are
            iterables of candidate values. It is not modified; every run gets
            its own shallow copy with those two entries replaced by a single
            value each.
        seeds: Forwarded unchanged to ``lr_fb_model_runner``.

    Returns:
        ``(result, ei_models)``: one DataFrame stacking the train, val and
        test summaries of every run in run order (row labels are kept, so
        they repeat), and the models returned by all runs. If there are no
        combinations to run, ``result`` is an empty DataFrame.
    """
    frames = []
    ei_models = []

    for lamb in hp_test['lambda']:
        for delta in hp_test['delta']:
            # Fresh copy per run, so a runner that keeps a reference to its
            # hyperparameters is not affected by later runs.
            hp = hp_test.copy()
            hp['lambda'] = lamb
            hp['delta'] = delta

            train, val, test, models = lr_fb_model_runner(dataset, hp, seeds)
            frames.extend(pd.DataFrame(summary) for summary in (train, val, test))
            ei_models.extend(models)
            print()  # emit a blank line after each run

    # Concatenate once: growing a DataFrame with concat inside the loop copies
    # all previous rows on every iteration, and seeding it with an empty
    # DataFrame relies on concat behaviour that pandas has deprecated.
    result = pd.concat(frames) if frames else pd.DataFrame()
    return result, ei_models

In [6]:
# Hyperparameter grid for the sweep: fb_tradeoff iterates over 'lambda' and
# 'delta', lr_fb_model_runner iterates over 'alpha'.
# NOTE(review): the saved output of this cell is a TypeError (a list reached a
# format spec); its origin is not visible in this notebook - re-run to confirm.
hyper_params = {
    'learning_rate': 0.01,
    'delta': [0.5],
    'alpha': [0., 0.25, 1.4],
    'lambda': np.linspace(0., 0.25, 10).round(3),
    'z_blind': False,
    'optimal_effort': False,
}
seeds = [*range(1)]

results, ei_models = fb_tradeoff(dataset, hyper_params, seeds)
# loss_mean is the complement of mean accuracy; alpha is cast to str,
# presumably so the plot below treats it as a discrete colour - confirm.
results = results.assign(
    loss_mean=lambda frame: 1 - frame['accuracy_mean'],
    alpha=lambda frame: frame['alpha'].astype(str),
)

TypeError: unsupported format string passed to list.__format__

In [None]:
# Preview the first rows of the combined train/val/test summary table.
results.head()

In [None]:
# Split the combined summary into one frame per 'id' tag.
train_results, val_results, test_results = (
    results[results['id'] == split] for split in ('train', 'val', 'test')
)

In [None]:
# For each alpha, keep the test rows that pareto_frontier selects on
# (loss_mean, ei_disparity_mean), ordered by disparity.
# NOTE(review): assumes pareto_frontier returns a selector usable with .iloc
# (boolean mask or integer positions) - its definition is not visible here.
pareto_frames = []
for alpha in test_results['alpha'].unique():
    test_results_alpha = test_results[test_results['alpha'] == alpha]
    mask = pareto_frontier(test_results_alpha['loss_mean'], test_results_alpha['ei_disparity_mean'])
    results_alpha_pareto = test_results_alpha.iloc[mask]
    pareto_frames.append(results_alpha_pareto.sort_values('ei_disparity_mean'))

# Concatenate once instead of growing a frame from an empty DataFrame inside
# the loop; fall back to an empty frame when there is nothing to stack.
test_results_pareto = pd.concat(pareto_frames) if pareto_frames else pd.DataFrame()

In [None]:
# Loss vs. EI disparity on the per-alpha Pareto rows, one line per alpha.
# hover_data is given as a list: that form is accepted by every plotly express
# version, while a bare string is only handled by newer releases.
px.line(test_results_pareto, x='ei_disparity_mean', y='loss_mean', color='alpha', hover_data=['lambda'], markers=True)