In [1]:
import torch
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
from copy import deepcopy


from model import LR
from data import FairnessDataset, SyntheticDataset, GermanDataset
from ei_model_dev import FairBatch, Covariance
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import *

In [12]:
# Build the synthetic dataset with a fixed seed (seed=0).
dataset = SyntheticDataset(seed=0)

In [13]:
# Fetch the train/val/test tensors of fold 0 (z_blind=False) and wrap each split
# in a FairnessDataset together with the dataset's imp_feats.
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=0, z_blind=False)
train_dataset, val_dataset, test_dataset = [
    FairnessDataset(*split_tensors, dataset.imp_feats)
    for split_tensors in (train_tensors, val_tensors, test_tensors)
]

In [20]:
# Show the distinct values present in the training split's Z tensor
# (presumably the sensitive attribute — cf. `dataset.sensitive_attrs` below; confirm).
train_dataset.Z.unique()
# Commented-out alternatives for the same inspection:
# sorted(list(set(train_dataset.Z)))
# dataset.sensitive_attrs

tensor([0., 1.])

In [10]:
# NOTE(review): identical to the earlier `dataset = SyntheticDataset(seed=0)` cell;
# the out-of-order execution counts suggest a leftover re-run — consider removing.
dataset = SyntheticDataset(seed=0)

In [None]:
def append_res(d, acc, ei):
    """Record one run's metrics in the accumulator dict ``d``.

    Args:
        d: dict with list values under the keys 'accuracy', 'loss' and
            'ei_disparity'; it is modified in place.
        acc: accuracy of the run; the loss is stored as ``1 - acc``.
        ei: EI disparity of the run.
    """
    entries = (
        ('accuracy', acc),
        ('loss', 1 - acc),
        ('ei_disparity', ei),
    )
    for key, value in entries:
        d[key].append(value)
        
def get_res(d, hp):
    """Summarise the per-run metrics in ``d`` as a single-row record.

    Args:
        d: dict holding sequences of per-run values under the keys
            'accuracy', 'loss' and 'ei_disparity'.
        hp: dict providing the scalar 'alpha', 'lambda' and 'delta' of the
            configuration the metrics belong to.

    Returns:
        A dict whose values are one-element lists: the three hyperparameters
        followed by ``<metric>_mean`` / ``<metric>_std`` (``np.mean`` /
        ``np.std``) for each metric, ready to be wrapped in ``pd.DataFrame``.
    """
    # Hyperparameter columns first, so the column order is stable.
    res = {name: [hp[name]] for name in ('alpha', 'lambda', 'delta')}

    for metric in ('accuracy', 'loss', 'ei_disparity'):
        values = d[metric]
        res[f'{metric}_mean'] = [np.mean(values)]
        res[f'{metric}_std'] = [np.std(values)]

    return res

In [None]:
def model_runner(ei_model, dataset, hp, seeds):
    """Train and evaluate ``ei_model`` once per entry of ``seeds`` and summarise.

    Args:
        ei_model: callable invoked as ``ei_model(model, effort_model, pga_iter, tau)``;
            the returned object must provide ``train`` and ``predict``.
        dataset: object exposing ``tensor(fold=..., z_blind=...)`` (returning the
            train/val/test tensors) and ``imp_feats``.
        hp: dict with the scalar settings 'pga_iter', 'optimal_effort', 'delta',
            'z_blind', 'lambda', 'learning_rate' and 'alpha'.
        seeds: iterable of values passed as ``fold`` to ``dataset.tensor``.

    Returns:
        The value of ``get_res(metrics, hp)``, where ``metrics`` collects the
        test-split accuracy, loss and EI disparity of every run.
    """
    tau = 0.5  # decision threshold handed to the EI model and to model_performance
    pga_iter = hp['pga_iter']
    metrics = {'accuracy': [], 'loss': [], 'ei_disparity': []}

    # A single effort model is built up front and shared by all runs.
    effort_cls = Optimal_Effort if hp['optimal_effort'] else PGD_Effort
    effort_model = effort_cls(hp['delta'])

    for seed in seeds:
        train_split, val_split, test_split = dataset.tensor(fold=seed, z_blind=hp['z_blind'])
        train_dataset, val_dataset, test_dataset = [
            FairnessDataset(*split, dataset.imp_feats)
            for split in (train_split, val_split, test_split)
        ]

        # Fresh classifier for every run, sized to the number of feature columns.
        n_features = train_dataset.X.shape[1]
        ei_m = ei_model(LR(num_features=n_features), effort_model, pga_iter, tau)
        ei_m.train(train_dataset, lamb=hp['lambda'], lr=hp['learning_rate'], alpha=hp['alpha'])

        # Only the test split is scored.
        Y_hat, Y_hat_max = ei_m.predict(test_dataset, hp['alpha'])
        y_true = test_dataset.Y.detach().numpy()
        z_true = test_dataset.Z.detach().numpy()
        test_acc, test_ei = model_performance(y_true, z_true, Y_hat, Y_hat_max, tau)
        append_res(metrics, test_acc, test_ei)

    return get_res(metrics, hp)

In [None]:
def run_tradeoff(ei_model, dataset, hyper_params, seeds):
    """Run ``model_runner`` for every (alpha, lambda, delta) combination.

    Args:
        ei_model: forwarded unchanged to ``model_runner``.
        dataset: forwarded unchanged to ``model_runner``.
        hyper_params: dict whose 'alpha', 'lambda' and 'delta' entries are
            iterables of values to sweep; all other entries are passed through
            as-is. The dict itself is not modified.
        seeds: forwarded unchanged to ``model_runner``.

    Returns:
        A ``pd.DataFrame`` with the rows returned by ``model_runner`` stacked in
        sweep order (alpha outermost, delta innermost); an empty DataFrame when
        the grid is empty. Row labels are kept as produced by each per-run frame
        (they are not renumbered).
    """
    # Shallow copy is enough: only the three swept keys are rebound below.
    hp = hyper_params.copy()
    frames = []

    for alpha in hyper_params['alpha']:
        for lamb in hyper_params['lambda']:
            for delta in hyper_params['delta']:
                hp['alpha'] = alpha
                hp['lambda'] = lamb
                hp['delta'] = delta

                result = model_runner(ei_model, dataset, hp, seeds)
                # Collect and concatenate once at the end: calling pd.concat on
                # every iteration re-copies all previous rows (quadratic cost).
                frames.append(pd.DataFrame(result))
                # Blank line after each configuration (kept from the original;
                # presumably separates any output printed during training).
                print()

    # pd.concat raises on an empty list, so keep the original empty-grid result.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [None]:
# Hyperparameters: 'delta', 'alpha' and 'lambda' are the swept grids,
# the remaining entries are fixed settings.
# (FairBatch lambda alternative: hyper_params['lambda'] = [0.88])
hyper_params = {
    'delta': [0.5],
    'alpha': [0., 0.1, 1.5],
    'lambda': 1 - np.geomspace(0.001, 0.999, 20),  # Covariance lambdas
    'learning_rate': 0.01,
    'pga_iter': 20,
    'z_blind': False,
    'optimal_effort': True,  # True only for Synthetic Data
}

# 5-Fold Cross Validation: one run per fold index
seeds = [*range(5)]

# EI Model (select one): FairBatch or Covariance
ei_model = Covariance

# Run tradeoffs, then store alpha as a string column
results = run_tradeoff(ei_model, dataset, hyper_params, seeds)
results = results.assign(alpha=results['alpha'].astype(str))

In [None]:
# Display the aggregated sweep results (one row per hyperparameter configuration).
results

In [None]:
# Split the results by the 'id' column when the producer labels each row with its
# split. The get_res/model_runner defined in this notebook add no such column and
# score the test split only, so without the guard this cell raises KeyError: 'id'.
# In that case every row is treated as a test result and the other two are empty.
if 'id' in results.columns:
    train_results = results[results['id'] == 'train']
    val_results = results[results['id'] == 'val']
    test_results = results[results['id'] == 'test']
else:
    train_results = results.iloc[0:0]
    val_results = results.iloc[0:0]
    test_results = results

In [None]:
# Display the rows of the results that belong to the test split.
test_results

In [None]:
# For each alpha, keep the rows selected by pareto_frontier on
# (loss_mean, ei_disparity_mean), sorted by disparity, and stack them.
pareto_frames = []
for alpha in test_results['alpha'].unique():
    test_results_alpha = test_results[test_results['alpha'] == alpha]
    mask = pareto_frontier(test_results_alpha['loss_mean'], test_results_alpha['ei_disparity_mean'])
    # The mask is applied positionally (.iloc), independent of the row labels.
    results_alpha_pareto = test_results_alpha.iloc[mask]
    pareto_frames.append(results_alpha_pareto.sort_values('ei_disparity_mean'))

# Concatenate once instead of growing the frame inside the loop; pd.concat
# raises on an empty list, so fall back to an empty frame when there are no rows.
test_results_pareto = pd.concat(pareto_frames) if pareto_frames else pd.DataFrame()

In [None]:
# Loss vs. EI-disparity trade-off: one line per alpha, one animation frame per delta.
# hover_data is given as a list, which every plotly express version accepts
# (a bare string is only handled by newer releases).
# NOTE(review): this plots all rows of test_results; test_results_pareto is not
# used here — confirm which frame is intended.
px.line(
    test_results,
    x='ei_disparity_mean',
    y='loss_mean',
    color='alpha',
    hover_data=['lambda'],
    markers=True,
    animation_frame='delta',
)