In [1]:
import torch
import numpy as np
import pandas as pd
import plotly.express as px
from copy import deepcopy
from tqdm import tqdm
from typing import Iterable

from model import LR
from data import FairnessDataset, Dataset, SyntheticDataset, GermanDataset, IncomeDataset
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import model_performance, pareto_frontier
from ei_model import EIModel, fair_batch_proxy, covariance_proxy

In [2]:
def append_res(d, seed, method, delta, lamb, train_alpha, alpha, pred_loss, fair_loss, accuracy, ei_disparity, ei_model):
    """Append one experiment record to the column-oriented results dict ``d``.

    ``d`` maps column names to lists; exactly one value is appended to each of
    the thirteen columns listed below. Two columns are derived here:

    * ``'loss'``  = ``(1-lamb)*pred_loss + lamb*fair_loss``
    * ``'error'`` = ``1-accuracy``

    The append is all-or-nothing: every value is computed and every target
    list is looked up *before* any list is mutated, so a missing column (or a
    failure while computing the derived values) cannot leave the columns with
    different lengths.

    Raises:
        KeyError: if ``d`` does not provide one of the expected columns.
    """
    row = {
        'seed': seed,
        'method': method,
        'delta': delta,
        'lambda': lamb,
        'train_alpha': train_alpha,
        'alpha': alpha,
        'loss': (1-lamb)*pred_loss + lamb*fair_loss,
        'pred_loss': pred_loss,
        'fair_loss': fair_loss,
        'accuracy': accuracy,
        'error': 1-accuracy,
        'ei_disparity': ei_disparity,
        'ei_model': ei_model,
    }
    # Resolve all target lists first (d[...] keeps working for defaultdicts)
    # so that a KeyError is raised before anything has been appended.
    targets = [(d[column], value) for column, value in row.items()]
    for values, value in targets:
        values.append(value)

In [3]:
def _fit_ei_model(model, train_dataset, hyper_params, effort, tau, lamb, alpha):
    """Wrap ``model`` in an ``EIModel`` and train it on ``train_dataset`` with the given ``lamb``/``alpha``."""
    ei_model = EIModel(
        model=model,
        proxy=hyper_params['proxy'],
        effort=effort,
        tau=tau,
    )
    ei_model.train(
        train_dataset,
        lamb=lamb,
        alpha=alpha,
        lr=hyper_params['learning_rate'],
        n_epochs=hyper_params['n_epochs'],
        # A single batch spanning the whole training split
        # (a mini-batch size of 1024 was tried previously).
        batch_size=len(train_dataset.X),
        abstol=hyper_params['pga_abstol'],
    )
    return ei_model


def _evaluate_and_record(results, ei_model, test_dataset, y_true, z_true, tau, abstol,
                         seed, method, delta, lamb, train_alpha, alpha):
    """Predict on ``test_dataset`` at ``alpha``, score the predictions, and append one row to ``results``."""
    Y_hat, Y_hat_max, pred_loss, fair_loss = ei_model.predict(test_dataset, alpha=alpha, abstol=abstol)
    accuracy, ei_disparity = model_performance(y_true, z_true, Y_hat, Y_hat_max, tau)
    # Store a deep copy so the recorded model is a snapshot taken right after
    # this evaluation and is unaffected by later calls on ``ei_model``.
    append_res(results, seed, method, delta, lamb, train_alpha, alpha,
               pred_loss, fair_loss, accuracy, ei_disparity, deepcopy(ei_model))


def run_tradeoff(dataset: Dataset, hyper_params: dict, seeds: Iterable[int], results: dict):
    """Train and evaluate 'EI' and 'REI' models over a grid of seeds, lambdas and alphas.

    For every ``seed`` in ``seeds`` the dataset fold ``seed`` is loaded, and for
    every ``lamb`` in ``hyper_params['lambda']``:

    1. An 'EI' model is trained with ``alpha=0`` and evaluated on the test
       split at ``alpha=0``.
    2. For every ``alpha`` in ``hyper_params['alpha']``:
       * the same 'EI' model is evaluated at ``alpha``;
       * an 'REI' model, initialised from a copy of the trained EI model's
         underlying ``model``, is trained with that ``alpha`` and evaluated
         at ``alpha=0`` and at ``alpha``.

    Each evaluation appends one row to ``results`` through ``append_res``.

    Args:
        dataset: provides ``delta``, ``imp_feats`` and ``tensor(fold=, z_blind=)``.
        hyper_params: reads the keys ``'tau'``, ``'optimal_effort'``,
            ``'z_blind'``, ``'lambda'``, ``'alpha'``, ``'proxy'``,
            ``'learning_rate'``, ``'n_epochs'`` and ``'pga_abstol'``.
        seeds: iterable of fold indices (e.g. ``range(5)``).
        results: column-oriented dict of lists, mutated in place.

    Returns:
        None. All output goes into ``results``.
    """
    tau = hyper_params['tau']
    delta = dataset.delta
    abstol = hyper_params['pga_abstol']

    # One effort object is shared by every model trained below.
    effort = Optimal_Effort(delta) if hyper_params['optimal_effort'] else PGD_Effort(delta)

    for seed in seeds:
        train_tensors, val_tensors, test_tensors = dataset.tensor(fold=seed, z_blind=hyper_params['z_blind'])
        train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
        # NOTE(review): the validation split is built but not used anywhere below.
        val_dataset = FairnessDataset(*val_tensors, dataset.imp_feats)
        test_dataset = FairnessDataset(*test_tensors, dataset.imp_feats)

        # The test labels / group attributes are the same for every evaluation
        # of this fold, so convert them to numpy once.
        y_true = test_dataset.Y.detach().numpy()
        z_true = test_dataset.Z.detach().numpy()

        # Every lambda starts from a copy of this same freshly built model.
        model = LR(num_features=train_dataset.X.shape[1])

        for lamb in hyper_params['lambda']:
            ei_model = _fit_ei_model(deepcopy(model), train_dataset, hyper_params, effort, tau, lamb=lamb, alpha=0.)
            _evaluate_and_record(results, ei_model, test_dataset, y_true, z_true, tau, abstol,
                                 seed=seed, method='EI', delta=delta, lamb=lamb, train_alpha=0., alpha=0.)

            for alpha in hyper_params['alpha']:
                _evaluate_and_record(results, ei_model, test_dataset, y_true, z_true, tau, abstol,
                                     seed=seed, method='EI', delta=delta, lamb=lamb, train_alpha=0., alpha=alpha)

                rei_model = _fit_ei_model(deepcopy(ei_model.model), train_dataset, hyper_params, effort, tau,
                                          lamb=lamb, alpha=alpha)
                for eval_alpha in (0., alpha):
                    _evaluate_and_record(results, rei_model, test_dataset, y_true, z_true, tau, abstol,
                                         seed=seed, method='REI', delta=delta, lamb=lamb,
                                         train_alpha=alpha, alpha=eval_alpha)
            # Blank line after each lambda's runs.
            print()

In [5]:
# Seed torch's global RNG before anything else in this cell runs.
torch.manual_seed(0)

# ----- Dataset -----
# Alternatives that were used before:
#   GermanDataset(seed=0)
#   IncomeDataset(num_samples=1000, seed=0)
dataset = SyntheticDataset(num_samples=20000, seed=0)

# ----- Hyperparameters -----
# Everything run_tradeoff reads from `hyper_params`, gathered in one literal.
hyper_params = {
    # Full sweep alternative: 1-np.geomspace(0.001, 0.999, 20)
    'lambda': [0.99999],
    'alpha': [1.5],
    'tau': 0.5,
    'learning_rate': 0.01,
    'n_epochs': 500,
    'proxy': covariance_proxy,
    'pga_abstol': 20,
    'z_blind': True,
    'optimal_effort': True,
}

seeds = range(1)

# ----- Run Experiment -----
# One independent, initially empty list per result column; run_tradeoff
# fills them in place.
results_xl = {
    column: []
    for column in (
        'seed', 'method', 'delta', 'lambda', 'train_alpha', 'alpha', 'loss',
        'pred_loss', 'fair_loss', 'accuracy', 'error', 'ei_disparity', 'ei_model',
    )
}
run_tradeoff(dataset, hyper_params, seeds, results_xl)

Training [alpha=0.000; lambda=0.99999; delta=0.500]:  23%|[38;2;0;145;255m██▎       [0m| 117/500 [00:08<00:27, 14.14epochs/s]

In [None]:
def _rounded_theta(net):
    """Return ``net.get_theta()`` as a numpy array rounded to 3 decimals."""
    return net.get_theta().numpy().round(3)


# One row per recorded evaluation, plus the rounded parameter vectors of the
# stored model snapshot (`model`) and of its `model_adv` attribute.
df = pd.DataFrame(results_xl)
df = df.assign(**{
    'ei_model.theta': df['ei_model'].apply(lambda snapshot: _rounded_theta(snapshot.model)),
    'ei_model.theta_adv': df['ei_model'].apply(lambda snapshot: _rounded_theta(snapshot.model_adv)),
})
# Optional: df['lambda'] = df['lambda'].apply(lambda x: np.round(x, 4))

In [None]:
# Rows recorded with evaluation alpha == 0 (and a non-negative train_alpha),
# restricted to the columns worth reading side by side.
eval_alpha_zero = (df['alpha']==0) & (df['train_alpha']>=0)
eval_alpha_zero_columns = [
    'method', 'lambda', 'train_alpha', 'alpha',
    'loss', 'pred_loss', 'fair_loss', 'error', 'ei_disparity',
    'ei_model.theta', 'ei_model.theta_adv',
]
df.loc[eval_alpha_zero, eval_alpha_zero_columns].sort_values(['lambda', 'alpha', 'method'])

In [None]:
# Rows whose train_alpha is either 0 or 1.5, at any non-negative evaluation
# alpha, restricted to the columns worth reading side by side.
train_alpha_selected = (df['train_alpha']==0) | (df['train_alpha']==1.5)
train_alpha_rows = (df['alpha']>=0) & train_alpha_selected
train_alpha_columns = [
    'method', 'lambda', 'train_alpha', 'alpha',
    'loss', 'pred_loss', 'fair_loss', 'error', 'ei_disparity',
    'ei_model.theta', 'ei_model.theta_adv',
]
df.loc[train_alpha_rows, train_alpha_columns].sort_values(['lambda', 'alpha', 'method'])

In [None]:
# Column plotted on the x-axis here; the Pareto cell below reads it as well.
x_val = 'ei_disparity'

# Mean of every numeric column per (method, alpha, lambda) group.
df_avg = (
    df
    .groupby(['method', 'alpha', 'lambda'], as_index=False)
    .mean(numeric_only=True)
    .sort_values(['method', 'lambda', 'alpha'])
)

# Error against x_val, one line per method and one facet per evaluation alpha.
px.line(
    df_avg,
    x=x_val,
    y='error',
    color='method',
    facet_col='alpha',
    markers=True,
    hover_data=['lambda', 'seed', 'train_alpha'],
    title=f'{dataset.name}',
    height=700,
)

In [None]:
# Keep, for every (method, alpha) pair, only the rows selected by
# pareto_frontier(error, x_val), then plot them.
temp_pareto = df.copy()

# Collect the per-group selections and concatenate once at the end. Growing a
# frame with pd.concat inside the loop, seeded from an empty pd.DataFrame(),
# re-copies all rows on every iteration and relies on deprecated
# concat-with-empty behaviour.
frontier_parts = []
for method in temp_pareto['method'].unique():
    for alpha in temp_pareto['alpha'].unique():
        temp = temp_pareto.loc[(temp_pareto['method']==method) & (temp_pareto['alpha']==alpha)]
        if temp.empty:
            # This (method, alpha) combination was never recorded.
            continue
        mask = pareto_frontier(temp['error'], temp[x_val])
        frontier_parts.append(temp.iloc[mask])

# With nothing selected, fall back to an empty frame that still has the
# expected columns so the sort below is well-defined.
df_pareto = pd.concat(frontier_parts) if frontier_parts else temp_pareto.iloc[0:0]
df_pareto = df_pareto.sort_values(['alpha', 'error'], ascending=True)

px.line(df_pareto, y='error', x=x_val, markers=True, color='method', facet_col='alpha', hover_data=['lambda', 'seed', 'train_alpha'], title=f'{dataset.name}', height=700)

In [None]:
# One scatter per averaged metric against lambda, faceted by evaluation alpha
# and coloured by method; the figures are shown in this order.
for metric in ('fair_loss', 'pred_loss', 'loss', 'error'):
    metric_fig = px.scatter(df_avg, x='lambda', y=metric, facet_col='alpha', color='method')
    metric_fig.show()

In [None]:
# Plot the recorded training pred_loss curve of the EI and the REI model for
# one (seed, lambda, alpha) combination.
s = 0
# Take the combination from the experiment configuration so that it is
# guaranteed to exist in `df`; a hard-coded lambda that was not part of the
# run leaves the selection empty and makes the row lookup below fail.
lamb = hyper_params['lambda'][0]
alpha = hyper_params['alpha'][0]
temp = df[(df['seed']==s) & (df['lambda']==lamb) & (df['alpha']==alpha)]

# Pick each model by its method label instead of by row position.
for method_name in ('EI', 'REI'):
    method_rows = temp[temp['method']==method_name]
    if method_rows.empty:
        raise ValueError(
            f'no {method_name} result for seed={s}, lambda={lamb}, alpha={alpha}; '
            f"available lambdas: {sorted(df['lambda'].unique())}"
        )
    history = vars(method_rows.iloc[0]['ei_model'].train_history)
    px.line(history, y='pred_loss', markers=True, title=f'{method_name} | lambda = {lamb} | alpha = {alpha}').show()

In [None]:
# df.to_pickle('tradeoff_synthetic_cov_5cv_500epochs.pkl')

In [None]:
# Scatter of the dataset's x1 vs x2 columns, coloured by dataset.Y, with one
# facet per value of dataset.Z.
fig = px.scatter(x=dataset.X['x1'], y=dataset.X['x2'], color=dataset.Y, facet_col=dataset.Z)
# The figure was only assigned before, so the cell rendered nothing.
fig.show()

Generate data with two groups that have an equal number of negatives that are equidistant from theta.