In [1]:
import sys
sys.path.append('..')

In [2]:
from copy import deepcopy
from typing import Iterable

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import torch
from tqdm import tqdm

from model import LR
from data import FairnessDataset, Dataset, SyntheticDataset
from ei_effort import Optimal_Effort
from ei_utils import model_performance, pdump
from ei_model import EIModel, fair_batch_proxy


In [3]:
def default_layout(width=720, height=540, color='#5d5d5d'):
    """Build the shared keyword arguments passed to ``fig.update_layout``.

    Args:
        width: Figure width, forwarded unchanged under the ``width`` key.
        height: Figure height, forwarded unchanged under the ``height`` key.
        color: Font colour used for all text in the figure.

    Returns:
        A fresh ``dict`` with ``width``, ``height``, ``font``, ``title`` and
        ``legend`` entries; callers unpack it with ``**default_layout(...)``.
    """
    return dict(
        width=width,
        height=height,
        font=dict(
            family='Iosevka',
            color=color,
        ),
        # x=0.5 centres the title horizontally.
        title=dict(
            x=0.5,
            font=dict(size=17),
        ),
        legend=dict(
            font=dict(size=10),
        ),
    )

In [4]:
def append_res(d, seed, method, delta, lamb, train_alpha, alpha, pred_loss, fair_loss, accuracy, ei_disparity, rei_disparity, ei_model):
    """Append one experiment record to the column-oriented results dict ``d``.

    ``d`` maps each column name to a list; every list receives exactly one new
    entry. Two columns are derived rather than passed in:

    * ``loss``  = ``(1 - lamb) * pred_loss + lamb * fair_loss``
    * ``error`` = ``1 - accuracy``

    The dict is mutated in place and nothing is returned.
    """
    record = {
        'seed': seed,
        'method': method,
        'delta': delta,
        'lambda': lamb,
        'train_alpha': train_alpha,
        'alpha': alpha,
        'loss': (1-lamb)*pred_loss + lamb*fair_loss,
        'pred_loss': pred_loss,
        'fair_loss': fair_loss,
        'accuracy': accuracy,
        'error': 1-accuracy,
        'ei_disparity': ei_disparity,
        'rei_disparity': rei_disparity,
        'ei_model': ei_model,
    }
    for column, value in record.items():
        d[column].append(value)

In [5]:
def _evaluate(ei_model, test_dataset, y_true, tau, alpha, **predict_kwargs):
    """Run ``ei_model.predict`` on ``test_dataset`` and score the predictions.

    Returns:
        ``(pred_loss, fair_loss, accuracy, disparity)`` where the first two
        come from ``predict`` and the last two from ``model_performance``.
    """
    Y_hat, Y_hat_max, pred_loss, fair_loss = ei_model.predict(test_dataset, alpha=alpha, **predict_kwargs)
    # The group labels are restricted to samples predicted below `tau`; the
    # mask is built from the very predictions that are being scored.
    z_below_tau = test_dataset.Z[(Y_hat < tau)].detach().numpy()
    accuracy, disparity = model_performance(y_true, z_below_tau, Y_hat, Y_hat_max, tau)
    return pred_loss, fair_loss, accuracy, disparity


def run_tradeoff(dataset: Dataset, params: dict, seeds: Iterable[int], results: dict):
    """Train EI and REI models over a grid of lambdas/alphas and log test metrics.

    For every seed and every ``lamb`` in ``params['lambda']`` an "EI" model is
    trained with ``alpha=0``. Then, for every ``alpha`` in ``params['alpha']``,
    an "REI" model is trained with that ``alpha``. Both models are evaluated on
    the test split at ``alpha=0`` (recorded as ``ei_disparity``) and at
    ``alpha`` (recorded as ``rei_disparity`` together with the losses and
    accuracy), and one row per model is appended to ``results``.

    Args:
        dataset: Source of the per-fold tensors (``dataset.tensor(fold=seed)``),
            ``dataset.imp_feats`` and ``dataset.delta``.
        params: Hyperparameters. Keys read here: ``tau``, ``lambda``, ``alpha``,
            ``proxy``, ``effort``, ``learning_rate``, ``n_epochs``,
            ``batch_size``, ``pga_abstol``, ``pga_n_iters``.
        seeds: Iterable of fold indices / seeds to run.
        results: Column-oriented dict of lists, mutated in place through
            ``append_res``.

    Returns:
        None.
    """
    tau = params['tau']
    delta = dataset.delta

    # These keyword arguments are identical for every `train` call.
    train_kwargs = dict(
        lr=params['learning_rate'],
        n_epochs=params['n_epochs'],
        batch_size=params['batch_size'],
        abstol=params['pga_abstol'],
        pga_n_iters=params['pga_n_iters'],
    )
    # Evaluation at alpha=0 does not pass `pga_n_iters`; evaluation at the
    # requested alpha does.
    nominal_kwargs = dict(abstol=params['pga_abstol'])
    robust_kwargs = dict(abstol=params['pga_abstol'], pga_n_iters=params['pga_n_iters'])

    for seed in seeds:
        # The validation split is returned by the dataset but not used here.
        train_tensors, _val_tensors, test_tensors = dataset.tensor(fold=seed)
        train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
        test_dataset = FairnessDataset(*test_tensors, dataset.imp_feats)
        y_true = test_dataset.Y.detach().numpy()

        model = LR(num_features=train_dataset.X.shape[1])

        for lamb in params['lambda']:
            ei_model = EIModel(
                model=deepcopy(model).xavier_init(),
                proxy=params['proxy'],
                effort=params['effort'],
                tau=tau,
            )
            ei_model.train(train_dataset, lamb=lamb, alpha=0., **train_kwargs)

            # Baseline (alpha=0) disparity of the EI model. Kept under its own
            # name so the REI evaluation below cannot overwrite it between
            # alpha iterations.
            _, _, _, ei_disparity_ei = _evaluate(
                ei_model, test_dataset, y_true, tau, alpha=0., **nominal_kwargs)

            for alpha in params['alpha']:
                pred_loss, fair_loss, accuracy, rei_disparity_ei = _evaluate(
                    ei_model, test_dataset, y_true, tau, alpha=alpha, **robust_kwargs)
                append_res(results, seed, 'EI', delta, lamb, 0., alpha, pred_loss, fair_loss,
                           accuracy, ei_disparity_ei, rei_disparity_ei, deepcopy(ei_model))

                # NOTE(review): unlike the EI model above, this copy is not
                # passed through `xavier_init()` - confirm this is intended.
                rei_model = EIModel(
                    model=deepcopy(model),
                    proxy=params['proxy'],
                    effort=params['effort'],
                    tau=tau,
                )
                rei_model.train(train_dataset, lamb=lamb, alpha=alpha, **train_kwargs)

                _, _, _, ei_disparity_rei = _evaluate(
                    rei_model, test_dataset, y_true, tau, alpha=0., **nominal_kwargs)
                pred_loss_r, fair_loss_r, accuracy_r, rei_disparity_rei = _evaluate(
                    rei_model, test_dataset, y_true, tau, alpha=alpha, **robust_kwargs)
                append_res(results, seed, 'REI', delta, lamb, alpha, alpha, pred_loss_r, fair_loss_r,
                           accuracy_r, ei_disparity_rei, rei_disparity_rei, deepcopy(rei_model))
            # Emit a blank line after each lambda's runs.
            print()

In [6]:
torch.manual_seed(0)

# ----- Dataset -----
dataset = SyntheticDataset(num_samples=1000, seed=0)

# ----- Hyperparameters -----
# `lambda` is a 20-point grid: one minus a geometric progression from 0.001 to 0.999.
params = {
    'lambda': 1-np.geomspace(0.001, 0.999, 20),
    'alpha': [1.],
    'tau': 0.5,
    'learning_rate': 0.001,
    'n_epochs': 500,
    'batch_size': 64,
    'proxy': fair_batch_proxy,
    'pga_abstol': 1e-7,
    'pga_n_iters': 50,
    'effort': Optimal_Effort(dataset.delta),
}

seeds = range(1)

# ----- Run Experiment -----
# One empty list per result column; `run_tradeoff` fills them in place.
results_xl = {
    column: []
    for column in (
        'seed', 'method', 'delta', 'lambda', 'train_alpha', 'alpha', 'loss',
        'pred_loss', 'fair_loss', 'accuracy', 'error', 'ei_disparity',
        'rei_disparity', 'ei_model',
    )
}
run_tradeoff(dataset, params, seeds, results_xl)

Training [alpha=0.000; lambda=0.99900; delta=0.500]: 100%|[38;2;0;145;255m██████████[0m| 100/100 [00:01<00:00, 74.11epochs/s]


0.05738085135817528
tensor(0.0005, grad_fn=<AbsBackward0>)


Training [alpha=1.000; lambda=0.99900; delta=0.500]:  65%|[38;2;0;145;255m██████▌   [0m| 65/100 [00:06<00:03, 10.16epochs/s]


KeyboardInterrupt: 

In [None]:
# Persist the raw results; the first alpha value is embedded in the file name.
results_path = f'../results/gradientdescent/lr_synthetic_fb_alpha{params["alpha"][0]}_d1_sqloss.pkl'
pdump(results_path, results_xl)

In [None]:
# Columns shown in the comparison tables below.
columns = ['method', 'lambda', 'alpha', 'loss', 'pred_loss', 'fair_loss', 'error', 'ei_disparity', 'rei_disparity', 'theta']
# Metrics whose per-column minimum is highlighted within each table.
highlighted_metrics = ['fair_loss', 'error', 'ei_disparity', 'rei_disparity']

df_im = pd.DataFrame(results_xl)
stored_models = df_im['ei_model']
# Rounded parameter vectors of the main and the adversarial model of each stored EIModel.
df_im['theta'] = stored_models.apply(lambda stored: stored.model.get_theta().numpy().round(2))
df_im['theta_adv'] = stored_models.apply(lambda stored: stored.model_adv.get_theta().numpy().round(2))

# Show the rows two at a time (run_tradeoff appends an EI row followed by an REI row).
for start in range(0, len(df_im), 2):
    pair = df_im[columns].iloc[start:start + 2]
    display(pair.style.highlight_min(subset=highlighted_metrics, color='#D35400'))

In [None]:
# One training-history scatter per stored model: total loss per recorded step,
# coloured by the fairness loss.
for i in range(len(df_im)):
    row = df_im.iloc[i]
    method, lamb, alpha = row['method'], row['lambda'], row['alpha']
    history = vars(row['ei_model'].train_history)
    fig = px.scatter(history, y='total_loss', color='fair_loss')
    layout = default_layout(width=1000, height=450)
    fig.update_layout(
        title_text=f'Synthetic Dataset | {method} | alpha: {alpha} | lambda: {lamb}',
        **layout,
    )
    fig.show()

In [None]:
# Fairness loss against lambda for rows evaluated with alpha > 0, one facet per method.
robust_rows = df_im.loc[df_im['alpha'] > 0].sort_values(['method', 'lambda'])
disparity_scale = ['lightblue', 'blue', 'purple', 'orange', 'red']

fig = px.scatter(
    robust_rows,
    x='lambda',
    y='fair_loss',
    hover_data='lambda',
    color='rei_disparity',
    facet_col='method',
    color_continuous_scale=disparity_scale,
)

# NOTE(review): width is passed as np.inf - presumably so the figure is not
# pinned to a fixed pixel width; confirm this renders as intended.
fig.update_layout(
    title_text=f'{dataset.name.capitalize()} Dataset | Gradient Descent | alpha: {params["alpha"][0]}',
    **default_layout(np.inf),
)
fig.show()


In [None]:
# Error against fairness loss for every recorded run, coloured by method.
runs_by_fair_loss = df_im.sort_values('fair_loss')
fig = px.scatter(runs_by_fair_loss, x='fair_loss', y='error', color='method')
layout = default_layout()
fig.update_layout(**layout)
fig.show()