In [1]:
import torch
import pickle
import warnings
import numpy as np
import pandas as pd
import plotly.express as px
from copy import deepcopy
from tqdm import tqdm
from typing import Iterable


from model import LR
from data import FairnessDataset, SyntheticDataset, GermanDataset, IncomeDataset
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import *
from ei_model_test import EIModel, fair_batch_proxy, covariance_proxy

In [None]:
def get_wandb(model):
    """Return the model's weights and bias as a single rounded NumPy vector.

    Iterates over ``model.layers`` and keeps the parameters of the last module
    that provides a ``weight`` and of the last module that provides a ``bias``.
    Only the first row of the weight tensor (``weight.data[0]``) is used, and
    the bias is concatenated after it.

    Args:
        model: Object exposing an iterable ``layers`` attribute whose items may
            carry ``weight`` / ``bias`` tensors (e.g. ``torch.nn.Linear``).

    Returns:
        numpy.ndarray: ``[weights..., bias...]`` rounded to two decimals.

    Raises:
        ValueError: If no module in ``model.layers`` provides a weight, or none
            provides a bias (previously this surfaced as an UnboundLocalError
            or an AttributeError on ``None``).
    """
    weights = None
    bias = None
    for module in model.layers:
        # getattr(..., None) covers both modules without the attribute and
        # modules whose parameter is registered as None (e.g. bias=False).
        layer_weight = getattr(module, 'weight', None)
        if layer_weight is not None:
            weights = layer_weight.data[0]
        layer_bias = getattr(module, 'bias', None)
        if layer_bias is not None:
            bias = layer_bias.data

    if weights is None or bias is None:
        raise ValueError("model.layers contains no module with both a weight and a bias to report")

    theta = torch.cat((weights, bias), 0)
    # .cpu() is a no-op for CPU tensors and makes .numpy() valid for tensors
    # that live on another device.
    return theta.cpu().numpy().round(2)

In [None]:
def append_res(d, seed, lamb, train_alpha, alpha, acc, fair_loss, ei, ei_model):
    """Append the outcome of one run to the column-oriented results dict ``d``.

    ``d`` must already map every column name used below to a list; each list
    grows by exactly one element. The ``'loss'`` column is derived from the
    accuracy as ``1 - acc``. Mutates ``d`` in place and returns ``None``.
    """
    row = {
        'seed': seed,
        'lambda': lamb,
        'train_alpha': train_alpha,
        'alpha': alpha,
        'accuracy': acc,
        'loss': 1 - acc,
        'fair_loss': fair_loss,
        'ei_disparity': ei,
        'ei_model': ei_model,
    }
    for column, value in row.items():
        d[column].append(value)

In [None]:
def model_runner(dataset: FairnessDataset, hp: dict, seeds: Iterable, results: dict):
    """Train and evaluate one ``EIModel`` per seed for a single hyperparameter setting.

    For every seed the function asks ``dataset.tensor`` for the train/val/test
    tensors of that fold, trains a fresh ``LR`` model wrapped in an ``EIModel``
    on the training split, evaluates it on the test split and appends one row
    to ``results`` via ``append_res``.

    Args:
        dataset: Source dataset; must provide ``tensor(fold=..., z_blind=...)``
            and ``imp_feats``.
        hp: Scalar hyperparameters. Keys read here: ``'optimal_effort'``,
            ``'delta'``, ``'robust_training'``, ``'alpha'``, ``'z_blind'``,
            ``'proxy'``, ``'lambda'``, ``'learning_rate'``, ``'n_epochs'``,
            ``'pga_abstol'``.
        seeds: Iterable of seeds; each one is passed as the ``fold`` argument.
        results: Column-oriented dict of lists, mutated in place.

    Returns:
        None.
    """
    # Passed both to EIModel and to model_performance.
    tau = 0.5

    effort_cls = Optimal_Effort if hp['optimal_effort'] else PGD_Effort
    effort = effort_cls(hp['delta'])

    # Without robust training the model is trained with alpha = 0, but it is
    # still evaluated below with hp['alpha'].
    train_alpha = hp['alpha'] if hp['robust_training'] else 0.

    for seed in seeds:
        splits = dataset.tensor(fold=seed, z_blind=hp['z_blind'])
        train_dataset, val_dataset, test_dataset = (
            FairnessDataset(*tensors, dataset.imp_feats) for tensors in splits
        )

        n_features = train_dataset.X.shape[1]
        ei_model = EIModel(
            model=LR(num_features=n_features),
            proxy=hp['proxy'],
            effort=effort,
            tau=tau,
            warm_start=False,
        )

        ei_model.train(
            train_dataset,
            lamb=hp['lambda'],
            alpha=train_alpha,
            lr=hp['learning_rate'],
            n_epochs=hp['n_epochs'],
            batch_size=1024,
            abstol=hp['pga_abstol'],
        )

        Y_hat, Y_hat_max, fair_loss = ei_model.predict(
            test_dataset, alpha=hp['alpha'], abstol=hp['pga_abstol']
        )
        y_true = test_dataset.Y.detach().numpy()
        z_true = test_dataset.Z.detach().numpy()
        accuracy, ei_disparity = model_performance(y_true, z_true, Y_hat, Y_hat_max, tau)

        append_res(
            results,
            seed,
            hp['lambda'],
            train_alpha,
            hp['alpha'],
            accuracy,
            fair_loss.item(),
            ei_disparity,
            ei_model,
        )

In [None]:
def run_tradeoff(dataset, hyper_params, seeds):
    """Run ``model_runner`` over the grid of robust_training x lambda x alpha.

    Args:
        dataset: Dataset object forwarded unchanged to ``model_runner``.
        hyper_params: Dict of hyperparameters. The entries ``'robust_training'``,
            ``'lambda'`` and ``'alpha'`` must be iterables and are swept as a
            full grid (in that nesting order); all other entries are forwarded
            unchanged. The caller's dict is not modified.
        seeds: Iterable of seeds forwarded to ``model_runner`` for every grid
            point.

    Returns:
        dict: Column-oriented results (column name -> list), filled in place by
        ``model_runner``. Wrap it in ``pd.DataFrame`` for analysis.
    """
    # Shallow copy so the swept keys can be overwritten with scalars without
    # touching the caller's grid definition.
    hp = hyper_params.copy()

    result_columns = (
        'seed', 'lambda', 'train_alpha', 'alpha', 'accuracy',
        'loss', 'fair_loss', 'ei_disparity', 'ei_model',
    )
    results = {column: [] for column in result_columns}

    for robust_training in hyper_params['robust_training']:
        for lamb in hyper_params['lambda']:
            for alpha in hyper_params['alpha']:
                hp.update({'robust_training': robust_training, 'lambda': lamb, 'alpha': alpha})

                model_runner(dataset, hp, seeds, results)
                # Blank line separates the console output of consecutive runs.
                print()

    return results

In [None]:
# Seed torch's global RNG before anything is built or trained.
torch.manual_seed(0)

# Dataset (uncomment one of the alternatives to switch)
dataset = SyntheticDataset(num_samples=1000, seed=0)
# dataset = GermanDataset(seed=0)
# dataset = IncomeDataset(seed=0)

# Hyperparameters. 'lambda', 'alpha' and 'robust_training' hold the values that
# run_tradeoff sweeps over; every other entry is a single fixed setting.
hyper_params = {
    'lambda': 1 - np.geomspace(0.001, 0.999, 20),
    'alpha': [0., 0.5, 1.5],
    'delta': .5,  # synthetic is 0.5, german is 1
    'learning_rate': 0.01,
    'n_epochs': 100,
    'proxy': covariance_proxy,
    'pga_abstol': 1e-7,
    'z_blind': False,
    'optimal_effort': True,
    'robust_training': [True],
}

seeds = range(2)

# Run tradeoffs
results = run_tradeoff(dataset, hyper_params, seeds)

In [None]:
# Tabulate the collected runs (one row per appended result) and display them
# ordered by lambda, then alpha, with a fresh 0..n-1 index.
df = pd.DataFrame(results)
df.sort_values(by=['lambda', 'alpha'], ignore_index=True)

In [None]:
# Average the numeric columns over all runs that share a (lambda, alpha) pair,
# then plot EI disparity against lambda with one line per alpha.
df_avg = (
    df
    .groupby(['lambda', 'alpha'], as_index=False)
    .mean(numeric_only=True)
)
px.line(
    df_avg,
    x='lambda',
    y='ei_disparity',
    color='alpha',
    markers=True,
    title=f'{dataset.__class__.__name__}',
)

In [None]:
# df.to_pickle(f'tradeoff_robust_{dataset.__class__.__name__}_5cv.pkl')

In [6]:
# Display the raw results dict returned by run_tradeoff. The name is only bound
# once the "Run tradeoffs" cell has been executed in the current kernel; on a
# fresh kernel this cell fails with NameError.
results

NameError: name 'results' is not defined