In [43]:
import sys
# Put the parent directory on the import path (presumably so the `src.*`
# imports used by this notebook resolve when it is run from its own folder).
sys.path.append('..')

In [44]:
import torch
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from copy import deepcopy
from torch.utils.data import DataLoader

from src.model import LR
from src.effort import Optimal_Effort
from src.data import FairnessDataset, SyntheticDataset, IncomeDataset
from src.methods_dev import covariance_proxy, fair_batch_proxy_bce, fair_batch_proxy_mse, kde_proxy
from src.utils import pload, pareto_frontier

In [45]:
def default_layout(width=720, height=540, color='#5d5d5d'):
    """Build the shared Plotly layout keyword arguments used by the figures.

    Parameters
    ----------
    width : int
        Figure width, stored under the ``width`` key.
    height : int
        Figure height, stored under the ``height`` key.
    color : str
        Font colour, stored under ``font.color``.

    Returns
    -------
    dict
        A freshly built dictionary (safe to mutate) meant to be splatted into
        ``fig.update_layout(**default_layout(...))``.
    """
    return dict(
        width=width,
        height=height,
        font=dict(
            family='Iosevka',
            color=color,
        ),
        title=dict(
            x=0.5,
            font=dict(size=17),
        ),
        legend=dict(
            font=dict(size=10),
        ),
    )

In [38]:
# Seed torch's global RNG before anything else in this cell runs.
torch.manual_seed(0)

# Experiment selection: the dataset and the fairness proxy together determine
# which saved results file is read below.
# dataset = IncomeDataset(num_samples=20000, seed=0, z_blind=True)
dataset = SyntheticDataset(num_samples=20000, seed=0)
proxy = fair_batch_proxy_bce

# NOTE(review): `pload` presumably unpickles the file -- only point it at
# results files you produced yourself.
results_path = f'../results/error_disparity_tradeoff/lr_{dataset.name}_{proxy.__name__}.pkl'
results = pload(results_path)

In [39]:
# Pull out the two entries of the loaded results that later cells read.
params, test_results = results['params'], results['test_results']

In [40]:
def _theta_rounded(net):
    """Return ``net.get_theta()`` as a NumPy array rounded to 2 decimals."""
    # NOTE(review): `.numpy()` raises on tensors that require grad -- confirm
    # that `get_theta()` returns a detached tensor.
    return net.get_theta().numpy().round(2)


# Columns kept for the aggregated / displayed views in later cells.
columns = [
    'method', 'seed', 'lambda', 'alpha',
    'loss', 'pred_loss', 'fair_loss',
    'error', 'ei_disparity', 'rei_disparity',
]

# One row per entry of test_results, ordered by (alpha, lambda).
df_results = pd.DataFrame(test_results).sort_values(['alpha', 'lambda'])

# Attach the rounded parameters of each stored model and of its `model_adv`.
df_results['theta'] = df_results['ei_model'].apply(lambda ei: _theta_rounded(ei.model))
df_results['theta_adv'] = df_results['ei_model'].apply(lambda ei: _theta_rounded(ei.model_adv))

In [41]:
# Collapse rows sharing (method, lambda, alpha) into a single row by averaging
# every other column listed in `columns` (note that this includes 'seed').
group_keys = ['method', 'lambda', 'alpha']
df_im = df_results[columns].groupby(by=group_keys, as_index=False).mean()

In [42]:
# For every alpha, draw one figure with the error-vs-`x_val` Pareto frontier of
# each method (EI and REI), computed from the aggregated frame `df_im`.
x_val = 'fair_loss'
for alpha in params['alpha']:
    # Collect the per-method frontier rows and concatenate once, instead of
    # growing a DataFrame by concatenating onto an empty frame each iteration.
    frontier_parts = []
    for method in ['EI', 'REI']:
        temp = (
            df_im[(df_im['method'] == method) & (df_im['alpha'] == alpha)]
            .copy()
            .reset_index(drop=True)
            .sort_values(by=[x_val, 'error'], ascending=[True, True])
        )
        # `mask` is applied positionally to the sorted rows of `temp`.
        mask = pareto_frontier(temp[x_val], temp['error'])
        frontier_parts.append(temp.iloc[mask])
    df_pareto = pd.concat(frontier_parts)

    fig = px.line(
        df_pareto.sort_values(['method', 'alpha', x_val]),
        x=x_val,
        y='error',
        color='method',
        # List form is accepted by every plotly.express version.
        hover_data=['lambda'],
        facet_col='alpha',
        markers=True,
        color_discrete_sequence=['#1991E6', '#E66E19'],
    )
    fig.update_layout(
        title_text=f'{dataset.name.capitalize()} Dataset | Gradient Descent | {params["proxy"].__name__}',
        template='plotly_dark',
        **default_layout(color='white')
    )
    fig.show()

In [None]:
# Show the per-row REI results at lambda == 0.9999, limited to `columns`.
# NOTE(review): this is an exact float comparison -- it only matches if the
# stored lambda is bit-identical to 0.9999.
rei_at_lambda = (df_results['method'] == 'REI') & (df_results['lambda'] == 0.9999)
df_results.loc[rei_at_lambda, columns]

In [None]:
# Build the train/val/test tensors for fold 4 and wrap each split in a
# FairnessDataset together with the dataset's `imp_feats`.
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=4)
train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
val_dataset = FairnessDataset(*val_tensors, dataset.imp_feats)
test_dataset = FairnessDataset(*test_tensors, dataset.imp_feats)

# Positional row of `df_results` whose model is inspected below.
# NOTE(review): 69 is a magic index into the (alpha, lambda)-sorted frame --
# confirm it still points at the intended run.
model_row = 69
inspected_model = df_results.iloc[model_row]['ei_model'].model

# Run the model once and reuse the scores for every count, so the three
# prediction figures are guaranteed to come from the same forward pass.
test_scores = inspected_model(test_dataset.X).reshape(-1)
num_true_zero = torch.sum(test_dataset.Y == 0)

# The interpolated values are plain names, so these f-strings also parse on
# Python versions older than 3.12 (no quotes nested inside the braces).
print(f'# of 0 [True]: {num_true_zero} | Fraction: {np.round(num_true_zero / len(test_dataset.Y),2)}')
print(f'# of 1 [True]: {torch.sum(test_dataset.Y == 1)}')
print(f'# of 0 [Pred]: {torch.sum(test_scores < 0.5)}')
print(f'# of 1 [Pred]: {torch.sum(test_scores >= 0.5)}')
print(f'Total: {len(test_scores)}')