In [1]:
import sys
# Put the parent directory on the module search path — presumably so that the
# project-local `src` package imported in the next cell can be resolved when the
# notebook is run from its own folder (TODO confirm the repository layout).
sys.path.append('..')

In [3]:
import tqdm
import torch
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from copy import deepcopy

from src.model import LR
from src.utils import model_performance
from src.effort import Optimal_Effort, PGD_Effort
from src.methods import fair_batch_proxy
from src.data import SyntheticDataset

In [9]:
# Fix torch's global RNG seed before anything else in this cell runs.
torch.manual_seed(0)

# ----- Dataset -----
dataset = SyntheticDataset(num_samples=1000, seed=0)

# ----- Hyperparameters -----
# 'lambda' and 'alpha' are lists; later cells read their first element to build
# the result file names and iterate over 'lambda' when selecting rows.
params = {
    'lambda': [0.99],
    'alpha': [1.],
    'tau': 0.5,
    'proxy': fair_batch_proxy,
}

In [45]:
# Result files for the two loss functions; both names are derived from the first
# alpha / lambda in `params` and differ only by the `_sqloss` suffix.
path_bce = f'../results/gridsearch_lr_synthetic_fb_alpha{params["alpha"][0]}_lambda{params["lambda"][0]}_d3.pkl'
path_sq = f'../results/gridsearch_lr_synthetic_fb_alpha{params["alpha"][0]}_lambda{params["lambda"][0]}_d3_sqloss.pkl'

# NOTE: pickle.load executes arbitrary code embedded in the file — only point
# these paths at result files produced by this project.
with open(path_bce, 'rb') as fh:
    results_bce = pickle.load(fh)

with open(path_sq, 'rb') as fh:
    results_sq = pickle.load(fh)

In [46]:
df_bce = pd.DataFrame(results_bce)
# Replace the model objects by their parameter vectors (rounded to 4 decimals)
# so rows can be matched and displayed by theta.
df_bce[['theta', 'theta_adv']] = df_bce[['model', 'model_adv']].map(lambda model: model.get_theta().numpy().round(4))

# Loop invariants, computed once. Sorting puts alpha == 0 (if present) first, so
# the EI baseline theta is known before any EI row with alpha > 0 is selected.
alphas_bce = df_bce['alpha'].sort_values().unique()
theta_as_str_bce = df_bce['theta'].astype(str)

# For every lambda, keep the minimum-loss row of:
#   * EI  at alpha == 0                    (the baseline; its theta is remembered),
#   * EI  at each alpha > 0, restricted to the baseline theta,
#   * REI at each alpha > 0, restricted to rows with a non-zero fair_loss.
# Rows are collected in a list and concatenated once instead of growing a
# DataFrame inside the loop.
selected_bce = []
for lamb in params['lambda']:
    # Reset per lambda so a theta left over from another lambda (or another
    # cell) can never be reused silently.
    theta_ei = None
    for method in ['EI', 'REI']:
        in_group = (df_bce['lambda']==lamb) & (df_bce['method']==method)
        for alpha in alphas_bce:
            is_baseline = method == 'EI' and alpha == 0.
            if is_baseline:
                temp = df_bce[in_group & (df_bce['alpha']==0)]
            elif method == 'EI' and alpha > 0:
                if theta_ei is None:
                    raise ValueError(f'No EI baseline (alpha == 0) found for lambda={lamb}; cannot select EI rows for alpha={alpha}')
                temp = df_bce[in_group & (df_bce['alpha']==alpha) & (theta_as_str_bce==str(theta_ei))]
            elif method == 'REI' and alpha > 0:
                temp = df_bce[in_group & (df_bce['alpha']==alpha) & (df_bce['fair_loss']!=0)]
            else:
                continue

            if temp.empty:
                # argmin on an empty selection would raise an opaque ValueError.
                raise ValueError(f'No {method} rows for lambda={lamb}, alpha={alpha} in the BCE grid search')

            best = temp.iloc[[int(temp['loss'].argmin())]]
            selected_bce.append(best)
            if is_baseline:
                theta_ei = best['theta'].iloc[0]

# Fall back to an empty frame with the right columns when nothing was selected.
df_results_bce = pd.concat(selected_bce) if selected_bce else df_bce.iloc[:0]

columns = ['method', 'lambda', 'alpha', 'loss', 'pred_loss', 'fair_loss', 'error', 'disparity', 'theta', 'theta_adv']
print(f'[{dataset.name.capitalize()} Dataset][{params["proxy"].__name__} | BCE Loss] Grid Search')
# Show the selection three rows at a time, highlighting the column minima within each chunk.
for i in range(0, len(df_results_bce), 3):
    display(df_results_bce[columns].iloc[i:i+3].style.highlight_min(subset=['fair_loss', 'error', 'disparity'], color='#D35400'))

[Synthetic Dataset][fair_batch_proxy | BCE Loss] Grid Search


Unnamed: 0,method,lambda,alpha,loss,pred_loss,fair_loss,error,disparity,theta,theta_adv
4503,EI,0.99,0.0,0.006147,0.517413,0.000983,0.301562,0.006072,[-0.2143 1.2857 0.6429],[-0.2143 1.2857 0.6429]
4504,EI,0.99,1.0,1.067124,0.517413,1.072677,0.301562,0.092025,[-0.2143 1.2857 0.6429],[ 0.7857 2.2857 -0.3571]
6908,REI,0.99,1.0,0.244988,0.684123,0.240552,0.44375,0.225,[0.6429 0.8571 1.2857],[1.6429 0. 0.2857]


In [47]:
df_sq = pd.DataFrame(results_sq)
# Replace the model objects by their parameter vectors (rounded to 4 decimals)
# so rows can be matched and displayed by theta.
df_sq[['theta', 'theta_adv']] = df_sq[['model', 'model_adv']].map(lambda model: model.get_theta().numpy().round(4))

# Loop invariants, computed once. Sorting puts alpha == 0 (if present) first, so
# the EI baseline theta is known before any EI row with alpha > 0 is selected.
alphas_sq = df_sq['alpha'].sort_values().unique()
theta_as_str_sq = df_sq['theta'].astype(str)

# For every lambda, keep the minimum-loss row of:
#   * EI  at alpha == 0                    (the baseline; its theta is remembered),
#   * EI  at each alpha > 0, restricted to the baseline theta,
#   * REI at each alpha > 0, restricted to rows with a non-zero fair_loss.
# Rows are collected in a list and concatenated once instead of growing a
# DataFrame inside the loop.
selected_sq = []
for lamb in params['lambda']:
    # Reset per lambda so a theta left over from another lambda (or from the
    # BCE cell) can never be reused silently.
    theta_ei = None
    for method in ['EI', 'REI']:
        in_group = (df_sq['lambda']==lamb) & (df_sq['method']==method)
        for alpha in alphas_sq:
            is_baseline = method == 'EI' and alpha == 0.
            if is_baseline:
                temp = df_sq[in_group & (df_sq['alpha']==0)]
            elif method == 'EI' and alpha > 0:
                if theta_ei is None:
                    raise ValueError(f'No EI baseline (alpha == 0) found for lambda={lamb}; cannot select EI rows for alpha={alpha}')
                temp = df_sq[in_group & (df_sq['alpha']==alpha) & (theta_as_str_sq==str(theta_ei))]
            elif method == 'REI' and alpha > 0:
                temp = df_sq[in_group & (df_sq['alpha']==alpha) & (df_sq['fair_loss']!=0)]
            else:
                continue

            if temp.empty:
                # argmin on an empty selection would raise an opaque ValueError.
                raise ValueError(f'No {method} rows for lambda={lamb}, alpha={alpha} in the Sq grid search')

            best = temp.iloc[[int(temp['loss'].argmin())]]
            selected_sq.append(best)
            if is_baseline:
                theta_ei = best['theta'].iloc[0]

# Fall back to an empty frame with the right columns when nothing was selected.
df_results_sq = pd.concat(selected_sq) if selected_sq else df_sq.iloc[:0]

columns = ['method', 'lambda', 'alpha', 'loss', 'pred_loss', 'fair_loss', 'error', 'disparity', 'theta', 'theta_adv']
print(f'[{dataset.name.capitalize()} Dataset][{params["proxy"].__name__} | Sq Loss] Grid Search')
# Show the selection three rows at a time, highlighting the column minima within each chunk.
for i in range(0, len(df_results_sq), 3):
    display(df_results_sq[columns].iloc[i:i+3].style.highlight_min(subset=['fair_loss', 'error', 'disparity'], color='#D35400'))

[Synthetic Dataset][fair_batch_proxy | Sq Loss] Grid Search


Unnamed: 0,method,lambda,alpha,loss,pred_loss,fair_loss,error,disparity,theta,theta_adv
5115,EI,0.99,0.0,0.005132,0.497999,0.000154,0.226562,0.015052,[ 0. 1.5 -0.2143],[ 0. 1.5 -0.2143]
5116,EI,0.99,1.0,0.306624,0.497999,0.304691,0.226562,0.4,[ 0. 1.5 -0.2143],[-1. 0.5 -0.6429]
5522,REI,0.99,1.0,0.095713,0.645534,0.090159,0.457812,0.032258,[0.2143 0.4286 0.6429],[1.2143 1.4286 1.0714]


In [49]:
# Tag each grid-search frame (in place) with the loss function it was produced
# with, then stack them. Index labels are kept as-is, so a label can occur once
# per loss function in the combined frame.
for frame, loss_name in ((df_bce, 'BCE'), (df_sq, 'Sq')):
    frame['loss_fn'] = loss_name
df = pd.concat([df_bce, df_sq])

In [58]:
# Total loss of every EI model evaluated at alpha == 0, coloured by fair_loss,
# with one facet per loss function. px.scatter is called without `x`; the
# vertical markers below assume the x axis then carries each row's index label.
fig = px.scatter(df[(df['method']=='EI') & (df['alpha']==0)], y='loss', color='fair_loss', facet_col='loss_fn', log_y=True, log_x=True)

# Mark the selected EI baseline (minimum-loss row at alpha == 0) in each facet.
# The row label and theta are taken from the per-loss selection tables rather
# than from hard-coded positions in `df`: `df` is the concatenation of both
# frames, so a positional lookup such as df.iloc[5115] lands in the BCE rows and
# not on the Sq row labelled 5115.
for facet_col, df_selected in enumerate((df_results_bce, df_results_sq), start=1):
    baseline = df_selected[(df_selected['method']=='EI') & (df_selected['alpha']==0)].iloc[0]
    fig.add_vline(
        x=int(baseline.name),
        line_dash='dash',
        line_width=1.5,
        # Double quotes inside the f-string keep this valid before Python 3.12.
        annotation_text=f'theta = {baseline["theta"].round(2)}',
        col=facet_col,
    )
fig.update_layout(
    # width = 720,
    # height = 540,
    font=dict(family='Iosevka', color='#5d5d5d'),
)
fig.show()