In [None]:
import sys
sys.path.append('..')

In [None]:
import tqdm
import torch
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from copy import deepcopy
from plotly.subplots import make_subplots

from src.model import LR
from src.utils import pload
from src.effort import Optimal_Effort
from src.data import SyntheticDataset, FairnessDataset

In [None]:
def default_layout(width=720, height=540, color='#5d5d5d'):
    """Build the shared Plotly layout keyword arguments used by every figure.

    Parameters
    ----------
    width, height : number
        Figure size, forwarded unchanged as the layout ``width``/``height``.
    color : str
        Font colour applied to the whole figure.

    Returns
    -------
    dict
        A fresh dict meant to be splatted into ``fig.update_layout(**...)``.
        It sets the size, the font family/colour, a title positioned at
        ``x=0.5`` with font size 17, and a legend font size of 10.
    """
    return dict(
        width=width,
        height=height,
        font=dict(
            family='Iosevka',
            color=color,
        ),
        title=dict(
            x=0.5,
            font=dict(size=17),
        ),
        legend=dict(
            font=dict(size=10),
        ),
    )

In [None]:
def generate_boundary(theta):
    """Sample points on the line ``a*x1 + b*x2 + c = 0`` for plotting.

    Parameters
    ----------
    theta : sequence of numbers
        Only the first three entries are read, as ``(a, b, c)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'x1'`` and ``'x2'``, one row per sampled point.

        * ``a != 0`` and ``b != 0``: ``x1`` sweeps ``[-2, 2)`` in steps of
          0.001 and ``x2`` is solved from the line equation.
        * ``a == 0``: horizontal line ``x2 = -c/b``, ``x1`` sweeps ``[-2, 2)``
          in steps of 0.01.
        * ``b == 0``: vertical line ``x1 = -c/a``, ``x2`` sweeps ``[-2, 2)``
          in steps of 0.01.
        * ``a == 0`` and ``b == 0``: no line exists, so an empty frame with
          the same two columns is returned (dividing by ``b`` here would
          raise or produce NaN/inf).
    """
    a = float(theta[0])
    b = float(theta[1])
    c = float(theta[2])

    if a == 0 and b == 0:
        # Degenerate parameters: there is no boundary to draw.
        return pd.DataFrame(columns=['x1', 'x2'], dtype=float)

    if a == 0:
        x1 = np.arange(-2, 2, 0.01).round(3)
        x2 = np.full_like(x1, -c / b)
    elif b == 0:
        x2 = np.arange(-2, 2, 0.01).round(3)
        x1 = np.full_like(x2, -c / a)
    else:
        x1 = np.arange(-2, 2, 0.001).round(3)
        x2 = (-a * x1 - c) / b

    return pd.DataFrame({'x1': x1, 'x2': x2})

In [None]:
def generate_Y_pred(model, data, tau=0.5):
    """Threshold the model's outputs on ``data`` at ``tau``.

    ``data`` is a NumPy array; it is converted to a float tensor and passed
    to ``model``. The result is an integer tensor holding 1 where the output
    is at least ``tau`` and 0 elsewhere.
    """
    inputs = torch.from_numpy(data).float()
    scores = model(inputs)
    return (scores >= tau) * 1

In [None]:
def fair_batch_proxy(Z: torch.Tensor, Y_hat_max: torch.Tensor, bce: bool = True):
    """Sum, over the groups ``z in {0, 1}``, of ``|loss_z - loss_all|``.

    Every loss is taken between predictions and an all-ones target:
    ``loss_all`` over all of ``Y_hat_max`` and ``loss_z`` over the entries
    where ``Z == z``. Groups with no members are skipped.

    Parameters
    ----------
    Z : torch.Tensor
        Group indicator, used as the boolean mask ``Z == z`` to index
        ``Y_hat_max``.
    Y_hat_max : torch.Tensor
        Predictions compared against the all-ones target.
    bce : bool
        Use ``BCELoss`` when true, ``MSELoss`` otherwise (both with mean
        reduction).

    Returns
    -------
    torch.Tensor
        A 0-dimensional tensor with the accumulated absolute gaps.
    """
    if bce:
        loss_fn = torch.nn.BCELoss(reduction='mean')
    else:
        loss_fn = torch.nn.MSELoss(reduction='mean')

    def loss_against_ones(y_hat):
        # ones_like keeps the target on the same shape/dtype/device as the
        # prediction, so column-shaped or non-float32 inputs also work.
        return loss_fn(y_hat, torch.ones_like(y_hat))

    loss_mean = loss_against_ones(Y_hat_max)

    # Out-of-place accumulation in the loss dtype avoids the in-place cast
    # error when the losses are not float32.
    proxy_value = torch.zeros((), dtype=loss_mean.dtype, device=loss_mean.device)
    for z in (0, 1):
        group_idx = (Z == z)
        if group_idx.sum() == 0:
            continue
        loss_z = loss_against_ones(Y_hat_max[group_idx])
        proxy_value = proxy_value + torch.abs(loss_z - loss_mean)
    return proxy_value

In [None]:
torch.manual_seed(0)

# ----- Dataset -----
dataset = SyntheticDataset(num_samples=1000, seed=0)

# Only the training split of fold 0 is plotted below.
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=0)
X_train, Y_train, Z_train = train_tensors

# Marker symbol is chosen by z, marker colour by y.
symbols = ['x', 'circle']
colors = ['#636EFA', '#EF553B']
fig = go.Figure()

# One scatter trace per (z, y) combination.
for z in [0, 1]:
    for y in [0, 1]:
        mask = (Z_train==z) * (Y_train==y)
        trace_marker = dict(color=colors[y], symbol=symbols[z], size=5)
        fig.add_trace(go.Scatter(
            x=X_train[mask, 0],
            y=X_train[mask, 1],
            marker=trace_marker,
            mode='markers',
            name=f'{y}, {z}'
        ))

dataset_title = f'{dataset.name.capitalize()} Dataset'
fig.update_layout(
    title_text=dataset_title,
    legend_title='y, z',
    xaxis_title='x1',
    yaxis_title='x2',
    **default_layout()
)
fig.show()

In [None]:
# ----- Hyperparameters -----
# 'lambda' holds 20 values 1 - g, with g geometrically spaced over
# [0.001, 0.999]. The entries stay NumPy floats on purpose: the loading cell
# calls `lamb.round(5)` on them, which a plain Python float does not provide.
# To sweep fewer values, slice the geomspace array (e.g. `[:13]`) before
# subtracting.
params = {
    'lambda': list(1 - np.geomspace(0.001, 0.999, 20)),
    'alpha': [0.1],
    'tau': 0.5,
    'proxy': fair_batch_proxy,
}

In [None]:
def _lowest_loss_row(frame):
    """Return the row of `frame` with the smallest 'loss' as a one-row DataFrame."""
    return frame.iloc[[int(frame['loss'].argmin())]]


# Columns shown in the summary tables (read by the display cell).
columns = ['method', 'lambda', 'alpha', 'loss', 'pred_loss', 'fair_loss', 'error', 'disparity', 'theta', 'theta_adv']

# The selected rows of every lambda are collected in a list and concatenated
# once after the loop. This makes the cell work on a fresh kernel (df_im was
# never initialised before) and idempotent when re-run.
selected_per_lambda = []
for lamb in sorted(params['lambda']):
    # NOTE(review): pload comes from src.utils; if it unpickles, only load
    # result files you trust.
    results = pload(f'../results/gridsearch/lr_synthetic_fb_alpha{params["alpha"][0]}_lambda{lamb.round(5)}_d1_sqloss.pkl')

    df = pd.DataFrame(results)
    df[['theta', 'theta_adv']] = df[['model', 'model_adv']].map(lambda model: model.get_theta().numpy())

    # Reset per lambda so a theta picked for a previous lambda is never reused.
    theta_ei = None
    selected = []
    for method in ['EI', 'REI']:
        # Ascending order: alpha == 0 (if present) is handled before alpha > 0.
        for alpha in df['alpha'].sort_values().unique():
            if method == 'EI' and alpha == 0.:
                # Lowest-loss EI run at alpha == 0; its theta anchors the EI rows below.
                temp = df[(df['lambda']==lamb) & (df['method']=='EI') & (df['alpha']==0)]
                best = _lowest_loss_row(temp)
                theta_ei = best['theta'].iloc[0]
            elif method == 'EI' and alpha > 0:
                if theta_ei is None:
                    raise ValueError(
                        f'no EI result with alpha == 0 for lambda={lamb}; '
                        'cannot select the matching EI row for alpha > 0'
                    )
                # Same theta as the alpha == 0 pick, matched on its string form.
                temp = df[(df['lambda']==lamb) & (df['method']=='EI') & (df['alpha']==alpha) & (df['theta'].astype(str)==str(theta_ei))]
                best = _lowest_loss_row(temp)
            elif method == 'REI' and alpha > 0:
                # Lowest-loss REI run among those with a non-zero fair_loss.
                temp = df[(df['lambda']==lamb) & (df['method']=='REI') & (df['alpha']==alpha) & (df['fair_loss']!=0)]
                best = _lowest_loss_row(temp)
            else:
                continue
            selected.append(best)

    # df_results keeps the rows of the current (finally: the last) lambda;
    # later cells read it.
    df_results = pd.concat(selected) if selected else pd.DataFrame()
    selected_per_lambda.append(df_results)

df_im = pd.concat(selected_per_lambda) if selected_per_lambda else pd.DataFrame()

In [None]:
# Walk df_im in strides of three rows and show the 2nd and 3rd row of each
# stride, highlighting the column-wise minimum of the listed metrics.
# NOTE(review): presumably each stride is one lambda (EI alpha=0, EI, REI) — confirm.
highlight_cols = ['fair_loss', 'error', 'disparity']
for start in range(0, len(df_im), 3):
    shown_rows = df_im[columns].iloc[start + 1:start + 3]
    display(shown_rows.style.highlight_min(subset=highlight_cols, color='#D35400'))

In [None]:
# Use one alpha for both the row filter and the title so they cannot drift
# apart (the filter used to hard-code 0.1).
alpha_shown = params["alpha"][0]
fig = px.scatter(
    df_im[(df_im['alpha']==alpha_shown)].sort_values('lambda'),
    x='lambda',
    y='fair_loss',
    hover_data='lambda',
    color='disparity',
    facet_col='method',
    color_continuous_scale=['lightblue', 'blue', 'purple', 'orange', 'red'],
)

# NOTE(review): width=np.inf is kept as in the original; presumably it is
# meant to let the figure use the full available width — confirm.
fig.update_layout(
    title_text = f'{dataset.name.capitalize()} Dataset | Grid Search | alpha: {alpha_shown}',
    **default_layout(np.inf))
fig.show()



In [None]:
# Error against fair_loss for every selected row with alpha > 0, coloured by method.
rows_alpha_pos = df_im[df_im['alpha']>0]
grid_title = f'{dataset.name.capitalize()} Dataset | Grid Search | alpha: {params["alpha"][0]}'

fig = px.scatter(rows_alpha_pos, x='fair_loss', y='error', color='method')
fig.update_layout(title_text=grid_title, **default_layout())
fig.show()

In [None]:
# Boundary point frames for the rows of df_results (the selection made for
# the last lambda that was loaded): position 1 feeds the "ei" frames, the
# last position the "rei" frames.
theta_col = df_results['theta']
theta_adv_col = df_results['theta_adv']

ei_theta_boundary, ei_theta_adv_boundary = (
    generate_boundary(theta_col.iloc[1]),
    generate_boundary(theta_adv_col.iloc[1]),
)

rei_theta, rei_theta_adv = (
    generate_boundary(theta_col.iloc[-1]),
    generate_boundary(theta_adv_col.iloc[-1]),
)

In [None]:
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=0)
X_train, Y_train, Z_train = train_tensors
train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)

data = pd.DataFrame({'x1': X_train[:, 0], 'x2': X_train[:, 1], 'y': Y_train, 'z': Z_train})

# m selects which row of df_results is visualised: 'ei' -> position 1,
# anything else -> the last position.
m = 'ei'
i = -1
if m == 'ei':
    i = 1

selected_model = df_results['model'].iloc[i]
selected_model_adv = df_results['model_adv'].iloc[i]

# Predictions on the original training features.
data['y_pred'] = generate_Y_pred(selected_model, train_dataset.X.numpy())

# NOTE(review): Optimal_Effort is defined in src.effort; presumably it returns
# the features after the best allowed effort — confirm.
X_hat_max = Optimal_Effort(dataset.delta)(selected_model, train_dataset, train_dataset.X)
X_hat_max_np = X_hat_max.detach().numpy()

# Predictions of both models on the effort-adjusted features.
data['y_pred_max'] = generate_Y_pred(selected_model, X_hat_max_np)
data['y_pred_max_adv'] = generate_Y_pred(selected_model_adv, X_hat_max_np)

# Marker colour lookup, indexed as colors[y_pred][y_pred_max].
colors = {
    0: {0: '#636EFA', 1: '#0511a1'},  # plotly blue / dark blue
    1: {0: '#EF553B', 1: '#EF553B'},  # plotly red in both cases
}

# Marker symbol lookup, indexed as symbols[z][y_pred][y_pred_max_adv].
# z picks the base shape; only (y_pred=0, y_pred_max_adv=1) gets the open variant.
symbols = {
    z_value: {
        0: {0: shape, 1: f'{shape}-open'},
        1: {0: shape, 1: shape},
    }
    for z_value, shape in enumerate(('circle', 'x'))
}

fig = go.Figure()
# One trace per combination of z, y_pred, y_pred_max and y_pred_max_adv.
# The per-combination subset gets its own name (`group`) so that it no longer
# overwrites `df_im`, the grid-search frame that earlier cells read.
for z in map(int, data['z'].sort_values().unique()):
    for y_pred in map(int, data['y_pred'].sort_values().unique()):
        for y_pred_max in map(int, data['y_pred_max'].unique()):
            for y_pred_max_adv in map(int, data['y_pred_max_adv'].unique()):
                group = data[(data['z']==z) & (data['y_pred']==y_pred) & (data['y_pred_max']==y_pred_max) & (data['y_pred_max_adv']==y_pred_max_adv)]
                fig.add_trace(go.Scatter(
                    x=group['x1'],
                    y=group['x2'],
                    # colour: y_pred / y_pred_max; symbol: z / y_pred / y_pred_max_adv
                    marker = dict(color=colors[y_pred][y_pred_max], symbol=symbols[z][y_pred][y_pred_max_adv], size=4.5),
                    mode='markers',
                    name=f'{y_pred_max}, {y_pred}, {z}',
                ))
            

# Pick the pair of boundary frames to draw: (points, colour, trace name).
if m == 'ei':
    boundary_specs = [
        (ei_theta_boundary, 'lightgreen', 'ei_theta'),
        (ei_theta_adv_boundary, 'pink', 'ei_theta_adv'),
    ]
else:
    boundary_specs = [
        (rei_theta, 'darkgreen', 'rei_theta'),
        (rei_theta_adv, 'hotpink', 'rei_theta_adv'),
    ]

# Each boundary is added as a line trace.
for boundary_points, boundary_color, boundary_name in boundary_specs:
    fig.add_trace(go.Scatter(
        x=boundary_points['x1'],
        y=boundary_points['x2'],
        marker=dict(color=boundary_color, size=3),
        mode='lines',
        name=boundary_name
    ))


# `lamb` is whatever the loading loop left behind, i.e. the last lambda it processed.
plot_title = f'{dataset.name.capitalize()} Dataset | alpha: {params["alpha"][0]} | lambda: {lamb}'
axis_and_legend = dict(
    legend_title='y_pred_max, y_pred, z',
    xaxis_title='x1',
    yaxis_title='x2',
)

fig.update_layout(title_text=plot_title, **axis_and_legend, **default_layout())

fig.show()

In [None]:
# Re-run the effort step on only the rows whose y_pred is 0, then show the
# adversarial model's thresholded predictions on the result as the cell output.
pred_zero_mask = data['y_pred']==0
effort_fn = Optimal_Effort(dataset.delta)
X_hat_max = effort_fn(df_results['model'].iloc[i], train_dataset, train_dataset.X[pred_zero_mask])
generate_Y_pred(df_results['model_adv'].iloc[i], X_hat_max.detach().numpy())