In [None]:
import tqdm
import torch
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from copy import deepcopy

from model import LR
from ei_utils import model_performance
from ei_effort import Optimal_Effort, PGD_Effort
from ei_model import EIModel, fair_batch_proxy, covariance_proxy
from data import FairnessDataset, Dataset, SyntheticDataset, GermanDataset, IncomeDataset

In [None]:
def generate_boundary(theta):
    """Sample points on the line ``a*x1 + b*x2 + c = 0`` for plotting.

    Parameters
    ----------
    theta : indexable of numbers
        Line coefficients; ``theta[0]``, ``theta[1]`` and ``theta[2]`` are read
        as ``a``, ``b`` and ``c`` respectively.

    Returns
    -------
    pandas.DataFrame
        Two float columns, ``x1`` and ``x2``. The free coordinate sweeps
        ``[-2, 2)`` in steps of 0.01 for an axis-aligned line (``a == 0`` or
        ``b == 0``) and in steps of 0.001 otherwise. When both ``a`` and ``b``
        are zero the equation describes no line, so an empty frame with the
        same columns is returned.
    """
    a, b, c = theta[0], theta[1], theta[2]

    if a == 0 and b == 0:
        # Degenerate case: there is no boundary to draw, and solving for either
        # coordinate would divide by zero.
        return pd.DataFrame(columns=['x1', 'x2'], dtype=float)

    if a == 0:
        # Horizontal line: x2 is constant, x1 is the free coordinate.
        x1 = np.arange(-2, 2, 0.01).round(3)
        x2 = np.full(x1.shape, -c / b, dtype=float)
    elif b == 0:
        # Vertical line: x1 is constant, x2 is the free coordinate.
        x2 = np.arange(-2, 2, 0.01).round(3)
        x1 = np.full(x2.shape, -c / a, dtype=float)
    else:
        # General case: solve for x2 on a finer x1 grid.
        x1 = np.arange(-2, 2, 0.001).round(3)
        x2 = (-a * x1 - c) / b
    return pd.DataFrame({'x1': x1, 'x2': x2})

In [None]:
def generate_Y_pred(model, data, tau=0.5):
    """Threshold the model's scores for ``data`` into hard 0/1 predictions.

    ``data`` is a NumPy array; it is converted to a float32 tensor and passed
    through ``model``. The result is an integer tensor with the same shape as
    the model output: 1 where the score is at least ``tau``, 0 elsewhere.
    """
    features = torch.from_numpy(data).float()
    scores = model(features)
    above_threshold = scores >= tau
    # Multiplying the boolean mask by 1 yields an integer tensor of 0s and 1s.
    return above_threshold * 1

In [None]:
def generate_grid(center, widths, n=15):
    """Build a regular grid of points in a box around ``center``.

    Parameters
    ----------
    center : sequence of numbers
        Centre of the box; its length is the grid dimensionality.
    widths : scalar or sequence of numbers
        Half-width of the box along each axis. Any zero-dimensional value
        (Python ``int``/``float`` or a NumPy scalar such as ``np.float32``)
        is applied to every axis.
    n : int
        Number of evenly spaced samples per axis (endpoints included).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(m, len(center))`` holding the distinct grid points,
        sorted as returned by ``np.unique(..., axis=0)``; ``m`` is at most
        ``n ** len(center)``.
    """
    dims = len(center)
    if np.ndim(widths) == 0:
        # np.ndim also recognises NumPy scalars, which the plain int/float
        # isinstance check missed.
        widths = [widths] * dims
    axes = [np.linspace(center[i] - widths[i], center[i] + widths[i], n) for i in range(dims)]
    mesh = np.meshgrid(*axes)
    points = np.stack([coords.reshape(-1) for coords in mesh]).T
    # A zero width collapses an axis to repeated values; unique removes the duplicates.
    return np.unique(points, axis=0)

In [None]:
def append_res(d, method, delta, lamb, alpha, total_loss, pred_loss, fair_loss, disparity, error, theta, theta_adv):
    """Append one result record to the column-oriented results dict ``d``.

    ``d`` maps each column name to a list; every list gets exactly one new
    entry, in the order given below. Note the column names that differ from
    the argument names: ``lamb`` -> ``'lambda'``, ``total_loss`` -> ``'loss'``,
    ``theta`` -> ``'model'`` and ``theta_adv`` -> ``'model_adv'``.
    """
    record = (
        ('method', method),
        ('delta', delta),
        ('lambda', lamb),
        ('alpha', alpha),
        ('loss', total_loss),
        ('pred_loss', pred_loss),
        ('fair_loss', fair_loss),
        ('disparity', disparity),
        ('error', error),
        ('model', theta),
        ('model_adv', theta_adv),
    )
    for column, value in record:
        d[column].append(value)

In [None]:
def run_gridsearch(dataset: Dataset, params: dict, results: dict, fold: int = 0) -> None:
    """Exhaustively evaluate a grid of 3-parameter models and record EI / REI metrics.

    For every ``(lambda, alpha)`` pair in ``params`` and every candidate ``theta``
    on a fixed grid (centre 0, half-width 1.5, 15 samples per axis), the function
    evaluates the model on the training split of ``fold`` and appends rows to
    ``results`` via ``append_res``. Nothing is returned; ``results`` is mutated.

    Parameters
    ----------
    dataset : Dataset
        Provides ``delta``, ``imp_feats`` and ``tensor(fold=...)``.
    params : dict
        Keys read here: ``'tau'`` (decision threshold), ``'proxy'`` (callable
        ``proxy(Z, Y_hat)`` whose result supports ``.detach().item()``),
        ``'effort'`` (callable ``effort(model, dataset, X)``), ``'lambda'`` and
        ``'alpha'`` (iterables of numbers).
        NOTE(review): ``params['z_blind']`` is set by the caller in this notebook
        but is not read here; ``dataset.tensor`` is called with its default.
        Confirm that this matches the ``z_blind`` used by the plotting cells.
    results : dict
        Column-oriented dict of lists with the keys expected by ``append_res``.
    fold : int
        Fold index forwarded to ``dataset.tensor``.

    Rows appended per candidate ``theta`` (only when at least one training
    sample is scored below ``tau``; otherwise the candidate is skipped silently):

    * ``'EI'`` with ``alpha = 0.``: losses and disparity computed with the
      candidate model itself.
    * ``'EI'`` and ``'REI'`` with the current ``alpha``: both rows carry the same
      values, computed with the worst-case ``theta_adv`` found on a grid of
      half-width ``alpha`` around ``theta``.
    """
    delta = dataset.delta
    tau = params['tau']
    proxy = params['proxy']
    effort = params['effort']
    
    for lamb in params['lambda']:
        for alpha in params['alpha']:
            loss_fn = torch.nn.BCELoss(reduction='mean')
            # Candidate parameters: a 3-D grid, i.e. the model is assumed to have
            # exactly three parameters (presumably two weights + bias; verify against LR).
            thetas = generate_grid(center=[0., 0., 0.,], widths=1.5, n=15)
            
            # Only the training split is used; val_tensors / test_tensors are unused here.
            train_tensors, val_tensors, test_tensors = dataset.tensor(fold=fold)
            train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
            
            # `model` holds the candidate theta, `model_adv` the adversarially perturbed theta.
            model = LR(num_features=train_dataset.X.shape[1])
            model_adv = LR(num_features=train_dataset.X.shape[1])
            
            n = len(thetas)
            for i in tqdm.trange(n, desc=f"Grid Search [alpha={alpha:.3f}; lambda={lamb:.5f}; effort={effort.__class__.__name__}]", colour='#0091ff'):

                theta = thetas[i]
                # Relies on set_theta returning the model instance.
                model = model.set_theta(theta)
                
                Y_hat = model(train_dataset.X).reshape(-1)
                pred_loss =  loss_fn(Y_hat, train_dataset.Y).detach().item()
                
                # Fairness terms are computed only over samples scored below tau;
                # if there are none, this candidate produces no result rows.
                if torch.sum(Y_hat<tau) > 0:
                    X_e = train_dataset.X[(Y_hat<tau)]
                    Z_e = train_dataset.Z[(Y_hat<tau)]
                        
                    # Features after effort for the below-threshold samples, re-scored by the same model.
                    X_hat_max = effort(model, train_dataset, X_e)
                    Y_hat_max = model(X_hat_max).reshape(-1)
                    fair_loss = proxy(Z_e, Y_hat_max).detach().item()
                    
                    # Convex combination of prediction loss and fairness loss.
                    total_loss = ((1-lamb) * pred_loss) + (lamb * fair_loss)
                    
                    # model_performance is given post-effort scores for the whole training set.
                    X_hat_max_full = effort(model, train_dataset, train_dataset.X)
                    Y_hat_max = model(X_hat_max_full).reshape(-1)
                    
                    accuracy, ei_disparity = model_performance(train_dataset.Y.detach().numpy(), train_dataset.Z.detach().numpy(), Y_hat.clone().detach().numpy(), Y_hat_max.clone().detach().numpy(), tau)
                    
                    # Baseline row: alpha is recorded as 0 and the model is stored in both model slots.
                    append_res(results, 'EI', delta, lamb, 0., total_loss, pred_loss, fair_loss, ei_disparity, 1-accuracy, deepcopy(model), deepcopy(model))

                    # Find theta adv that maximizes fair loss
                    # The search grid is centred on theta with half-width alpha. The effort
                    # outputs (X_hat_max) computed against `model` are reused; only the
                    # scoring model changes.
                    fair_losses = []
                    theta_advs = generate_grid(center=theta, widths=alpha, n=15)
                    for theta_adv in theta_advs:
                        model_adv = model_adv.set_theta(theta_adv)
                        Y_hat_max = model_adv(X_hat_max).reshape(-1)
                        fair_loss = proxy(Z_e, Y_hat_max)
                        fair_losses.append(fair_loss.detach().item())
                    
                    # Worst case: keep the perturbation with the largest fairness loss
                    # (np.argmax returns the first maximum on ties).
                    max_i = int(np.argmax(fair_losses))
                    theta_adv = theta_advs[max_i]
                    fair_loss = fair_losses[max_i]
                    total_loss = ((1-lamb) * pred_loss) + (lamb * fair_loss)
                    
                    # Re-score the full post-effort training set with the worst-case model.
                    model_adv = model_adv.set_theta(theta_adv)
                    Y_hat_max = model_adv(X_hat_max_full).reshape(-1)
                    
                    accuracy, rei_disparity = model_performance(train_dataset.Y.detach().numpy(), train_dataset.Z.detach().numpy(), Y_hat.clone().detach().numpy(), Y_hat_max.clone().detach().numpy(), tau)

                    # The same worst-case numbers are stored under both method labels.
                    append_res(results, 'EI', delta, lamb, alpha, total_loss, pred_loss, fair_loss, rei_disparity, 1-accuracy, deepcopy(model), deepcopy(model_adv))
                    append_res(results, 'REI', delta, lamb, alpha, total_loss, pred_loss, fair_loss, rei_disparity, 1-accuracy, deepcopy(model), deepcopy(model_adv))

In [None]:
# Fix torch's global RNG and reuse the same seed for the dataset constructor.
SEED = 0
torch.manual_seed(SEED)

# ----- Dataset -----
dataset = SyntheticDataset(num_samples=1000, seed=SEED)

In [None]:
# Scatter plot of the fold-0 training split.
# Colour encodes the label y, marker shape encodes the group z.
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=0)
X_train, Y_train, Z_train = train_tensors

df = pd.DataFrame({
    'x1': X_train[:,0],
    'x2': X_train[:,1],
    # y and z are stored as the strings '0' / '1' so they can be matched below.
    'y': Y_train.numpy().astype(int).astype(str),
    'z': Z_train.numpy().astype(int).astype(str),
})
symbols = ['x', 'circle']          # indexed by z
colors = ['#636EFA', '#EF553B']    # indexed by y

fig = go.Figure()
for zi, z in enumerate(['0', '1']):
    in_group = df['z'] == z
    for yi, y in enumerate(['0', '1']):
        temp = df[in_group & (df['y'] == y)]
        marker = dict(color=colors[yi], symbol=symbols[zi], size=5)
        fig.add_trace(go.Scatter(x=temp['x1'], y=temp['x2'], marker=marker, mode='markers', name=f'{y}, {z}'))

layout = dict(
    width=720,
    height=540,
    font=dict(family='Iosevka', color='#5d5d5d'),
    title=dict(x=0.5, font=dict(size=20), text=f'{dataset.name.capitalize()} Dataset'),
    legend=dict(font=dict(size=10), title=dict(text='y, z')),
    xaxis=dict(title=dict(text='x1')),
    yaxis=dict(title=dict(text='x2')),
)
fig.update_layout(**layout)

fig.show()

In [None]:
# Re-seed so this cell gives the same result regardless of what ran before it.
torch.manual_seed(0)

# ----- Dataset -----
dataset = SyntheticDataset(num_samples=1000, seed=0)

# ----- Hyperparameters -----
# Alternative lambda sweeps that can be substituted for the single value below:
#   1-np.geomspace(0.001, 0.999, 20)[:13]
#   [0.001, 0.01, 0.1, 0.25, 0.5, 0.75]
params = {
    'lambda': [0.99],
    'alpha': [0.1],
    'tau': 0.5,
    'proxy': fair_batch_proxy,
    'z_blind': True,
    'effort': Optimal_Effort(dataset.delta),
}

# ----- Run Experiment -----
# One empty list per result column; run_gridsearch fills them in place.
results = {
    column: []
    for column in ('method', 'delta', 'lambda', 'alpha', 'loss', 'pred_loss',
                   'fair_loss', 'disparity', 'error', 'model', 'model_adv')
}
run_gridsearch(dataset, params, results)

In [None]:
# Uncomment the two lines below to save the grid-search results to disk.
# with open(f'../gs_lr_synthetic_fb_alpha{params["alpha"]}_lamb{params["lambda"]}.pkl', 'wb') as f:
#     pickle.dump(results, f)

In [None]:
# Pick the best grid-search candidates per lambda and show them side by side.
df = pd.DataFrame(results)
# Extract the raw parameter vectors from the stored model objects.
df[['theta', 'theta_adv']] = df[['model', 'model_adv']].map(lambda model: model.get_theta().numpy())

# Arrays cannot be compared element-wise inside a boolean mask, so thetas are
# matched through their string form; computed once because df is not modified below.
theta_as_str = df['theta'].astype(str)

# Selected single-row frames are collected here and concatenated once at the end,
# instead of growing a DataFrame with repeated pd.concat calls onto an empty frame.
selected_rows = []
for lamb in params['lambda']:
    is_lamb = df['lambda'] == lamb
    for method in ['EI', 'REI']:
        is_method = df['method'] == method
        # Sorted ascending, so alpha == 0 (which defines theta_ei) is visited first.
        for alpha in df['alpha'].sort_values().unique():
            if method =='EI' and alpha==0.:
                # EI baseline: lowest total loss without any perturbation.
                temp = df[is_lamb & is_method & (df['alpha']==0)]
            elif method == 'EI' and alpha > 0:
                # Same EI model as the baseline, evaluated under perturbation alpha.
                temp = df[is_lamb & is_method & (df['alpha']==alpha) & (theta_as_str==str(theta_ei))]
            elif method == 'REI' and alpha > 0:
                # REI: lowest worst-case loss among candidates with non-zero fairness loss.
                temp = df[is_lamb & is_method & (df['alpha']==alpha) & (df['fair_loss']!=0)]
            else:
                continue

            best = temp.iloc[[int(temp['loss'].argmin())]]
            if method == 'EI' and alpha == 0.:
                theta_ei = best['theta'].iloc[0]
            selected_rows.append(best)

df_results = pd.concat(selected_rows) if selected_rows else pd.DataFrame()

columns = ['method', 'lambda', 'alpha', 'loss', 'pred_loss', 'fair_loss', 'error', 'disparity', 'theta', 'theta_adv']
print(f'[{dataset.name.capitalize()} Dataset][{params["proxy"].__name__}] Grid Search')
# Display in groups of three rows, highlighting the minimum of each metric within a group.
for i in range(0, len(df_results), 3):
    display(df_results[columns].iloc[i:i+3].style.highlight_min(subset=['fair_loss', 'error', 'disparity'], color='#D35400'))

In [None]:
# Hand-picked parameter vectors (a, b, c) for the illustrative figure, together
# with the lines a*x1 + b*x2 + c = 0 they define.
ei_theta, ei_theta_adv = np.array([0., 1., 0.]), np.array([2., -1., -2.])
ei_theta_boundary, ei_theta_adv_boundary = (
    generate_boundary(theta) for theta in (ei_theta, ei_theta_adv)
)

In [None]:
# Illustrative figure for the hand-picked ei_theta / ei_theta_adv.
# y_pred is the prediction of `model` on the raw features; y_pred_max is the
# prediction of `model_adv` on the features returned by Optimal_Effort for `model`.
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=0, z_blind=True)
train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
X_train, Y_train, Z_train = train_tensors

num_features = X_train.shape[1]
model = LR(num_features).set_theta(ei_theta)
model_adv = LR(num_features).set_theta(ei_theta_adv)

data = pd.DataFrame({'x1': X_train[:, 0], 'x2': X_train[:, 1], 'y': Y_train, 'z': Z_train})
data['y_pred'] = generate_Y_pred(model, X_train.numpy())
X_hat_max = Optimal_Effort(dataset.delta)(model, train_dataset, X_train)
data['y_pred_max'] = generate_Y_pred(model_adv, X_hat_max.detach().numpy())

# colors[y_pred][y_pred_max]; symbols[z]
colors = {
    0: {
        0: '#636EFA',  # plotly blue
        1: '#0511A1',  # dark blue
    },
    1: {
        0: '#EF553B',  # plotly red
        1: '#EF553B',  # plotly red (alternative dark red: '#af260e')
    },
}
symbols = ['circle', 'x']

# Group values as plain ints so they can index `colors` / `symbols`.
z_values = [int(v) for v in data['z'].sort_values().unique()]
y_pred_values = [int(v) for v in data['y_pred'].sort_values().unique()]
y_pred_max_values = [int(v) for v in data['y_pred_max'].unique()]

fig = go.Figure()
for z in z_values:
    for y_pred in y_pred_values:
        for y_pred_max in y_pred_max_values:
            df_im = data[(data['z']==z) & (data['y_pred']==y_pred) & (data['y_pred_max']==y_pred_max)]
            fig.add_trace(go.Scatter(
                x=df_im['x1'],
                y=df_im['x2'],
                marker=dict(color=colors[y_pred][y_pred_max], symbol=symbols[z], size=4),
                mode='markers',
                name=f'{y_pred_max}, {y_pred}, {z}',
            ))

# Decision boundaries, drawn in this order on top of the points.
boundary_traces = (
    (ei_theta_boundary, 'lightgreen', 'ei_theta'),
    (ei_theta_adv_boundary, 'pink', 'ei_theta_adv'),
    # (rei_theta, 'darkgreen', 'rei_theta'),
    # (rei_theta_adv, 'hotpink', 'rei_theta_adv'),
)
for boundary, line_color, trace_name in boundary_traces:
    fig.add_trace(go.Scatter(
        x=boundary['x1'],
        y=boundary['x2'],
        marker=dict(color=line_color, size=5),
        mode='lines',
        name=trace_name
    ))

title_font = dict(family='Times New Roman', color='black', size=17)
fig.update_layout(
    width=720,
    height=540,
    title=dict(x=0.5, font=title_font, text=f'{dataset.name.capitalize()} Dataset'),
    legend=dict(title='y_pred_max, y_pred, z', font=dict(size=10)),
)

fig.show()

In [None]:
# Decision boundaries for rows of df_results, addressed by position:
# position 1 supplies the EI boundaries and the last position the REI boundaries.
# NOTE(review): this relies on the row order in which df_results was assembled
# (presumably one lambda and one non-zero alpha) -- confirm before changing the sweep.
theta_column = df_results['theta']
theta_adv_column = df_results['theta_adv']

ei_theta_boundary = generate_boundary(theta_column.iloc[1])
ei_theta_adv_boundary = generate_boundary(theta_adv_column.iloc[1])

rei_theta = generate_boundary(theta_column.iloc[-1])
rei_theta_adv = generate_boundary(theta_adv_column.iloc[-1])

In [None]:
# Figure for the grid-search result: training points labelled by the selected
# model's prediction before (y_pred) and after effort (y_pred_max), plus the
# decision boundaries computed from df_results.
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=0, z_blind=params['z_blind'])
train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
X_train, Y_train, Z_train = train_tensors

data = pd.DataFrame({'x1': X_train[:, 0], 'x2': X_train[:, 1], 'y': Y_train, 'z': Z_train})
m = 'ei_theta'

# The model stored at position 1 of df_results is used for both predictions.
ei_model = df_results['model'].iloc[1]
data['y_pred'] = generate_Y_pred(ei_model, train_dataset.X.numpy())
X_hat_max = Optimal_Effort(dataset.delta)(ei_model, train_dataset, train_dataset.X)
data['y_pred_max'] = generate_Y_pred(ei_model, X_hat_max.detach().numpy())

# colors[y_pred][y_pred_max]; symbols[z]
colors = {
    0: {
        0: '#636EFA',  # plotly blue
        1: '#0511a1',  # dark blue
    },
    1: {
        0: '#EF553B',  # plotly red
        1: '#EF553B',  # plotly red (alternative dark red: '#af260e')
    },
}
symbols = ['circle', 'x']

# Group values as plain ints so they can index `colors` / `symbols`.
z_values = [int(v) for v in data['z'].sort_values().unique()]
y_pred_values = [int(v) for v in data['y_pred'].sort_values().unique()]
y_pred_max_values = [int(v) for v in data['y_pred_max'].unique()]

fig = go.Figure()
for z in z_values:
    for y_pred in y_pred_values:
        for y_pred_max in y_pred_max_values:
            df_im = data[(data['z']==z) & (data['y_pred']==y_pred) & (data['y_pred_max']==y_pred_max)]
            fig.add_trace(go.Scatter(
                x=df_im['x1'],
                y=df_im['x2'],
                marker=dict(color=colors[y_pred][y_pred_max], symbol=symbols[z], size=4),
                mode='markers',
                name=f'{y_pred_max}, {y_pred}, {z}',
            ))

# Decision boundaries, drawn in this order on top of the points.
# Uncomment the last two entries to also show the REI theta and theta_adv.
boundary_traces = (
    (ei_theta_boundary, 'lightgreen', 'ei_theta'),
    (ei_theta_adv_boundary, 'pink', 'ei_theta_adv'),
    # (rei_theta, 'darkgreen', 'rei_theta'),
    # (rei_theta_adv, 'hotpink', 'rei_theta_adv'),
)
for boundary, line_color, trace_name in boundary_traces:
    fig.add_trace(go.Scatter(
        x=boundary['x1'],
        y=boundary['x2'],
        marker=dict(color=line_color, size=5),
        mode='lines',
        name=trace_name
    ))

title_font = dict(family='Times New Roman', color='black', size=17)
fig.update_layout(
    width=720,
    height=540,
    title=dict(
        x=0.5,
        font=title_font,
        text=f'{dataset.name.capitalize()} Dataset | alpha: {params["alpha"]} | lambda: {params["lambda"][0]}',
    ),
    legend=dict(title='y_pred_max, y_pred, z', font=dict(size=10)),
)

fig.show()