In [None]:
import tqdm
import torch
import numpy as np
import pandas as pd
import plotly.express as px
import torch.optim as optim
from copy import deepcopy
from typing import Iterable

from model import LR
from data import FairnessDataset, Dataset, SyntheticDataset, GermanDataset, IncomeDataset
from ei_effort import Optimal_Effort, PGD_Effort
from ei_utils import model_performance, pareto_frontier
from ei_model import EIModel, fair_batch_proxy, covariance_proxy

In [None]:
def generate_grid(center, width, n=15):
    """Build a regular grid of points in a box centred on ``center``.

    Args:
        center: Sequence (list or 1-D array) with one coordinate per dimension.
        width: Half-width of the box. Either a single scalar applied to every
            dimension (Python or NumPy scalar, or a 0-d array), or a sequence
            with one half-width per dimension.
        n: Number of evenly spaced samples per dimension (``np.linspace``).

    Returns:
        A 2-D array of shape ``(num_points, len(center))``. Rows are the unique
        grid points in the lexicographic order produced by
        ``np.unique(..., axis=0)``, so ``num_points <= n ** len(center)``
        (a zero half-width collapses that dimension to a single value).

    Raises:
        ValueError: If ``width`` is a sequence whose length differs from
            ``len(center)``.
    """
    dims = len(center)
    # np.ndim also recognises NumPy scalars (np.float32, np.int64, ...) and 0-d
    # arrays, which are not instances of the built-in int/float types.
    if np.ndim(width) == 0:
        width = [width] * dims
    elif len(width) != dims:
        raise ValueError(
            f"width has {len(width)} entries but center has {dims} dimensions"
        )
    axes = [np.linspace(center[i] - width[i], center[i] + width[i], n) for i in range(dims)]
    grids = np.meshgrid(*axes)
    # One flattened coordinate array per dimension -> rows of points
    points = np.stack([grid.reshape(-1) for grid in grids]).T
    return np.unique(points, axis=0)

In [None]:
def cache_result(cache, method, lamb, alpha, total_loss, pred_loss, fair_loss, theta, theta_adv, error):
    """Append one experiment record to a column-oriented results dict.

    ``cache`` maps each column name to a list; every call pushes exactly one
    value onto each of the nine lists, so the lists stay the same length and
    the dict can be passed directly to ``pd.DataFrame``. ``cache`` is modified
    in place and nothing is returned.
    """
    record = {
        'method': method,
        'lambda': lamb,
        'alpha': alpha,
        'total_loss': total_loss,
        'pred_loss': pred_loss,
        'fair_loss': fair_loss,
        'theta': theta,
        'theta_adv': theta_adv,
        'error': error,
    }
    for column, value in record.items():
        cache[column].append(value)

In [None]:
# Seed torch first so everything created below is reproducible
torch.manual_seed(0)

# --- Experiment configuration ---
z_blind = True
# Score threshold: the sweep cell treats Y_hat < tau as the group that exerts effort
tau = 0.5
# Weight of the fairness term in the total loss
# (candidate values: 1e-7, 0.25, 0.5, 0.75, 1-1e-7)
lamb = 0.1
# Half-width of the adversarial theta grid built around each theta
alpha = 1.5
proxy = covariance_proxy

# --- Data: synthetic dataset, fold 0 (train / val / test tensors) ---
dataset = SyntheticDataset(num_samples=20000, seed=0)
delta = dataset.delta
effort = Optimal_Effort(dataset.delta)
train_tensors, val_tensors, test_tensors = dataset.tensor(fold=0, z_blind=z_blind)
train_dataset = FairnessDataset(*train_tensors, dataset.imp_feats)
test_dataset = FairnessDataset(*test_tensors, dataset.imp_feats)

# --- Loss and models (one evaluated at theta, one at the adversarial theta) ---
loss_fn = torch.nn.BCELoss(reduction='mean')
num_features = train_dataset.X.shape[1]
model = LR(num_features=num_features)
model_adv = LR(num_features=num_features)

# --- Result columns, filled one row at a time by cache_result ---
results = {
    column: []
    for column in (
        'method', 'lambda', 'alpha', 'total_loss', 'pred_loss',
        'fair_loss', 'theta', 'theta_adv', 'error',
    )
}

In [None]:
# Sweep a grid of candidate parameter vectors (theta). For every theta this cell
# records three rows in `results`:
#   1. 'EI'  with alpha = 0: losses when the post-effort points are scored by theta itself
#   2. 'EI'  with alpha:     losses when they are scored by the worst-case theta_adv
#   3. 'REI' with alpha:     same values as row 2 (only the method label differs)
# NOTE(review): only the single `lamb` from the setup cell is used; the commented-out
# line below suggests an outer loop over several lambdas was planned.
# for lamb in lambdas:

# Generate theta grid: generate_grid's default n=15 points per axis over [-3, 3]^3,
# i.e. up to 15**3 = 3375 candidate thetas
thetas = generate_grid(center=[0., 0., 0.,], width=[3., 3., 3.])
for i in tqdm.trange(len(thetas), colour='#0091FF'):
    theta = thetas[i]
    # Set model theta (relies on set_theta returning the model -- TODO confirm in model.LR)
    model = model.set_theta(torch.from_numpy(theta).float())
    # Get Y_hat, flattened to 1-D
    Y_hat = model(train_dataset.X).reshape(-1)
    # Compute prediction loss (detached: only the value is needed, no backprop here)
    pred_loss = loss_fn(Y_hat, train_dataset.Y).detach().float()
    
    # Get X and Z of the samples whose score is below the threshold tau.
    # Both lines use the same mask: Y_hat is already 1-D, so the extra reshape is a no-op.
    X_e = train_dataset.X[(Y_hat<tau).reshape(-1)]
    Z_e = train_dataset.Z[(Y_hat<tau)]
    
    # Calculate new X after applying effort to X_e
    # (presumably the best-response features under `model`; see ei_effort -- TODO confirm)
    X_effort = effort(model, train_dataset, X_e)
    # Get Y_hat of new X 
    Y_hat_max = model(X_effort).reshape(-1).detach().float()
    # Compute fair loss between group membership Z_e and the post-effort scores
    fair_loss = proxy(Z_e, Y_hat_max).detach().float()
    # Compute total loss: convex combination of the two losses weighted by lamb
    total_loss = ((1-lamb) * pred_loss) + (lamb * fair_loss)
    
    # Calculate error rate (1 - accuracy) of the thresholded predictions on the training set
    Y_pred = (Y_hat>=tau)*1
    error = 1-np.mean(train_dataset.Y.numpy()==Y_pred.numpy())
    
    # Track results: non-adversarial baseline row (alpha = 0, theta_adv = theta)
    cache_result(results, 'EI', lamb, 0., total_loss.item(), pred_loss.item(), fair_loss.item(), theta, theta, error)
    
    # --- Grid Search to find adversarial theta that maximizes fair loss ---
    fair_losses = []
    # Generate adversarial theta grid: up to 15**3 points within +/- alpha of theta per coordinate
    thetas_adv = generate_grid(center=theta, width=alpha)
    for theta_adv in thetas_adv:
        # Set adversarial model theta
        model_adv = model_adv.set_theta(torch.from_numpy(theta_adv).float())
        # Get Y_hat of new X using adversarial theta (X_effort itself is not recomputed)
        Y_hat_max = model_adv(X_effort).reshape(-1)
        # Compute fair loss
        fair_loss = proxy(Z_e, Y_hat_max).detach().float()
        # Append to fair_losses list
        fair_losses.append(fair_loss)
    
    # Find index with largest fair loss
    max_i = int(torch.argmax(torch.tensor(fair_losses)))
    # Set adversarial theta to the one that yields max fair loss
    theta_adv = thetas_adv[max_i]
    # Set adversarial model theta (leaves model_adv at the worst-case theta for this theta)
    model_adv = model_adv.set_theta(torch.from_numpy(theta_adv).float())
    # Get Y_hat of new X using adversarial theta
    Y_hat_max = model_adv(X_effort).reshape(-1)
    # Compute fair loss (repeats the grid-loop computation for the selected theta_adv)
    fair_loss = proxy(Z_e, Y_hat_max).detach().float()
    # Compute total loss; pred_loss is still the one computed at theta above
    total_loss = ((1-lamb) * pred_loss) + (lamb * fair_loss)
    
    # Uncomment this if you want to plot the fair loss against theta_adv
    # df_temp = pd.DataFrame({'fair_loss': fair_losses, 'theta_adv': list(map(lambda x: str(x.round(4)), thetas_adv))})
    # fig = px.line(df_temp, y='fair_loss', x='theta_adv', markers=True)
    # fig.add_hline(y=max(fair_losses))
    # fig.show()
    
    # Track results under the adversarial theta. `error` is the value computed at theta above.
    # NOTE(review): the 'EI' and 'REI' rows below are identical apart from the method label,
    # so a per-method argmin over total_loss selects the same theta for both -- confirm intended.
    cache_result(results, 'EI', lamb, alpha, total_loss.item(), pred_loss.item(), fair_loss.item(), theta, theta_adv, error)
    cache_result(results, 'REI', lamb, alpha, total_loss.item(), pred_loss.item(), fair_loss.item(), theta, theta_adv, error)

In [None]:
# Collect every recorded row, then keep, for each (lambda, method, alpha)
# combination, the single row with the smallest total_loss.
df = pd.DataFrame(results)
# Uncomment this if you want to round the theta vectors (DataFrame.map needs pandas >= 2.1)
# df[['theta', 'theta_adv']] = df[['theta', 'theta_adv']].map(lambda x: x.round(5))

# Loop variables are deliberately NOT called `lamb` / `alpha`: those names are the
# configuration globals of the setup cell, and overwriting them here would change
# what the sweep cell uses if it is re-run after this cell.
best_rows = []
for lamb_value in df['lambda'].unique():
    df_lamb = df[df['lambda'] == lamb_value]
    for method_name in df['method'].unique():
        df_md = df_lamb[df_lamb['method'] == method_name]
        for alpha_value in df_md['alpha'].unique():
            df_md_a = df_md[df_md['alpha'] == alpha_value]
            # argmin is positional, hence iloc; the double brackets keep a 1-row DataFrame
            best_rows.append(df_md_a.iloc[[int(df_md_a['total_loss'].argmin())]])

# Concatenate once instead of growing the frame inside the loop; the original
# row index is preserved. With no results at all, fall back to an empty frame.
df_res = pd.concat(best_rows) if best_rows else pd.DataFrame()
df_res