# Notebook for running the three classifiers (standard, fair_biased, fair) and the real-world regressors

In [None]:
import os, random
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pickle
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
import optuna
import numpy as np
import pandas as pd
from sklearn.utils.class_weight import compute_class_weight

from modules.helpers import seed
from modules.predictors_modules import FairClfDataModule, StandardClf, FairClf, FairClf_naive, StandardRegressor, FairRegressor

# Hide every CUDA device from this process so that all training below runs on CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Fix the random seed (assumes modules.helpers.seed seeds the RNGs used for
# training -- TODO confirm against the helper).
seed(1)

In [None]:
# Name of the simulated dataset this notebook works on.
experiment = "sim9e"

# Root directory for model outputs and path of the pickled input dataframe.
save_location = "../models"
data_location = f"../data/simulator/{experiment}_full_dataframe"

In [None]:
# Load the pickled dataframe for the selected experiment.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load dataframes produced by this project.
# The handle is called data_file so that the builtin input() is not shadowed
# for the rest of the notebook session.
with open(data_location, "rb") as data_file:
    full_data = pickle.load(data_file)

# Remove the USE/UIE/UDE columns (presumably the simulator's unobserved
# variables -- confirm against the data generator).
full_data = full_data.drop(["USE", "UIE", "UDE"], axis=1)

# With more than two distinct labels, n_classes is the label count and the
# weights cover 0..n_classes-1. Otherwise n_classes is 1 and the weights are
# computed for the two labels 0. and 1.
n_labels = len(np.unique(full_data["Y"]))
n_classes = n_labels if n_labels > 2 else 1
label_ids = np.arange(n_labels if n_labels > 2 else 2, dtype=np.float64)
class_weights = compute_class_weight(class_weight='balanced', classes=label_ids, y=full_data["Y"].astype(float))

# Every column except the label "Y" counts as a feature.
n_features = full_data.drop("Y", axis=1).shape[1]

# Distinct values of the sensitive attribute column "A" and the names of all
# non-label columns.
sensitive_attributes = full_data["A"].unique()
node_names = full_data.drop(["Y"], axis=1).columns.values

In [None]:
# First 60% of the rows, selected by position, with the label column removed.
# .iloc replaces .loc here: .loc slices by index *label* and includes the end
# label, so it yields one extra row on a default index and an arbitrary subset
# on a non-default one. This also matches the fair-regressor cell.
train_data = full_data.iloc[:int(0.6*full_data.shape[0]), :].drop("Y", axis=1)

In [None]:
# Configuration read by the classifier cells and the standard regressor below.
train_config = {
    # network architecture
    "criterion": nn.MSELoss,    # alternative kept from the original: nn.CrossEntropyLoss
    "hidden_dim": 64,
    "dropout": 0.1,
    # fairness parameters
    "sensitivity_parameter": 3.0,
    "fairness_constraints": [0.2, 0.2, 0.2],    # gamma for restricting DE, IE, SE
    # training
    "batch_size": 128,
    "learning_rate": 0.0001,
    "max_epochs": 200,
    "num_workers": 0,
    "nested_epochs_lagrangian": 5,
    # optimization
    "number_of_trials": 1,
}

In [None]:
import warnings
# Suppress all Python warnings for the remainder of the session.
warnings.filterwarnings("ignore")

### A) Standard Classifier (no fairness constraint)

In [None]:
experiment_list = ["sim100d"]

for experiment in experiment_list:

    # Pickled input dataframe for this experiment and root directory for outputs.
    data_location = "../data/simulator/" + experiment + "_full_dataframe"
    save_location = "../models"

    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only load dataframes produced by this project.
    # The handle is called data_file so the builtin input() is not shadowed.
    with open(data_location, "rb") as data_file:
        full_data = pickle.load(data_file)
    full_data = full_data.drop(["USE", "UIE", "UDE"], axis=1)

    # With more than two distinct labels, n_classes is the label count and the
    # weights cover 0..n_classes-1. Otherwise n_classes is 1 and the weights
    # are computed for the two labels 0. and 1.
    n_labels = len(np.unique(full_data["Y"]))
    n_classes = n_labels if n_labels > 2 else 1
    label_ids = np.arange(n_labels if n_labels > 2 else 2, dtype=np.float64)
    class_weights = compute_class_weight(class_weight='balanced', classes=label_ids, y=full_data["Y"].astype(float))
    n_features = full_data.drop("Y", axis=1).shape[1]
    sensitive_attributes = full_data["A"].unique()
    node_names = full_data.drop(["Y"], axis=1).columns.values

    # First 60% of the rows by position, label column removed. .iloc is used
    # because .loc slices by index label and includes the end label.
    train_data = full_data.iloc[:int(0.6*full_data.shape[0]), :].drop("Y", axis=1)

    # Data module and classifier built from the default train_config values.
    dm = FairClfDataModule(
        data_dir=data_location,
        label_col="Y",
        batch_size=train_config['batch_size'],
        num_workers=train_config['num_workers'],
        mode="prediction"
    )
    model = StandardClf(
        n_features=n_features,
        n_classes=n_classes,
        hidden_dim=train_config['hidden_dim'],
        dropout=train_config['dropout'],
        criterion=train_config['criterion'],
        class_weights=class_weights,
        learning_rate=train_config['learning_rate']
    )

#Hyperparameter optimization

    def objective(trial: optuna.trial.Trial) -> float:
        """Train one StandardClf with the trial's hyperparameters.

        Args:
            trial: Optuna trial used to sample learning rate, batch size,
                hidden dimension and dropout.

        Returns:
            The "val_loss" entry of the trainer's callback metrics after fitting.
        """
        learning_rate = trial.suggest_float("learning_rate", 1e-3, 1e-1)
        batch_size = trial.suggest_categorical("batch_size", [64, 128, 256])
        hidden_dim = trial.suggest_int("hidden_dim", low=16, high=256)
        dropout = trial.suggest_float("dropout", low=0, high=0.5)

        # Build a fresh model and data module from the sampled values. Fitting
        # a shared, pre-built model would ignore the sampled hyperparameters
        # and carry trained weights over from one trial to the next.
        trial_model = StandardClf(
            n_features=n_features,
            n_classes=n_classes,
            hidden_dim=hidden_dim,
            dropout=dropout,
            criterion=train_config['criterion'],
            class_weights=class_weights,
            learning_rate=learning_rate
        )
        trial_dm = FairClfDataModule(
            data_dir=data_location,
            label_col="Y",
            batch_size=batch_size,
            num_workers=train_config['num_workers'],
            mode="prediction"
        )

        # accelerator="cpu" alone selects CPU training; the redundant gpus=0
        # argument is omitted, matching the other trainers in this notebook.
        trainer = pl.Trainer(
            max_epochs=train_config['max_epochs'],
            logger=True,
            deterministic=True,
            accelerator="cpu"
        )
        hyperparameters = dict(learning_rate=learning_rate, batch_size=batch_size, hidden_dim=hidden_dim, dropout=dropout)
        trainer.logger.log_hyperparams(hyperparameters)
        trainer.fit(trial_model, datamodule=trial_dm)
        return trainer.callback_metrics["val_loss"].item()

    # Seeded TPE sampler and median pruner for a loss-minimizing study.
    sampler = optuna.samplers.TPESampler(seed=1)
    pruner = optuna.pruners.MedianPruner()

    study = optuna.create_study(sampler=sampler, pruner=pruner, direction="minimize")
    study.optimize(objective, show_progress_bar=False, n_trials=train_config["number_of_trials"])

    # Name under which logs and checkpoints of the final model are stored.
    model_name = f"StandardClf_{experiment}"

    # Report the best trial's loss and hyperparameters.
    print("Best trial:")
    trial = study.best_trial
    print(f"  Validation loss: {trial.value}")
    print("  Parameters: ")
    for param_name, param_value in trial.params.items():
        print(f"    {param_name}: {param_value}")

    
# Train model with optimized hyperparameters

    # Rebuild the classifier and data module from the study's best hyperparameters.
    best_params = study.best_params
    final_model = StandardClf(
        n_features=n_features,
        n_classes=n_classes,
        hidden_dim=best_params["hidden_dim"],
        dropout=best_params["dropout"],
        learning_rate=best_params["learning_rate"],
        criterion=train_config['criterion'],
        class_weights=class_weights,
    )
    dm_final = FairClfDataModule(
        data_dir=data_location,
        label_col="Y",
        batch_size=best_params["batch_size"],
        num_workers=train_config['num_workers'],
        mode="prediction",
    )

    # TensorBoard logs and the single lowest-val_loss checkpoint are written
    # below save_location, keyed by model_name.
    logger = TensorBoardLogger(save_location, name=model_name)
    checkpoint_callback = ModelCheckpoint(
        dirpath=f"{save_location}/{model_name}",
        filename=f"{model_name}_checkpoints",
        monitor="val_loss",
        mode="min",
        save_top_k=1,
    )

    # Stop when val_loss has not improved for 10 consecutive checks.
    early_stop_callback = EarlyStopping(
        monitor="val_loss",
        mode="min",
        min_delta=0.00,
        patience=10,
        verbose=False,
        check_on_train_epoch_end=True,
    )

    final_trainer = pl.Trainer(
        accelerator="cpu",
        deterministic=True,
        max_epochs=train_config['max_epochs'],
        log_every_n_steps=20,
        logger=logger,
        callbacks=[checkpoint_callback, early_stop_callback],
    )

    final_trainer.fit(final_model, dm_final)

### B) Naive Fair Classifier (does not account for unobserved confounding)

In [None]:
# Data module for the naive fair classifier, using the default batch size.
dm_naive = FairClfDataModule(
    data_dir=data_location,
    label_col="Y",
    mode="prediction",
    batch_size=train_config['batch_size'],
    num_workers=train_config['num_workers'],
)

# Naive fair classifier built from the default train_config values, moved to
# CPU and cast to Python float precision.
model_naive = FairClf_naive(
    # data description
    n_features=n_features,
    n_classes=n_classes,
    train_data=train_data,
    column_names=node_names,
    sensitive_attributes=sensitive_attributes,
    class_weights=class_weights,
    # architecture and optimisation
    hidden_dim=train_config['hidden_dim'],
    dropout=train_config['dropout'],
    batch_size=train_config['batch_size'],
    learning_rate=train_config['learning_rate'],
    criterion_pred=train_config['criterion'],
    # fairness settings
    constraints=train_config['fairness_constraints'],
    nested_epochs=train_config['nested_epochs_lagrangian'],
).to("cpu", dtype=float)

# Hyperparameter optimization

def objective(trial: optuna.trial.Trial) -> float:
    """Train one FairClf_naive with the trial's hyperparameters.

    Args:
        trial: Optuna trial used to sample learning rate (log scale), batch
            size, hidden dimension and dropout.

    Returns:
        The "val_loss" entry of the trainer's callback metrics after fitting.
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-2, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256])
    hidden_dim = trial.suggest_int("hidden_dim", low=128, high=256)
    dropout = trial.suggest_float("dropout", low=0, high=0.5)

    # Build a fresh model and data module from the sampled values. Fitting a
    # shared, pre-built model would ignore the sampled hyperparameters and
    # carry trained weights over from one trial to the next.
    trial_model = FairClf_naive(
        n_features=n_features,
        n_classes=n_classes,
        train_data=train_data,
        hidden_dim=hidden_dim,
        dropout=dropout,
        batch_size=batch_size,
        criterion_pred=train_config['criterion'],
        class_weights=class_weights,
        learning_rate=learning_rate,
        sensitive_attributes=sensitive_attributes,
        constraints=train_config['fairness_constraints'],
        nested_epochs=train_config['nested_epochs_lagrangian'],
        column_names=node_names
    ).to("cpu", dtype=float)
    trial_dm = FairClfDataModule(
        data_dir=data_location,
        label_col="Y",
        batch_size=batch_size,
        num_workers=train_config['num_workers'],
        mode="prediction"
    )

    trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        logger=True,
        deterministic=True,
        accelerator="cpu"
    )
    hyperparameters = dict(learning_rate=learning_rate, batch_size=batch_size, hidden_dim=hidden_dim, dropout=dropout)
    trainer.logger.log_hyperparams(hyperparameters)
    trainer.fit(trial_model, datamodule=trial_dm)
    return trainer.callback_metrics["val_loss"].item()

# Seeded TPE sampler and median pruner for a loss-minimizing study.
sampler = optuna.samplers.TPESampler(seed=1)
pruner = optuna.pruners.MedianPruner()

study = optuna.create_study(sampler=sampler, pruner=pruner, direction="minimize")
# The search stops after number_of_trials trials or the 60000 s timeout.
study.optimize(
    objective,
    n_trials=train_config["number_of_trials"],
    timeout=60000,
    show_progress_bar=False,
)

# Name under which logs and checkpoints of the final model are stored.
model_name = f"FairClf_naive_{experiment}"

# Report the best trial's loss and hyperparameters.
print("Best trial:")
trial = study.best_trial
print(f"  Validation loss: {trial.value}")
print("  Parameters: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")

    
# Train model with optimized hyperparameters

# Rebuild the naive fair classifier and its data module from the study's best
# hyperparameters.
best_params = study.best_params
final_model_naive = FairClf_naive(
    # data description
    n_features=n_features,
    n_classes=n_classes,
    train_data=train_data,
    column_names=node_names,
    sensitive_attributes=sensitive_attributes,
    class_weights=class_weights,
    # tuned hyperparameters
    hidden_dim=best_params["hidden_dim"],
    dropout=best_params["dropout"],
    batch_size=best_params["batch_size"],
    learning_rate=best_params["learning_rate"],
    # fixed settings
    criterion_pred=train_config['criterion'],
    constraints=train_config['fairness_constraints'],
    nested_epochs=train_config['nested_epochs_lagrangian'],
).to("cpu", dtype=float)

dm_final_naive = FairClfDataModule(
    data_dir=data_location,
    label_col="Y",
    mode="prediction",
    batch_size=best_params["batch_size"],
    num_workers=train_config['num_workers'],
)

# TensorBoard logs and the single lowest-val_loss checkpoint are written below
# save_location, keyed by model_name.
logger = TensorBoardLogger(save_location, name=model_name)
checkpoint_callback = ModelCheckpoint(
    dirpath=f"{save_location}/{model_name}",
    filename=f"{model_name}_checkpoints",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

final_trainer_naive = pl.Trainer(
    accelerator="cpu",
    deterministic=True,
    max_epochs=train_config['max_epochs'],
    logger=logger,
    callbacks=[checkpoint_callback],
)

final_trainer_naive.fit(final_model_naive, dm_final_naive)

In [None]:
# Train the naive fair classifier directly with the train_config defaults,
# i.e. without consulting a hyperparameter study.
model_name = f"FairClf_naive_{experiment}"

final_model_naive = FairClf_naive(
    # data description
    n_features=n_features,
    n_classes=n_classes,
    train_data=train_data,
    column_names=node_names,
    sensitive_attributes=sensitive_attributes,
    class_weights=class_weights,
    # architecture and optimisation
    hidden_dim=train_config["hidden_dim"],
    dropout=train_config["dropout"],
    batch_size=train_config["batch_size"],
    learning_rate=train_config["learning_rate"],
    criterion_pred=train_config['criterion'],
    # fairness settings
    constraints=train_config['fairness_constraints'],
    nested_epochs=train_config['nested_epochs_lagrangian'],
).to("cpu", dtype=float)

dm_final_naive = FairClfDataModule(
    data_dir=data_location,
    label_col="Y",
    mode="prediction",
    batch_size=train_config["batch_size"],
    num_workers=train_config['num_workers'],
)

# TensorBoard logs and the single lowest-val_loss checkpoint are written below
# save_location, keyed by model_name.
logger = TensorBoardLogger(save_location, name=model_name)
checkpoint_callback = ModelCheckpoint(
    dirpath=f"{save_location}/{model_name}",
    filename=f"{model_name}_checkpoints",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

final_trainer_naive = pl.Trainer(
    accelerator="cpu",
    deterministic=True,
    max_epochs=train_config['max_epochs'],
    logger=logger,
    callbacks=[checkpoint_callback],
)

final_trainer_naive.fit(final_model_naive, dm_final_naive)

### C) Fair Classifier with bounds

In [None]:
experiment_list = ["sim100"]

for experiment in experiment_list:

    # Pickled input dataframe for this experiment and root directory for outputs.
    data_location = "../data/simulator/" + experiment + "_full_dataframe"
    save_location = "../models"

    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only load dataframes produced by this project.
    # The handle is called data_file so the builtin input() is not shadowed.
    with open(data_location, "rb") as data_file:
        full_data = pickle.load(data_file)
    full_data = full_data.drop(["USE", "UIE", "UDE"], axis=1)

    # With more than two distinct labels, n_classes is the label count and the
    # weights cover 0..n_classes-1. Otherwise n_classes is 1 and the weights
    # are computed for the two labels 0. and 1.
    n_labels = len(np.unique(full_data["Y"]))
    n_classes = n_labels if n_labels > 2 else 1
    label_ids = np.arange(n_labels if n_labels > 2 else 2, dtype=np.float64)
    class_weights = compute_class_weight(class_weight='balanced', classes=label_ids, y=full_data["Y"].astype(float))
    n_features = full_data.drop("Y", axis=1).shape[1]
    sensitive_attributes = full_data["A"].unique()
    node_names = full_data.drop(["Y"], axis=1).columns.values

    # First 60% of the rows by position; the label column is kept here.
    # .iloc is used because .loc slices by index label and includes the end label.
    train_data = full_data.iloc[:int(0.6*full_data.shape[0]), :]

    # Checkpoint of the density estimator for this experiment, passed to FairClf.
    checkpointpath_density = save_location + "/density_estimator_" + experiment + "/density_estimator_" + experiment + "_checkpoints.ckpt"

    model_name = "FairClf_" + experiment
    
    # Fair classifier configured from train_config; it also receives the
    # sensitivity parameter and the density-estimator checkpoint path.
    final_model = FairClf(
        # data description
        n_features=n_features,
        n_classes=n_classes,
        train_data=train_data,
        column_names=node_names,
        sensitive_attributes=sensitive_attributes,
        class_weights=class_weights,
        # architecture and optimisation
        hidden_dim=train_config["hidden_dim"],
        dropout=train_config["dropout"],
        batch_size=train_config["batch_size"],
        learning_rate=train_config["learning_rate"],
        criterion_pred=train_config['criterion'],
        # fairness settings
        sensitivity_parameter=train_config["sensitivity_parameter"],
        checkpointpath_density=checkpointpath_density,
        constraints=train_config['fairness_constraints'],
        nested_epochs=train_config['nested_epochs_lagrangian'],
    )
    dm_final = FairClfDataModule(
        data_dir=data_location,
        label_col="Y",
        mode="prediction",
        batch_size=train_config["batch_size"],
        num_workers=train_config['num_workers'],
    )

    # TensorBoard logs and the single lowest-val_loss checkpoint are written
    # below save_location, keyed by model_name.
    logger = TensorBoardLogger(save_location, name=model_name)
    checkpoint_callback = ModelCheckpoint(
        dirpath=f"{save_location}/{model_name}",
        filename=f"{model_name}_checkpoints",
        monitor="val_loss",
        mode="min",
        save_top_k=1,
    )
    # Early stopping watches val_fairness (not val_loss) with a patience of 5.
    early_stop_callback = EarlyStopping(
        monitor="val_fairness",
        mode="min",
        min_delta=0.00,
        patience=5,
        verbose=False,
        check_on_train_epoch_end=True,
    )

    final_trainer = pl.Trainer(
        accelerator="cpu",
        deterministic=True,
        max_epochs=train_config['max_epochs'],
        logger=logger,
        callbacks=[checkpoint_callback, early_stop_callback],
    )

    final_trainer.fit(final_model, dm_final)

# Real-world regressor

### A) Standard Regressor

In [None]:
experiment = "prison"

# Pickled real-world dataframe and root directory for model outputs.
data_location = "../data/prison/prison_dataframe"
save_location = "../models"

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load dataframes produced by this project.
# The handle is called data_file so the builtin input() is not shadowed.
with open(data_location, "rb") as data_file:
    full_data = pickle.load(data_file)
full_data = full_data.drop(["USE", "UIE", "UDE"], axis=1)

# Every column except the label "Y" counts as a feature.
n_features = full_data.drop("Y", axis=1).shape[1]
sensitive_attributes = full_data["A"].unique()
node_names = full_data.drop(["Y"], axis=1).columns.values

# Data module and regressor built from the default train_config values.
dm = FairClfDataModule(
    data_dir=data_location,
    label_col="Y",
    batch_size=train_config['batch_size'],
    num_workers=train_config['num_workers'],
    mode="prediction"
)
model = StandardRegressor(
    n_features=n_features,
    n_classes=1,
    hidden_dim=train_config['hidden_dim'],
    dropout=train_config['dropout'],
    criterion=nn.MSELoss,
    learning_rate=train_config['learning_rate']
)

#Hyperparameter optimization

def objective(trial: optuna.trial.Trial) -> float:
    """Train one StandardRegressor with the trial's hyperparameters.

    Args:
        trial: Optuna trial used to sample learning rate, batch size, hidden
            dimension and dropout.

    Returns:
        The "val_loss" entry of the trainer's callback metrics after fitting.
    """
    learning_rate = trial.suggest_float("learning_rate", 0.5e-4, 1e-1)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128])
    hidden_dim = trial.suggest_categorical("hidden_dim", [32, 64, 128, 256])
    dropout = trial.suggest_float("dropout", low=0, high=0.5)

    # Build a fresh model and data module from the sampled values. Fitting a
    # shared, pre-built model would ignore the sampled hyperparameters and
    # carry trained weights over from one trial to the next.
    trial_model = StandardRegressor(
        n_features=n_features,
        n_classes=1,
        hidden_dim=hidden_dim,
        dropout=dropout,
        criterion=nn.MSELoss,
        learning_rate=learning_rate
    )
    trial_dm = FairClfDataModule(
        data_dir=data_location,
        label_col="Y",
        batch_size=batch_size,
        num_workers=train_config['num_workers'],
        mode="prediction"
    )

    # Stop a trial once val_loss has not improved for 20 consecutive checks.
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=20, verbose=False, mode="min", check_on_train_epoch_end=True)
    # accelerator="cpu" alone selects CPU training; the redundant gpus=0
    # argument is omitted, matching the other trainers in this notebook.
    trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        logger=True,
        callbacks=[early_stop_callback],
        deterministic=True,
        accelerator="cpu"
    )
    hyperparameters = dict(learning_rate=learning_rate, batch_size=batch_size, hidden_dim=hidden_dim, dropout=dropout)
    trainer.logger.log_hyperparams(hyperparameters)
    trainer.fit(trial_model, datamodule=trial_dm)
    return trainer.callback_metrics["val_loss"].item()

# Seeded TPE sampler and median pruner for a loss-minimizing study.
sampler = optuna.samplers.TPESampler(seed=1)
pruner = optuna.pruners.MedianPruner()

study = optuna.create_study(sampler=sampler, pruner=pruner, direction="minimize")
study.optimize(objective, show_progress_bar=False, n_trials=train_config["number_of_trials"])

# Name under which logs and checkpoints of the final model are stored.
model_name = f"StandardRegressor_{experiment}"

# Report the best trial's loss and hyperparameters.
print("Best trial:")
trial = study.best_trial
print(f"  Validation loss: {trial.value}")
print("  Parameters: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")

    
# Train model with optimized hyperparameters

# Rebuild the regressor with the study's best hyperparameters.
# n_classes is fixed to 1, as in the other regressor constructions in this
# notebook, instead of reading a global left over from the classifier cells.
final_model = StandardRegressor(
    n_features=n_features,
    n_classes=1,
    hidden_dim=study.best_params["hidden_dim"],
    dropout=study.best_params["dropout"],
    criterion=train_config['criterion'],
    learning_rate=study.best_params["learning_rate"]
)
dm_final = FairClfDataModule(
    num_workers=train_config['num_workers'],
    data_dir=data_location,
    label_col="Y",
    batch_size=study.best_params["batch_size"],
    mode="prediction"
)

# TensorBoard logs and the single lowest-val_loss checkpoint are written below
# save_location, keyed by model_name.
logger = TensorBoardLogger(save_location, name=model_name)

checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath=save_location + "/" + model_name,
    filename=model_name + "_checkpoints",
    save_top_k=1,
    mode="min",
)

# Stop when val_loss has not improved for 20 consecutive checks.
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=20, verbose=False, mode="min", check_on_train_epoch_end=True)

final_trainer = pl.Trainer(
    max_epochs=train_config['max_epochs'],
    logger=logger,
    callbacks=[checkpoint_callback, early_stop_callback],
    deterministic=True,
    accelerator="cpu",
    log_every_n_steps=20
)

# Fit once. The original cell also built a second, never-used Trainer and
# called final_trainer.fit a second time; both were copy-paste leftovers.
final_trainer.fit(final_model, dm_final)

### B) Fair Regressor

In [None]:
experiment = "prison"
model_location = "../models/"

# Load the trained standard regressor so that the fair regressor can reuse its
# hidden_dim and dropout hyperparameters.
standard_ckpt = f"{model_location}StandardRegressor_{experiment}/StandardRegressor_{experiment}_checkpoints.ckpt"
standard = StandardRegressor.load_from_checkpoint(standard_ckpt)


# Configuration for the fair regressor; replaces the earlier train_config.
train_config = {
    # network architecture
    "criterion": nn.MSELoss,
    "hidden_dim": standard.hparams["hidden_dim"],
    "dropout": standard.hparams["dropout"],
    # fairness parameters
    "sensitivity_parameter": 2.0,
    "fairness_constraints": [0.1, 0.1, 0.1],    # gamma for restricting DE, IE, SE
    # training
    "batch_size": 32,
    "learning_rate": 0.0001,
    "max_epochs": 200,
    "num_workers": 0,
    "nested_epochs_lagrangian": 3,
    # optimization
    "number_of_trials": 1,
}

In [None]:
# Re-seed before training the fair regressor (assumes modules.helpers.seed
# seeds the RNGs used for training -- TODO confirm against the helper).
seed(5)

experiment = "prison"

# Pickled real-world dataframe and root directory for model outputs.
data_location = "../data/prison/prison_dataframe"
save_location = "../models"

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load dataframes produced by this project.
# The handle is called data_file so the builtin input() is not shadowed.
with open(data_location, "rb") as data_file:
    full_data = pickle.load(data_file)
full_data = full_data.drop(["USE", "UIE", "UDE"], axis=1)

# Every column except the label "Y" counts as a feature.
n_features = full_data.drop("Y", axis=1).shape[1]
sensitive_attributes = full_data["A"].unique()
node_names = full_data.drop(["Y"], axis=1).columns.values

# First 60% of the rows by position; the label column is kept here.
train_data = full_data.iloc[:int(0.6*full_data.shape[0]), :]

# Checkpoint of the density estimator for this experiment, passed to FairRegressor.
checkpointpath_density = save_location + "/density_estimator_" + experiment + "/density_estimator_" + experiment + "_checkpoints.ckpt"

# The sensitivity parameter is part of the name, so runs with different values
# are stored under different names.
model_name = "FairRegressor_" + experiment + "_sensitivity_" + str(train_config["sensitivity_parameter"])

    
# Fair regressor configured from train_config; it also receives the
# sensitivity parameter and the density-estimator checkpoint path.
final_model = FairRegressor(
    # data description
    n_features=n_features,
    n_classes=1,
    train_data=train_data,
    column_names=node_names,
    sensitive_attributes=sensitive_attributes,
    # architecture and optimisation
    hidden_dim=train_config["hidden_dim"],
    dropout=train_config["dropout"],
    batch_size=train_config["batch_size"],
    learning_rate=train_config["learning_rate"],
    criterion_pred=train_config['criterion'],
    # fairness settings
    sensitivity_param=train_config["sensitivity_parameter"],
    checkpointpath_density=checkpointpath_density,
    constraints=train_config['fairness_constraints'],
    nested_epochs=train_config['nested_epochs_lagrangian'],
)
dm_final = FairClfDataModule(
    data_dir=data_location,
    label_col="Y",
    mode="prediction",
    batch_size=train_config["batch_size"],
    num_workers=train_config['num_workers'],
)

# TensorBoard logs and the five lowest-val_loss checkpoints are written below
# save_location, keyed by model_name.
logger = TensorBoardLogger(save_location, name=model_name)
checkpoint_callback = ModelCheckpoint(
    dirpath=f"{save_location}/{model_name}",
    filename=f"{model_name}_checkpoints",
    monitor="val_loss",
    mode="min",
    save_top_k=5,
)
# Stop when val_loss has not improved for 5 consecutive checks.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=0.00,
    patience=5,
    verbose=False,
    check_on_train_epoch_end=True,
)

final_trainer = pl.Trainer(
    accelerator="cpu",
    deterministic=True,
    max_epochs=train_config['max_epochs'],
    logger=logger,
    callbacks=[checkpoint_callback, early_stop_callback],
)

final_trainer.fit(final_model, dm_final)