In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from collections import Counter
import cvxpy as cp

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from tqdm.notebook import tqdm
import higher
import pandas as pd
import lale.lib.aif360

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, roc_auc_score

In [None]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds, in this order: Python's ``random`` module, NumPy's global
    generator, PyTorch's default generator and PyTorch's CUDA generator.

    Parameters:
    seed (int): The seed value handed to each generator.
    """
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for seeder in seeders:
        seeder(seed)


# Cell output: whether PyTorch can see a CUDA device.
torch.cuda.is_available()

In [None]:
dataset = "bank"
# Supported values: "adult", "bank", "us_crime", "student_por", "default_credit", "titanic"
sensitive = 0  # index into the dataset's list of protected attributes

class Args:
    """Hyper-parameters of the run.

    The dataset-specific values are chosen while the class body executes,
    based on the module-level ``dataset`` and ``sensitive`` settings above.
    """
    # Defaults shared by every configuration (a branch below may override them).
    res_epochs = 50        # number of training epochs
    momentum = 0.9         # SGD momentum
    weight_decay = 5e-4    # SGD weight decay
    num_seeds = 5          # number of seeds (0 .. num_seeds-1) each method is run with
    if dataset == 'adult' and sensitive == 1: # sex
        lr = 1e-3
        p_drop = 0.2
        batch_size = 512
        bargain_epochs = 15
    elif dataset == 'adult' and sensitive == 0: # race
        lr = 5e-4
        p_drop = 0.4
        batch_size = 512
        bargain_epochs = 15
    elif dataset == 'bank':
        lr = 1e-3
        p_drop = 0.3
        batch_size = 512
        bargain_epochs = 15
    elif dataset == 'us_crime':
        lr = 1e-4
        p_drop = 0.2
        batch_size = 32
        bargain_epochs = 15
    elif dataset == 'default_credit':
        res_epochs = 100
        lr = 1e-3
        p_drop = 0.5
        batch_size = 512
        bargain_epochs = 100
    elif dataset == 'student_por':
        lr = 1e-3
        p_drop = 0.05
        batch_size = 32
        bargain_epochs = 10
    elif dataset == 'titanic':
        lr = 1e-3
        p_drop = 0.4
        batch_size = 128
        bargain_epochs = 50
    else:
        # Fail fast: without this branch lr / p_drop / batch_size / bargain_epochs
        # would simply be missing and only surface later as an AttributeError.
        raise ValueError(
            f"No hyper-parameters defined for dataset={dataset!r} with sensitive={sensitive!r}"
        )
args=Args()

In [None]:
# Maps the short dataset key used in this notebook to the name embedded in the
# matching fetcher, ``fetch_<name>_df`` (currently every key maps to itself).
dataset_names = {
    name: name
    for name in (
        "adult",
        "bank",
        "default_credit",
        "titanic",
        "student_por",
        "tae",
        "us_crime",
    )
}


def try_fetch(dataset_name):
    """Fetch a dataset through ``lale.lib.aif360.datasets``.

    Parameters:
    dataset_name (str): A key of ``dataset_names``.

    Returns:
    tuple or None: ``(X, y, fairness_info)`` as produced by the fetcher, or
    ``None`` (after printing a notice) when the fetcher raises ``SystemExit``.
    """
    long_name = dataset_names[dataset_name]
    fetch = getattr(lale.lib.aif360.datasets, f"fetch_{long_name}_df")
    try:
        fetched = fetch()
    except SystemExit:
        print(f"skipping {dataset_name} because it is not downloaded")
        return None
    X, y, fairness_info = fetched
    return X, y, fairness_info

In [None]:
# Fetch the raw feature frame, the labels and the fairness metadata.
X_pre, y, fairness_info = try_fetch(dataset)

# Binary target: 1 where the label equals the first favorable label, 0 otherwise.
favorable_label = fairness_info['favorable_labels'][0]
y_true = np.asarray((y == favorable_label).astype(int))
n_classes = y_true.max()+1

X_pre = X_pre.reset_index(drop=True)

# Column names of all protected attributes, in the order listed in fairness_info.
sesitive_feature = [attr['feature'] for attr in fairness_info['protected_attributes']]

if dataset == 'titanic':
    # Drop the "name" column and replace missing values with 0.
    X_pre = X_pre.drop(columns="name").fillna(0)

In [None]:
# Pick the protected attribute selected by `sensitive` and pull out its column.
current_sensitive = sesitive_feature[sensitive]
sensitive_attr = X_pre[current_sensitive]

if current_sensitive == 'age':
    # Binarize age against reference_group[0][0]
    # (presumably the lower bound of the reference age range — TODO confirm).
    age_threshold = fairness_info['protected_attributes'][sensitive]['reference_group'][0][0]
    sensitive_attr = (sensitive_attr > age_threshold).astype(int).rename('age')

# One-hot encode the feature frame.
X = pd.get_dummies(X_pre)

In [None]:
# Put the columns into a fixed (sorted) order before scaling.
X = X.sort_index(axis=1)
unique_groups = sensitive_attr.unique()
indict = np.arange(len(sensitive_attr))
# NOTE(review): the scaler is fitted on every row of X, i.e. not on a training subset only.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
def load_combined_arrays(file_path):
    """
    Load three numpy arrays from a single ``.npz`` file.

    Parameters:
    file_path (str): The path of the file containing the arrays. It must hold
        the keys 'ind_train', 'ind_test' and 'val_idx'.

    Returns:
    tuple: The arrays stored under 'ind_train', 'ind_test' and 'val_idx',
        in that order.
    """

    # np.load keeps the archive open until it is closed; read the three arrays
    # inside a `with` block so the file handle is released right away.
    with np.load(file_path) as data:
        return data['ind_train'], data['ind_test'], data['val_idx']

# Row indices of the stored train / test / validation split for this dataset and attribute.
ind_train, ind_test, val_idx = load_combined_arrays(
    file_path=f'./data_idx/{dataset}/{current_sensitive}/combined_arrays.npz'
)

In [None]:
# Slice features, binary labels and the sensitive attribute by each split's row indices.
split_indices = (ind_train, ind_test, val_idx)
X_train, X_test, X_val = (X[idx] for idx in split_indices)
y_train, y_test, y_val = (y_true[idx] for idx in split_indices)
s_train, s_test, s_val = (sensitive_attr[idx].values for idx in split_indices)
val_size = len(val_idx)


In [None]:
def _print_split_distribution(title, split_idx, split_labels, show_overall=False):
    """Print group counts and per-group label counts for one data split.

    Parameters:
    title (str): Heading printed first.
    split_idx: Row indices of the split, used to index ``sensitive_attr``.
    split_labels: Binary labels of the split, aligned with ``split_idx``.
    show_overall (bool): Also print the label counts over the whole split.
    """
    split_groups = sensitive_attr[split_idx]
    print(title)
    print(split_groups.value_counts())
    if show_overall:
        print(Counter(split_labels))
    for group in unique_groups:
        print(group, Counter(split_labels[split_groups == group]))


_print_split_distribution('\nTrain Distribution', ind_train, y_train)
_print_split_distribution('\nTest Distribution', ind_test, y_test)
_print_split_distribution('\nVal(Meta) Distribution', val_idx, y_val, show_overall=True)

In [None]:
class PandasDataSet(TensorDataset):
    """``TensorDataset`` built from NumPy arrays and/or pandas objects.

    Every positional argument is converted to a float tensor and handed to
    ``TensorDataset`` in the given order.
    """

    def __init__(self, *dataframes):
        converted = [self._df_to_tensor(frame) for frame in dataframes]
        super().__init__(*converted)

    def _df_to_tensor(self, df):
        """Return ``df`` as a float tensor; non-ndarray inputs go through ``df.values``."""
        values = df if isinstance(df, np.ndarray) else df.values
        return torch.from_numpy(values).float()

    
# Each training sample carries its features, its binary label and its original row index.
train_data = PandasDataSet(X_train, y_train, ind_train)
train_loader = DataLoader(
    train_data,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=True,  # an incomplete final batch is discarded
)

In [None]:
def fair_eval(y_test,y_pred_labels,unique_groups):
    """Compute per-group AUC, TPR, FPR and F1 on the test split.

    Group membership comes from the module-level ``sensitive_attr`` indexed by
    ``ind_test``, so both arrays must be aligned with the test split.

    Parameters:
    y_test: True binary labels of the test split.
    y_pred_labels: Predicted hard labels, aligned with ``y_test``.
    unique_groups: Values of the sensitive attribute to evaluate.

    Returns:
    tuple: Four lists (AUC, TPR, FPR, F1), each holding one entry per group
        in the order of ``unique_groups``.
    """
    test_groups = sensitive_attr[ind_test]
    per_group = {"AUC": [], "TPR": [], "FPR": [], "F1": []}
    for group in unique_groups:
        in_group = test_groups == group
        truth, predicted = y_test[in_group], y_pred_labels[in_group]
        per_group["AUC"].append(roc_auc_score(truth, predicted))
        tn, fp, fn, tp = confusion_matrix(truth, predicted).ravel()
        # The small epsilon keeps the rates finite when a denominator is zero.
        per_group["TPR"].append(tp / (tp + fn + 1e-8))  # True Positive Rate
        per_group["FPR"].append(fp / (fp + tn + 1e-8))  # False Positive Rate
        per_group["F1"].append(2*tp/(2*tp+fn+fp))
    return per_group["AUC"], per_group["TPR"], per_group["FPR"], per_group["F1"]

n_hid = 64  # hidden-layer width of the classifier (alternative noted by the author: 512)


def bargaining_fail_eval(grads, overall_grad):
    """Check whether the chosen update direction conflicts with any group gradient.

    Parameters:
    grads (dict): Maps a group index to that group's 1-D gradient tensor.
    overall_grad (torch.Tensor): The 1-D gradient that is actually applied.

    Returns:
    tuple: ``(failed, similarity)`` where ``similarity`` is a numpy array of
        the dot products of ``overall_grad`` with each group gradient and
        ``failed`` is 1 if any dot product is <= 0, otherwise 0.
    """
    dot_products = [torch.dot(overall_grad, group_grad).item() for group_grad in grads.values()]
    similarity = np.asarray(dot_products)
    failed = int((similarity <= 0).any())
    return failed, similarity

# Number of players in the bargaining game: two per sensitive group
# (presumably one per (group, binary label) pair — TODO confirm against the validation-set ordering).
bargain_groups = 2*len(unique_groups)

def mean_std(group_mets):
    """Summarize the worst column of each row after averaging over axis 0.

    ``group_mets`` is converted to a 3-D array and reduced over its first axis
    (assumed layout: (runs, epochs, groups) — TODO confirm). For every row of
    the resulting mean matrix the smallest mean is selected together with the
    standard deviation found at that same position.

    Parameters:
    group_mets: Nested sequence / array with three dimensions.

    Returns:
    numpy.ndarray: One ``[min_mean, std_at_min]`` pair per row.
    """
    stacked = np.asarray(group_mets)
    means = np.mean(stacked, axis=0)
    stds = np.std(stacked, axis=0)
    worst_mean = np.min(means, axis=1)
    worst_column = np.argmin(means, axis=1)
    worst_std = stds[np.arange(len(means)), worst_column]
    return np.asarray([[mean, std] for mean, std in zip(worst_mean, worst_std)])

def _stop_criteria(gtg, alpha_t):
    return (
        (alpha_param.value is None)
        or (np.linalg.norm(gtg @ alpha_t - 1 / (alpha_t + 1e-10)) < 1e-3)
        or (
            np.linalg.norm(alpha_param.value - prvs_alpha_param.value)
            < 1e-3
        )
    )


def return_weights(grads, prvs_alpha):
    """Solve for the bargaining weights of the group gradients.

    Works on the module-level cvxpy objects ``G_param``,
    ``normalization_factor_param``, ``alpha_param``, ``prvs_alpha_param`` and
    ``prob``, which must have been created before this function is called.

    Parameters:
    grads (dict): Maps a group index to that group's 1-D gradient tensor.
    prvs_alpha (numpy.ndarray): Weights used as the starting point.

    Returns:
    numpy.ndarray: The last accepted weight vector, or ``prvs_alpha`` when
        that vector is missing or contains NaN / inf.
    """
    G = torch.stack(tuple(grads.values()))
    GTG = torch.mm(G, G.t())
    # Scale the Gram matrix by its norm; fall back to 1.0 if the norm is unusable.
    normalization_factor = (
        torch.norm(GTG).detach().cpu().numpy().reshape((1,)) + 1e-6
        )
    if (np.isnan(normalization_factor) | np.isinf(normalization_factor)).any():
        normalization_factor = np.array([1.0])
    GTG = GTG / normalization_factor.item()
    gtg = GTG.cpu().detach().numpy()
    G_param.value = gtg
    normalization_factor_param.value = normalization_factor

    optim_niter=100
    alpha_t = prvs_alpha
    for _ in range(optim_niter):
        try:
            alpha_param.value = alpha_t
            prvs_alpha_param.value = alpha_t
            prob.solve(solver=cp.ECOS, warm_start=True, max_iters=100)
        except Exception:
            # Solver errors are tolerated (fall back to the previous weights),
            # but KeyboardInterrupt / SystemExit must still be able to stop the run.
            alpha_param.value = prvs_alpha_param.value

        # The check uses the weights this solve started from; alpha_t is only
        # advanced when another iteration is needed.
        if _stop_criteria(gtg, alpha_t):
            break

        alpha_t = alpha_param.value
    if alpha_t is not None and not (np.isnan(alpha_t) | np.isinf(alpha_t)).any():
        return alpha_t
    else:
        return prvs_alpha
    
    
import warnings
warnings.filterwarnings('ignore')

def F1Loss(y_f, y):
    """Per-sample soft F1-style loss term.

    The logits are squashed with a sigmoid and combined with the one-hot
    labels into soft true-positive / false-positive / false-negative values;
    ``-2*tp / (2*tp + fn + fp)`` is summed over the class axis and passed
    through a sigmoid once more.

    Parameters:
    y_f (torch.Tensor): Model outputs, one row per sample and one column per class.
    y (torch.Tensor): Integer class labels (int64), one per sample; encoded
        with ``num_classes=2``.

    Returns:
    torch.Tensor: One loss value per sample.
    """
    one_hot = F.one_hot(y, num_classes=2)
    # Evaluate the sigmoid once and reuse it for all soft counts.
    probs = torch.sigmoid(y_f)
    tp = probs * one_hot
    fp = probs * (1 - one_hot)
    fn = (1 - probs) * one_hot
    return torch.sigmoid(torch.sum(-2*tp/(2*tp+fn+fp),axis=1))

In [None]:
class Classifier(nn.Module):
    """Feed-forward classifier with two hidden ReLU layers and dropout.

    Layer widths are ``n_features -> 2*n_hidden -> n_hidden -> n_class``.
    ``forward`` returns the output of the last linear layer as-is.
    """

    def __init__(self, n_features, n_class=2, n_hidden=64, p_dropout=args.p_drop):
        super().__init__()
        wide = n_hidden*2
        layers = [
            nn.Linear(n_features, wide),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            nn.Linear(wide, n_hidden),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            nn.Linear(n_hidden, n_class),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the network and return its raw outputs."""
        outputs = self.network(x)
        return outputs

In [None]:
import os
import json

def save_metrics(method, metrics, meta_methods, new_nego_rec=None):
    """
    Saves the given metrics as JSON files in a directory specific to the method.

    The target directory is ``./results/<dataset>/<current_sensitive>/<method>/<seed>``.
    ``dataset``, ``current_sensitive``, ``seed`` and ``train_loader`` are read
    from module level, so they must be set before this function is called.

    :param method: The name of the method, used to create a directory.
    :param metrics: A dictionary containing the metrics to be saved. For meta
        methods the key 'nego_success' is added to this dictionary in place.
    :param meta_methods: A list of methods considered as meta methods.
    :param new_nego_rec: Additional metrics specific to meta methods: one 0/1
        failure flag per training batch.
    """
    # Create the directory for the method; exist_ok avoids the check-then-create race.
    directory = f"./results/{dataset}/{current_sensitive}/{method}/{seed}"
    os.makedirs(directory, exist_ok=True)

    # If the method is a meta method, turn the per-batch failure flags into a
    # per-epoch success rate (len(train_loader) batches make up one epoch).
    if method in meta_methods and new_nego_rec is not None:
        batches_per_epoch = len(train_loader)
        successes = 1-np.asarray(new_nego_rec)
        per_epoch = np.asarray(successes.reshape(-1,batches_per_epoch).sum(axis=1))/batches_per_epoch
        metrics['nego_success'] = per_epoch

    # Iterate over the metrics and save each one in a separate file
    for metric_name, metric_values in metrics.items():
        file_path = os.path.join(directory, f"{metric_name}.json")
        with open(file_path, 'w') as file:
            # Convert numpy arrays to lists for JSON serialization
            if isinstance(metric_values, np.ndarray):
                metric_values = metric_values.tolist()
            json.dump(metric_values, file)

    print(f"Metrics for {method} saved in {directory}")


In [None]:
# Training driver: every method is trained from scratch once per seed, evaluated
# on the test split after each epoch, and its metrics are written to disk.
classic_methods = ['baseline', 'DRO']
meta_methods = ['loss', 'FORML', 'meta_group_dro', 'nash_loss', 'nash_meta_group_dro', 'nash_FORML']
methods = ['baseline', 'DRO', 'FORML', 'nash_FORML', 'loss', 'nash_loss', 'meta_group_dro', 'nash_meta_group_dro']


weight_bargain_all, weight_post_bargain_all = {}, {}
suweight_bargain_all, suweight_post_bargain_all = {}, {}
nego_sims_all = {}

for method in methods:
    gpu = 0
    epoch_AUCs, group_AUCs, new_nego_recs = [], [], []
    
    nego_sims = []
    weight_bargain, weight_post_bargain = [], []
    suweight_bargain, suweight_post_bargain = [], []
    
    for seed in range(args.num_seeds):
        torch.cuda.empty_cache()
        # Seed BEFORE the model is built: the layer weights are drawn from the
        # global torch generator, so seeding afterwards leaves the
        # initialization dependent on whatever ran earlier in the kernel.
        set_seed(seed)
        n_features = X.shape[1]
        model = Classifier(n_features=n_features, n_hidden=n_hid,n_class=n_classes)
        # Use the requested GPU when available, otherwise fall back to the CPU.
        device = torch.device(f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
        model.to(device)

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
        if method in meta_methods:
            new_nego_rec = []
            nego_sim = []

        # Training loop
        epoch_AUC, group_AUC, group_TPR, group_FPR, group_F1 = [], [], [], [], []
        F_group_AUC, S_group_AUC = [], []
        
        for ep in tqdm(range(1, args.res_epochs+1)):
            model.train()  # Set model to training mode
            
            for _, (inputs, labels, ind) in enumerate(train_loader):
                inputs, labels = inputs.to(device=device, non_blocking=True),\
                                labels.to(device=device, non_blocking=True)

                if 'nash' in method:
                    # NOTE(review): the problem below is rebuilt for every mini-batch and
                    # prvs_alpha is reset to ones each time; the "bargaining failed" test
                    # further down relies on that reset.
                    #### Convex Optimization Problem (bargaining game) Initialization ####
                    # Optimization Variable (current alpha)
                    alpha_param = cp.Variable(shape=(bargain_groups,), nonneg=True) # shape: [K,]
                    # G^T G: gradient matrix product
                    init_gtg = np.eye(bargain_groups) # shape: [K, K]
                    G_param = cp.Parameter(
                        shape=(bargain_groups, bargain_groups), value=init_gtg
                        ) # will be updated in-loop with the current GTG
                    # Normalization parameter for the Phi_alpha_(tao)
                    normalization_factor_param = cp.Parameter(
                        shape=(1,), value=np.array([1.0])
                        ) # will be updated in-loop with torch.norm(GTG).detach().cpu().numpy().reshape((1,))
                    # Alpha from iteration 'Tao'
                    prvs_alpha = np.ones(bargain_groups, dtype=np.float32)
                    prvs_alpha_param = cp.Parameter(
                        shape=(bargain_groups,), value=prvs_alpha
                        ) # shape: [K,]
                    # First-order approximation of Phi_alpha using Phi_alpha_(tao)
                    G_prvs_alpha = G_param @ prvs_alpha_param
                    prvs_phi_tag = 1 / prvs_alpha_param + (1 / G_prvs_alpha) @ G_param
                    phi_alpha = prvs_phi_tag @ (alpha_param - prvs_alpha_param)
                    # Beta(alpha)
                    G_alpha = G_param @ alpha_param
                    # Constraint: For any i, Phi_i_alpha >= 0
                    constraint = []
                    for i in range(bargain_groups):
                        constraint.append(
                            -cp.log(alpha_param[i] * normalization_factor_param)
                            - cp.log(G_alpha[i])
                            <= 0
                        )
                    obj = cp.Minimize(
                        cp.sum(G_alpha) + phi_alpha / normalization_factor_param
                    )
                    prob = cp.Problem(obj, constraint)
                    #####################################################

                if method in meta_methods:
                    # 0. entering the meta-loop for calculating the meta-weights for the outer
                    with higher.innerloop_ctx(model, optimizer) as (meta_model, meta_opt):
                        # 1. Update meta model on training data
                        meta_train_outputs = meta_model(inputs)
                        criterion.reduction = 'none'
                        meta_train_loss = criterion(meta_train_outputs, labels.to(torch.int64))

                        # Per-sample weights, initialised to zero; their gradients drive the re-weighting.
                        eps = torch.zeros(meta_train_loss.size(), requires_grad=True, device=device)

                        meta_train_loss = torch.sum(eps * meta_train_loss)
                        meta_opt.step(meta_train_loss)

                        # 2. Compute grads of eps on meta validation data
                        meta_inputs, meta_labels =  torch.from_numpy(X_val).float().to(device), torch.from_numpy(y_val).float().to(device)
                        meta_inputs, meta_labels = meta_inputs.to(device=device, non_blocking=True),\
                                            meta_labels.to(device=device, non_blocking=True)

                        meta_val_outputs = meta_model(meta_inputs)
                        criterion.reduction = 'none'
                        meta_val_loss = criterion(meta_val_outputs, meta_labels.to(torch.int64))
                        meta_val_loss += F1Loss(meta_val_outputs, meta_labels.to(torch.int64))
                        # NOTE(review): the reshape assumes the validation rows are stored as
                        # equally sized, contiguous chunks, one per bargaining player — confirm.
                        bargain_group_loss = meta_val_loss.view(-1,val_size//bargain_groups).mean(axis=1)
                        grads = {}
                        for sensi_idx in range(bargain_groups):
                            grads[sensi_idx] = torch.autograd.grad(bargain_group_loss[sensi_idx], eps, create_graph=True)[0].detach()
                        ###################
                        
                        if method == 'loss':
                            eps_grads = torch.autograd.grad(meta_val_loss.mean(), eps)[0].detach()
                        elif method == 'FORML':
                            val_group_loss = meta_val_loss.view(-1,val_size//len(unique_groups)).mean(axis=1)
                            val_dif_loss = val_group_loss.max() - val_group_loss.min()
                            eps_grads = torch.autograd.grad(val_dif_loss, eps)[0].detach()
                        elif method == 'meta_group_dro':
                            val_group_loss = meta_val_loss.view(-1,val_size//len(unique_groups)).mean(axis=1)
                            val_dif_loss = val_group_loss.max()
                            eps_grads = torch.autograd.grad(val_dif_loss, eps)[0].detach()
                        elif 'nash' in method:
                            if ep<args.bargain_epochs:
                                prvs_alpha = return_weights(grads, prvs_alpha)
                                if np.all(prvs_alpha == 1):
                                    # Bargaining failed, initiate default
                                    if method == 'nash_loss':
                                        # Without this branch eps_grads kept its value from an
                                        # earlier batch (or was undefined on the very first one).
                                        eps_grads = torch.autograd.grad(meta_val_loss.mean(), eps)[0].detach()
                                    elif method == 'nash_meta_group_dro':
                                        nash_val_loss = meta_val_loss.view(-1,val_size//(len(unique_groups))).mean(axis=1).max()
                                        eps_grads = torch.autograd.grad(nash_val_loss, eps)[0].detach()
                                    elif method == 'nash_FORML':
                                        val_group_loss = meta_val_loss.view(-1,val_size//len(unique_groups)).mean(axis=1)
                                        nash_val_loss = val_group_loss.max() - val_group_loss.min()
                                        eps_grads = torch.autograd.grad(nash_val_loss, eps)[0].detach()
                                else:
                                    nash_val_loss = sum([bargain_group_loss[i] * prvs_alpha[i] for i in range(len(prvs_alpha))])
                                    eps_grads = torch.autograd.grad(nash_val_loss, eps)[0].detach()
                            else:
                                if method == 'nash_loss':
                                    eps_grads = torch.autograd.grad(meta_val_loss.mean(), eps)[0].detach()
                                elif method == 'nash_meta_group_dro':
                                    val_group_loss = meta_val_loss.view(-1,val_size//len(unique_groups)).mean(axis=1)
                                    val_dif_loss = val_group_loss.max()
                                    eps_grads = torch.autograd.grad(val_dif_loss, eps)[0].detach()
                                elif method == 'nash_FORML':
                                    val_group_loss = meta_val_loss.view(-1,val_size//len(unique_groups)).mean(axis=1)
                                    val_dif_loss = val_group_loss.max() - val_group_loss.min()
                                    eps_grads = torch.autograd.grad(val_dif_loss, eps)[0].detach()

                        else:
                            raise NotImplementedError
                        bargaining_fail,_ = bargaining_fail_eval(grads, eps_grads)
                        new_nego_rec.append(bargaining_fail)

                # Forward pass
                outputs = model(inputs)
                criterion.reduction = 'none'
                minibatch_loss = criterion(outputs, labels.to(torch.int64))


                # 3. Compute weights for current training batch
                if method in classic_methods:
                    w_tilde = torch.ones_like(minibatch_loss)
                else:
                    w_tilde = torch.clamp(-eps_grads, min=0)

                l2_norm = torch.norm(w_tilde)
                if l2_norm != 0:
                    w = w_tilde / l2_norm
                else:
                    w = w_tilde


                optimizer.zero_grad()
                if method == 'DRO':
                    # Train on the 40% highest-loss samples of the batch only.
                    k = int(len(inputs)*0.4)
                    top_k_losses,_ = torch.topk(minibatch_loss, k)
                    loss = top_k_losses.sum()
                    loss.backward()
                    optimizer.step()

                # Backward pass and optimization
                else:
                    minibatch_loss = torch.sum(w * minibatch_loss)
                    minibatch_loss.backward()
                    optimizer.step()
                
            # inference after each epoch
            model.eval()
            with torch.no_grad(): 
                y_pred = model(torch.from_numpy(X_test).float().to(device))
                y_pred_labels = y_pred.argmax(dim=1).detach().cpu().numpy()
                # Note: the AUC is computed from hard labels, not from scores.
                print(f'|Test AUC: {roc_auc_score(y_test, y_pred_labels):.3f}|', end='\r')
                epoch_AUC.append(roc_auc_score(y_test, y_pred_labels))

                # Fair Metrics
                AUC, TPR, FPR, F1 = fair_eval(y_test,y_pred_labels,unique_groups)
                group_AUC.append(AUC)
                group_TPR.append(TPR)
                group_FPR.append(FPR)
                group_F1.append(F1)
                
        weight_bargain_all[method] = weight_bargain
        weight_post_bargain_all[method] = weight_post_bargain
        suweight_bargain_all[method] = suweight_bargain
        suweight_post_bargain_all[method] = suweight_post_bargain
        
        # Save metrics
        metrics_to_save = {
            'epoch_AUC': np.asarray(epoch_AUC),
            'group_AUC': np.asarray(group_AUC),
            'group_TPR': np.asarray(group_TPR),
            'group_FPR': np.asarray(group_FPR),
            'group_F1': np.asarray(group_F1)
        }
        if method in meta_methods:
            save_metrics(method, metrics_to_save, meta_methods, new_nego_rec=np.asarray(new_nego_rec))
        else:
            save_metrics(method, metrics_to_save, meta_methods)
        
        if method in meta_methods:
            new_nego_recs.append(new_nego_rec)
            nego_sims.append(nego_sim)
        epoch_AUCs.append(epoch_AUC)
        group_AUCs.append(group_AUC)
        

    # Summary over seeds, using the metrics of the last epoch.
    print('\nFinal Results of the {}:'.format(method))
    print('Overall AUC:   {:.3f}±{:.3f}'.format(np.mean(np.asarray(epoch_AUCs)[:,-1]), np.std(np.asarray(epoch_AUCs)[:,-1])))
    print('Group-wise AUC:')
    AUCs = np.asarray(group_AUCs)[:,-1]
    for i in range(len(np.mean(AUCs, axis=0))):
        print('   {:.3f}±{:.3f} ---- {}'.format(np.mean(AUCs, axis=0)[i], np.std(AUCs, axis=0)[i], unique_groups[i]))

In [None]:
def get_metric_data(data, metric, method):
    """Return ``data[method][metric]`` as a new numpy array."""
    method_metrics = data[method]
    return np.array(method_metrics[metric])


def load_and_compute_metrics(base_path, dataset, sensitive_attribute, methods, seeds):
    """Load the saved per-seed metrics and derive per-epoch fairness summaries.

    Files are read from ``<base_path>/<dataset>/<sensitive_attribute>/<method>/<seed>/``:
    ``group_AUC.json``, ``group_TPR.json``, ``epoch_AUC.json`` and, for every
    method other than 'baseline' and 'DRO', ``nego_success.json``.

    Parameters:
    base_path (str): Root directory of the saved results.
    dataset (str): Dataset name used in the path.
    sensitive_attribute (str): Sensitive attribute name used in the path.
    methods: Method names to load.
    seeds: Seeds to load for every method.

    Returns:
    dict: ``data[method][metric]`` is a numpy array with one row per seed.
        Metrics are "average_AUC", "TPRD" (max - min group TPR),
        "group_AUC_disparity" (max - min group AUC), "worst_group_AUC"
        (min group AUC) and, where available, "nego_success".
    """
    def _read_json(directory, stem):
        # Read `<directory>/<stem>.json` and return its parsed content.
        with open(os.path.join(directory, f"{stem}.json"), 'r') as handle:
            return json.load(handle)

    derived_metrics = ("average_AUC", "TPRD", "group_AUC_disparity", "worst_group_AUC")
    data = {name: {metric: [] for metric in derived_metrics} for name in methods}

    for method in methods:
        has_nego = method not in ['baseline',"DRO"]
        if has_nego:
            data[method]["nego_success"] = []
        store = data[method]

        for seed in seeds:
            seed_path = os.path.join(base_path, dataset, sensitive_attribute, method, str(seed))

            # Per-epoch group metrics: one row per epoch, one column per group.
            group_auc = np.array(_read_json(seed_path, "group_AUC"))
            group_tpr = np.array(_read_json(seed_path, "group_TPR"))

            epoch_AUC = _read_json(seed_path, "epoch_AUC")
            store["average_AUC"].append(epoch_AUC)

            if has_nego:
                store["nego_success"].append(_read_json(seed_path, "nego_success"))

            epochs = range(len(epoch_AUC))
            store["TPRD"].append(
                [group_tpr[epoch].max() - group_tpr[epoch].min() for epoch in epochs]
            )
            store["group_AUC_disparity"].append(
                [group_auc[epoch].max() - group_auc[epoch].min() for epoch in epochs]
            )
            store["worst_group_AUC"].append([group_auc[epoch].min() for epoch in epochs])

    # Turn the collected lists into arrays for easier slicing downstream.
    for method in methods:
        per_method = data[method]
        for metric in per_method:
            per_method[metric] = np.array(per_method[metric])

    return data

# Load the metrics saved by the training cell for every method and seed.
base_path = "./results"
seeds = range(args.num_seeds)
data = load_and_compute_metrics(
    base_path=base_path,
    dataset=dataset,
    sensitive_attribute=current_sensitive,
    methods=methods,
    seeds=seeds,
)

In [None]:
# For every metric and method, report mean and std across seeds at the epoch
# where that method's seed-averaged overall AUC is highest.
for metric in ["average_AUC", "group_AUC_disparity", "worst_group_AUC"]:
    for method in methods:
        metric_data = get_metric_data(data, metric, method)

        performace_data = get_metric_data(data, "average_AUC", method)
        best_idx = np.argmax(performace_data.mean(axis=0))
        print(method, metric, f'{metric_data.mean(axis=0)[best_idx]:.3f}', f'{metric_data.std(axis=0)[best_idx]:.3f}')
    print("\n")