In [1]:
import json
import os

import numpy as np
import pandas as pd 

from bin.dataset import Dataset
from bin.experiment import Experiment
from bin.metrics import Metrics

from collections import defaultdict

from models.LR import Lr
from models.reduction import Reduction
from models.reweight import Reweight
from models.fair_reduction import FairReduction

from scipy.special import xlog1py
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, f1_score, precision_score, accuracy_score, recall_score
from fairlearn.metrics import (
    MetricFrame, plot_model_comparison,
    selection_rate, demographic_parity_difference, demographic_parity_ratio,
    false_positive_rate, false_negative_rate,
    false_positive_rate_difference, false_negative_rate_difference,true_positive_rate, 
    equalized_odds_difference)

import warnings
warnings.filterwarnings('ignore')


In [2]:
# Registry of available models: short key -> model class imported above.
model = dict(FR=FairReduction)

In [3]:
def read_config(path):
    """Read a JSON config file and return its parsed content.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file.

    Returns
    -------
    The decoded JSON document, or ``None`` when ``path`` does not exist
    (a notice is printed in that case).

    Raises
    ------
    json.JSONDecodeError
        If the file exists but does not contain valid JSON; this is not
        caught here.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, so configs with non-ASCII text load everywhere.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print("Config file not found.")
        return None

def load_csv(path):
    """Read the CSV file at ``path`` into a pandas DataFrame.

    Returns ``None`` (after printing a notice) when the file does not exist;
    any other read error propagates to the caller.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        print("CSV file not found.")
    return None

In [4]:
# Experiment config files, keyed by a short dataset tag.
CONFIG_PATHS = {
    "adult": "configs/adult_noisy.json",
    "compas": "configs/compas_noisy.json",
    "synthetic_20": "configs/synthetic_20_noisy.json",
    "income": "configs/income_noisy.json",
    "baseline": "configs/baseline_config.json",
}

# Change this one key to switch experiments instead of (un)commenting paths.
CONFIG_KEY = "adult"

exp_conf = CONFIG_PATHS[CONFIG_KEY]

# Parsed experiment config; None if the file is missing (read_config prints a notice).
EXP = read_config(exp_conf)

In [5]:
# Build one Dataset per selected experiment and precompute its "fair" and
# "emp" probabilities. Edit SELECTED_EXPERIMENTS to choose which entries of
# EXP are loaded in this run.
SELECTED_EXPERIMENTS = {'adult_flip_0.3'}

datasets = {}
if EXP is None:
    # read_config returns None when the config file could not be found.
    print("No experiment config loaded (EXP is None); no datasets built.")
else:
    for name, value in EXP.items():
        if name not in SELECTED_EXPERIMENTS:
            continue
        try:
            dataset = Dataset(value)
            dataset.calculate_probabilities("fair")
            dataset.calculate_probabilities("emp")
        except Exception as e:
            # Best-effort loading: say which experiment failed and why, then
            # carry on with the remaining ones.
            print(f"Skipping {name}: {type(e).__name__}: {e}")
            continue
        # Register only after every preparation step succeeded, so later
        # cells never see a partially prepared Dataset.
        datasets[name] = dataset

In [6]:
# Show which experiments were loaded (experiment name -> Dataset object).
datasets

{'adult_flip_0.3': <bin.dataset.Dataset at 0x174bf5310>}

In [7]:
# Run Fair Reduction on every loaded dataset, fold by fold.
#
# For each fold: fit FairReduction on the training split with each available
# weighting, predict on the test split, save the predictions, and score them
# against the 'ground' evaluation labels. One results CSV is written per
# experiment.
#
# Only the empirical ('emp') weighting is active. The 'fair' variant (built
# from train_data['prob'] and data_obj.fair_prob_map) is currently disabled.
#
# NOTE(review): the recorded run of this cell stopped with
# "NameError: name 'e_y_0' is not defined" right after printing the weighting
# name. That name does not occur in this cell, so the fault appears to be in
# code called from here (FairReduction or Metrics) -- confirm there.
base_path = 'results'
threshold = 0.5  # passed to both Metrics calls below
for exp_name, data_obj in datasets.items():
    print(exp_name)
    eval_labels = data_obj.eval_labels()
    # Fields shared by every result row of this experiment.
    meta = {"name": data_obj.name, "noise": data_obj.noise_type, "level": data_obj.noise_level}
    res = []

    pred_dict = defaultdict(dict)
    for fold, _data in data_obj.foldwise_data.items():
        print(fold)
        train_data, test_data = _data['train'], _data['test']
        x_train = train_data.drop(data_obj.cols_to_drop, axis=1, errors='ignore')
        x_test = test_data.drop(data_obj.cols_to_drop, axis=1, errors='ignore')
        y_train, y_test = train_data[data_obj.label], test_data[data_obj.label]
        sv_train, sv_test = train_data[data_obj.sensitive_attribute], test_data[data_obj.sensitive_attribute]

        # Weightings to train with, keyed by a short name used in output paths.
        wts = {}
        if 'emp_prob' in train_data.columns:
            wts['emp'] = {
                "weights": train_data['emp_prob'],
                "p_y": data_obj.emp_prob_map[fold]['train']['p_y'],
                "p_y_s": data_obj.emp_prob_map[fold]['train']['p_y_s'],
            }
        if not wts:
            # Make it visible when a fold contributes no rows to the results.
            print(f"No weight column found for fold {fold}; nothing to fit.")

        for w_name, w in wts.items():
            print(w_name)
            _model = FairReduction().fit(x_train, y_train, sv_train, weights=w)
            y_pred = _model.predict(x_test)

            pred_path = os.path.join(base_path, "fair_reduction", data_obj.name, data_obj.exp_name, f"FR_{w_name}", fold)
            os.makedirs(pred_path, exist_ok=True)
            pd.DataFrame(y_pred).to_csv(os.path.join(pred_path, "preds_2.csv"))
            pred_dict[w_name][fold] = y_pred

            for eval_type, _label in eval_labels.items():
                # Only the 'ground' labels are scored here.
                if eval_type != 'ground':
                    continue
                # Build a fresh dict for every row so result rows can never
                # share one mutable dict, whatever Metrics does with `meta`.
                row_meta = {**meta, "fold": fold, "weights": w_name, "eval_type": eval_type}
                perf_dict = Metrics().performance_metrics(_label[fold], y_pred, threshold, row_meta)
                fair_dict = Metrics().fairness_metrics(_label[fold], y_pred, sv_test, threshold=threshold, meta=perf_dict)
                print(fair_dict)
                res.append(fair_dict)

    save_path = os.path.join(base_path, data_obj.name, data_obj.exp_name)
    os.makedirs(save_path, exist_ok=True)
    pd.DataFrame(res).to_csv(os.path.join(save_path, "fair_reduction_2.csv"))

adult_flip_0.3
x1
emp


NameError: name 'e_y_0' is not defined