In [41]:
import json
import os

import numpy as np
import pandas as pd 

from bin.dataset import Dataset
from bin.experiment import Experiment
from bin.metrics import Metrics

from collections import defaultdict

from models.LR import Lr
from models.reduction import Reduction
from models.reweight import Reweight
from models.fair_reduction import FairReduction

from scipy.special import xlog1py
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, f1_score, precision_score, accuracy_score, recall_score
from fairlearn.metrics import (
    MetricFrame, plot_model_comparison,
    selection_rate, demographic_parity_difference, demographic_parity_ratio,
    false_positive_rate, false_negative_rate,
    false_positive_rate_difference, false_negative_rate_difference,true_positive_rate, 
    equalized_odds_difference)

import warnings
warnings.filterwarnings('ignore')


In [42]:
# Registry of the models to audit: display name -> model class.
# The audit loop instantiates each class with no arguments and calls
# .fit(x, y, sensitive) followed by .predict(x) on it.
# Note: FairReduction is imported above but is not registered here.
model = dict(
    LR=Lr,
    Reduction=Reduction,
    Reweight=Reweight,
)

## UTILS

In [43]:
def read_config(path):
    """Read a JSON config file and return its parsed content.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file.

    Returns
    -------
    dict or None
        The parsed JSON document, or ``None`` if the file does not exist
        (a message is printed in that case so the notebook keeps running).

    Raises
    ------
    json.JSONDecodeError
        If the file exists but does not contain valid JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding (which is not UTF-8 on every system).
        with open(path, encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError:
        print("Config file not found.")
        config = None
    return config

def load_csv(path):
    """Read the CSV file at ``path`` into a pandas DataFrame.

    Returns the DataFrame on success. If the file does not exist, prints
    a short message and returns ``None`` instead of raising.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        print("CSV file not found.")
    return None

## MAIN

In [44]:
# Experiment config to audit. Exactly one `exp_conf` assignment should be
# active; the commented-out paths are the alternative configs.
# exp_conf = "configs/adult_noisy.json"
# exp_conf = "configs/compas_noisy.json"
# exp_conf = "configs/synthetic_20_noisy.json"
exp_conf = "configs/income_noisy.json"
# exp_conf = "configs/baseline_config.json"

# read_config prints a message and returns None when the file is missing,
# so EXP can be None at this point.
EXP = read_config(exp_conf)

In [45]:
# List the experiment names defined in the loaded config.
EXP.keys()

dict_keys(['income_bias_0.1', 'income_bias_0.3', 'income_flip_0.1', 'income_flip_0.3', 'income_balanced_0.1', 'income_balanced_0.3'])

In [46]:
# Build one Dataset per selected experiment in the loaded config.
# `datasets` maps experiment name -> Dataset instance.
datasets = {}
if EXP is None:
    # read_config already reported the missing file; make the consequence explicit
    # instead of failing on `None.items()`.
    print("No experiment config loaded; no datasets were built.")
else:
    for name, value in EXP.items():
        # if name in ['adult_bias_0.1','adult_bias_0.3']: continue
        # if name in ['COMPAS_balanced_0.1']: continue
        if name not in ['income_balanced_0.1', 'income_balanced_0.3']: continue
        # if name  not in ['synthetic_20_balanced_0.1', 'synthetic_20_balanced_0.3']: continue
        try:
            datasets[name] = Dataset(value)
            # datasets[name].calculate_probabilities()
        except Exception as e:
            # Best-effort loading: a failure in one experiment is reported with its
            # name and must not prevent the remaining experiments from loading.
            print("Failed to load dataset", repr(name), "-", repr(e))

In [47]:
# Show which experiments were actually loaded as Dataset objects.
datasets

{'income_balanced_0.1': <bin.dataset.Dataset at 0x1656a3950>,
 'income_balanced_0.3': <bin.dataset.Dataset at 0x1655cd4d0>}

In [48]:
# Run the audit on every loaded dataset.
#
# For each experiment: iterate over its folds, fit every model registered in
# `model` on the fold's training split, save the test-split predictions, and
# score them against each label set returned by `data_obj.eval_labels()`.
# One result row per (fold, model, eval_type) is collected in `res` and written
# to <base_path>/<dataset name>/<experiment name>/audit.csv.
base_path = "results"
for exp_name, data_obj in datasets.items():
    print(exp_name)
    # if exp_name not in ['adult_bias_0.1','adult_bias_0.3']: continue
    eval_labels = data_obj.eval_labels()
    meta = {"name": data_obj.name, "noise": data_obj.noise_type, "level": data_obj.noise_level}
    res = []
    pred_dict = defaultdict(dict)  # not used within this cell
    for fold, _data in data_obj.foldwise_data.items():
        print(fold)
        train_data, test_data = _data['train'], _data['test']

        # Features: errors='ignore' tolerates entries of cols_to_drop that are
        # absent from a split.
        x_train = train_data.drop(data_obj.cols_to_drop, axis=1, errors='ignore')
        x_test = test_data.drop(data_obj.cols_to_drop, axis=1, errors='ignore')
        # Targets and sensitive attribute are read from the same split frames.
        y_train, y_test = train_data[data_obj.label], test_data[data_obj.label]
        sv_train = train_data[data_obj.sensitive_attribute]
        sv_test = test_data[data_obj.sensitive_attribute]

        for m_name, clf in model.items():
            _model = clf().fit(x_train, y_train, sv_train)
            y_pred = _model.predict(x_test)

            # Persist the raw predictions for this (model, fold).
            pred_path = os.path.join(base_path, "audit", data_obj.name, data_obj.exp_name, m_name, fold)
            os.makedirs(pred_path, exist_ok=True)
            pd.DataFrame(y_pred).to_csv(os.path.join(pred_path, "preds.csv"))

            meta['fold'] = fold
            meta['model'] = m_name

            for eval_type, _label in eval_labels.items():
                print(eval_type)
                meta['eval_type'] = eval_type
                # `meta` is reused and mutated on every iteration. Hand the metrics
                # code a snapshot so that, if it updates or returns the dict it is
                # given, the rows accumulated in `res` cannot alias one another.
                # NOTE(review): Metrics' handling of `meta` is not visible here.
                perf_dict = Metrics().performance_metrics(_label[fold], y_pred, 0.5, dict(meta))
                fair_dict = Metrics().fairness_metrics(_label[fold], y_pred, sv_test, threshold=0.5, meta=perf_dict)
                # expected_dict = Metrics.estimated_metrics()
                # print(fair_dict)
                res.append(fair_dict)

    # One audit table per experiment.
    save_path = os.path.join(base_path, data_obj.name, data_obj.exp_name)
    os.makedirs(save_path, exist_ok=True)
    pd.DataFrame(res).to_csv(os.path.join(save_path, "audit.csv"))

income_balanced_0.1
x1
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x2
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x3
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x4
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x5
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x6
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x7
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x8
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x9
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
fair_clean
emp_clean
noisy
ground
x10
fair_clean
emp_clean
noisy
grou