In [19]:
from bin.dataset import Dataset
from collections import defaultdict
from bin.metrics import Metrics


import json
import os

import numpy as np
import pandas as pd 


In [20]:
# Names of the models whose stored predictions are evaluated below.
# The evaluation cells in this notebook only use the names; every value is a
# None placeholder.
model = dict.fromkeys(("LR", "Reduction", "Reweight"))

In [21]:
def read_config(path):
    """Read a JSON config file and return its parsed content.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded JSON value, or None (after printing a message) when the
        file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    try:
        # JSON text is UTF-8 by specification; open() would otherwise fall
        # back to the platform's locale encoding and could mis-decode
        # non-ASCII content.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print("Config file not found.")
        return None

def load_csv(path):
    """Read the CSV file at `path` into a pandas DataFrame.

    Returns the DataFrame, or None (after printing a message) when the file
    does not exist.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        print("CSV file not found.")
        return None

In [22]:
# Experiment config to run; exactly one `exp_conf` assignment is active.
# Uncomment another line (and comment out the active one) to switch.
# exp_conf = "configs/adult_noisy.json"
# exp_conf = "configs/compas_noisy.json"
# exp_conf = "configs/synthetic_20_noisy.json"
exp_conf = "configs/income_noisy.json"
# exp_conf = "configs/baseline_config.json"

# Parsed config; read_config returns None (after printing a message) when the
# file does not exist.
EXP = read_config(exp_conf)

In [23]:
# Build one Dataset per experiment in the config and precompute its "fair"
# and "emp" probabilities. `datasets` maps experiment name -> Dataset.
datasets = {}

if isinstance(EXP, dict):
    experiments = EXP
else:
    # read_config returns None for a missing file; anything else that is not
    # a JSON object cannot be iterated as name -> settings either.
    print("Experiment config is missing or is not a JSON object; no datasets loaded.")
    experiments = {}

for name, value in experiments.items():
    # Loading stays best-effort, but per experiment: one broken experiment is
    # reported and skipped instead of silently aborting all remaining ones.
    try:
        dataset = Dataset(value)
        dataset.calculate_probabilities("fair")
        dataset.calculate_probabilities("emp")
    except Exception as e:
        print(f"Skipping {name!r}: {type(e).__name__}: {e}")
        continue
    # Register only after both probability passes succeeded, so later cells
    # never see a partially initialised Dataset.
    datasets[name] = dataset

In [24]:
# Show which experiments were loaded (experiment name -> Dataset object).
datasets

{'income_bias_0.1': <bin.dataset.Dataset at 0x2a2314b10>,
 'income_bias_0.3': <bin.dataset.Dataset at 0x2a2160b50>,
 'income_flip_0.1': <bin.dataset.Dataset at 0x13d4968d0>,
 'income_flip_0.3': <bin.dataset.Dataset at 0x2a251a490>,
 'income_balanced_0.1': <bin.dataset.Dataset at 0x2a250da10>,
 'income_balanced_0.3': <bin.dataset.Dataset at 0x2a22fe150>}

In [26]:
# Run the expected-metrics audit on every loaded experiment.
#
# For each experiment, fold and model this reads the stored predictions from
#   results/audit/<dataset>/<experiment>/<model>/<fold>/preds.csv
# evaluates them with Metrics().estimated_metrics once per available
# weighting scheme ("fair" and/or "emp"), and writes all rows of the
# experiment to results/<dataset>/<experiment>/expected_audit.csv.
#
# NOTE(review): the last recorded run of this cell stopped with `KeyError: 0`
# right after printing the first weighting scheme. `y_pred` is read from CSV
# without an index column, while `sv_test` and the weights are columns of the
# fold's test frame and keep its index - confirm that
# Metrics.estimated_metrics does not rely on matching or zero-based labels.
base_path = "results"
for exp_name, data_obj in datasets.items():
    print(exp_name)
    eval_labels = data_obj.eval_labels()
    meta = {"name": data_obj.name, "noise": data_obj.noise_type, "level": data_obj.noise_level}
    res = []
    for fold, _data in data_obj.foldwise_data.items():
        print(fold)
        sv_test = _data['test'][data_obj.sensitive_attribute]

        # Rebuilt for every fold: a fold without a probability column must not
        # silently reuse the weights of the previous fold.
        # NOTE(review): the presence check looks at the train frame while the
        # weights are read from the test frame - confirm both always carry
        # the same probability columns.
        wts = {}
        if 'prob' in _data['train'].columns:
            wts['fair'] = {
                "weights": _data['test']['prob'],
                "p_y": data_obj.fair_prob_map[fold]['p_y'],
                "p_y_s": data_obj.fair_prob_map[fold]['p_y_s'],
            }
        if 'emp_prob' in _data['train'].columns:
            wts['emp'] = {
                "weights": _data['test']['emp_prob'],
                "p_y": data_obj.emp_prob_map[fold]['test']['p_y'],
                "p_y_s": data_obj.emp_prob_map[fold]['test']['p_y_s'],
            }

        for m_name in model:
            pred_path = os.path.join(base_path, "audit", data_obj.name, data_obj.exp_name, m_name, fold)
            y_pred = pd.read_csv(os.path.join(pred_path, 'preds.csv'))['0']

            for w_name, w in wts.items():
                print(w_name)
                # Fresh metadata dict per evaluation so result rows can never
                # alias one shared, later-mutated dict.
                row_meta = {**meta, "fold": fold, "model": m_name, "eval_type": w_name}
                expected_dict = Metrics().estimated_metrics(
                    eval_labels['ground'][fold], y_pred, sv_test, w, meta=row_meta
                )
                print(expected_dict)
                res.append(expected_dict)

    save_path = os.path.join(base_path, data_obj.name, data_obj.exp_name)
    os.makedirs(save_path, exist_ok=True)
    pd.DataFrame(res).to_csv(os.path.join(save_path, "expected_audit.csv"))

income_bias_0.1
x1
fair


KeyError: 0

In [38]:
# Evaluate the stored clean-up predictions of every loaded experiment.
#
# For each experiment, fold, label type and model this reads
#   results/cleanup/<dataset>/<experiment>/<model>/<fold>/<label_type>/preds.csv
# computes performance and fairness metrics against every evaluation label
# set returned by data_obj.eval_labels(), and writes one file per label type:
#   results/<dataset>/<experiment>/<label_type>_cleanup.csv
base_path = "results"

# Label variants the predictions are stored under. The original cell derived
# these two keys from thresholded training probabilities whose values were
# never read, so only the names are kept.
label_types = ("fair", "emp")

for exp_name, data_obj in datasets.items():
    print(exp_name)
    eval_labels = data_obj.eval_labels()
    meta = {"name": data_obj.name, "noise": data_obj.noise_type, "level": data_obj.noise_level}

    # Rows are collected per label type so that each <label_type>_cleanup.csv
    # holds only rows of its own label type, for all folds.
    res_by_label = {label_type: [] for label_type in label_types}

    for fold, _data in data_obj.foldwise_data.items():
        print(fold)
        sv_test = _data['test'][data_obj.sensitive_attribute]

        for label_type in label_types:
            for m_name in model:
                pred_path = os.path.join(base_path, "cleanup", data_obj.name, data_obj.exp_name, m_name, fold, label_type)
                y_pred = pd.read_csv(os.path.join(pred_path, 'preds.csv'))['0']

                for eval_type, _label in eval_labels.items():
                    # Fresh metadata dict per evaluation so result rows can
                    # never alias one shared, later-mutated dict.
                    row_meta = {
                        **meta,
                        "fold": fold,
                        "model": m_name,
                        "label_type": label_type,
                        "eval_type": eval_type,
                    }
                    perf_dict = Metrics().performance_metrics(_label[fold], y_pred, 0.5, row_meta)
                    fair_dict = Metrics().fairness_metrics(_label[fold], y_pred, sv_test, threshold=0.5, meta=perf_dict)
                    print(fair_dict)
                    res_by_label[label_type].append(fair_dict)

    # Write once per experiment, after all folds were evaluated.
    save_path = os.path.join(base_path, data_obj.name, data_obj.exp_name)
    os.makedirs(save_path, exist_ok=True)
    for label_type, rows in res_by_label.items():
        if rows:
            pd.DataFrame(rows).to_csv(os.path.join(save_path, f"{label_type}_cleanup.csv"))

synthetic_20_balanced_0.1
x1


FileNotFoundError: [Errno 2] No such file or directory: 'results/cleanup/synthetic_20/synthetic_20_balanced_0.1/LR/x1/fair/preds.csv'