In [11]:
from bin.dataset import Dataset
from collections import defaultdict
from bin.metrics import Metrics


import json 
import numpy as np
import pandas as pd 
import os

In [3]:
# One slot per model, keyed by the same names that create_audit_table filters
# on in the audit CSV's 'model' column. Every slot starts out as None.
model = dict.fromkeys(("LR", "Reduction", "Reweight"))

In [4]:
def read_config(path):
    """Read a JSON config file and return its parsed contents.

    Args:
        path: Filesystem path of the JSON config file.

    Returns:
        The object produced by ``json.load`` (a dict when the file holds a
        top-level JSON object), or ``None`` when no file exists at ``path``;
        in that case a notice is printed instead of raising.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        # JSON text is UTF-8 by specification. Decode it explicitly rather
        # than with the platform's locale encoding, which is not UTF-8 on
        # every system and would corrupt or reject non-ASCII config values.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print("Config file not found.")
        return None

def load_csv(path):
    """Read the CSV file at ``path`` into a pandas DataFrame.

    A missing file is not an error for the caller: a notice is printed and
    ``None`` is returned in place of the DataFrame.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        print("CSV file not found.")
    return None

In [7]:
# Experiment config to load. Exactly one `exp_conf` assignment should be
# active; the commented-out lines are the alternative config files.
# exp_conf = "configs/adult_noisy.json"
# exp_conf = "configs/compas_noisy.json"
exp_conf = "configs/synthetic_20_noisy.json"
# exp_conf = "configs/income_noisy.json"
# exp_conf = "configs/baseline_config.json"

# Parsed config. read_config prints a notice and returns None when the file
# is missing, so EXP may be None here. The dataset-building cell iterates
# EXP.items() and passes each value to Dataset.
EXP = read_config(exp_conf)

In [10]:
# Build one Dataset per experiment in EXP and precompute its "fair" and "emp"
# probabilities.
#
# Each experiment is handled on its own: a failure is reported together with
# the experiment's name and the remaining experiments still run. A dataset is
# stored only after both probability passes succeeded, so `datasets` never
# holds a half-prepared entry.
datasets = {}
if EXP is None:
    # read_config returns None when the config file could not be found.
    print("No experiment config loaded; no datasets were built.")
else:
    for name, value in EXP.items():
        try:
            dataset = Dataset(value)
            dataset.calculate_probabilities("fair")
            dataset.calculate_probabilities("emp")
        except Exception as e:
            # Deliberately best-effort: report which experiment failed and
            # with what error, then carry on with the next one.
            print(f"{name}: {type(e).__name__}: {e}")
        else:
            datasets[name] = dataset

In [None]:
# Noise names and noise levels for the audit tables; presumably the arguments
# handed to create_audit_table (confirm against the calling cell).
# create_audit_table interpolates a noise name and a level into an experiment
# directory name (f"{name}_{noise}_{level}"), so the levels stay strings.
noises = [
    "bias",
    "balanced",
    "flip",
]
levels = [
    "0.1",
    "0.3",
]

In [None]:
def create_audit_table(noises, levels, base_path, name):
    cols = ['metric', 'LR', 'reduction', 'reweight']
    df = []
    for noise in noises:
        for level in ['0.1','0.3']:
            audit_df = pd.read_csv(os.path.join(base_path, f"{name}_{noise}_{level}", "audit.csv"))
            meta = [noise, level]
            for metric in ['accuracy', 'Equal_Opportunity','equalized_odds']:
                for eval_type in [ 'noisy','fair_clean', 'emp_clean']:
                    temp = [metric, eval_type]
                    for m in ['LR', 'Reduction', 'Reweight']:
                        temp_df = audit_df[(audit_df['model'] == m)]
                        # temp.append((temp_df[temp_df['eval_type'] == eval_type][metric].reset_index() - temp_df[temp_df['eval_type'] == 'ground'][metric].reset_index())[metric].mean())
                        temp.append(
                            f"{(temp_df[temp_df['eval_type'] == eval_type][metric].reset_index() - temp_df[temp_df['eval_type'] == 'ground'][metric].reset_index())[metric].mean():.3f} + {(temp_df[temp_df['eval_type'] == eval_type][metric].reset_index() - temp_df[temp_df['eval_type'] == 'ground'][metric].reset_index())[metric].std():.3f}"
                            )
                    df.append( meta + temp)
            # print(audit_df)
            