In [1]:
import mlflow
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
# Identifiers of the algorithms being compared. Each one is used as the suffix
# of an MLflow experiment name ('<experiment>_<algorithm>').
algorithms = [
    'PL',
    'STC',
    'CC',
    'HLNC',
    'OBNC',
    'BE',
]

In [3]:
# Prefixes of the MLflow experiment names that are queried; each is combined
# with an algorithm identifier as '<experiment>_<algorithm>'.
# NOTE(review): entries appear to follow '<dataset>_<attribute>[_<value>]' - confirm.
experiments = [
    'adult_sex_Male',
    'adult_race_White',
    'german_sex_Male',
    'compas_sex',
    'compas_race_Caucasian',
    'ricci_Race_W',
    'diabetes_race_Caucasian',
    'titanic_sex',
]

In [4]:
# Newest MLflow runs per experiment, keyed by '<experiment>_<algorithm>'.
# Combinations for which the search returns no rows are left out of the dict.
runs = {}
for exp in experiments:
    for alg in algorithms:
        name = f'{exp}_{alg}'
        run = mlflow.search_runs(
            experiment_names=[name],
            order_by=['start_time DESC'],
        ).head(4)  # keep at most the four most recently started runs
        # presumably four = 2 train sets x 2 test sets - TODO confirm
        if len(run) == 0:
            continue
        runs[name] = run

In [5]:
# Names of the logged metrics to compare (read from the 'metrics.<name>' columns).
metrics = [
    # treated as higher-is-better when the relative change is computed
    'accuracy',
    'roc_auc',
    # every other metric is treated as lower-is-better
    'equal_opportunity_difference',
    'predictive_equality_difference',
    'demographic_parity_difference',
    'equalized_odds_difference',
]

In [6]:
def _relative_improvement(metric, noisy_value, corrected_value):
    """Relative change of ``corrected_value`` w.r.t. ``noisy_value``, signed so that positive means better."""
    if noisy_value == 0:
        # Relative change is undefined for a zero baseline; report 0 by convention.
        return 0
    if metric in ('accuracy', 'roc_auc'):
        # Higher is better: improvement is the relative increase.
        return (corrected_value - noisy_value) / noisy_value
    # Every other metric is treated as lower-is-better: improvement is the relative decrease.
    # NOTE(review): a negative noisy_value flips the sign of the result; presumably
    # the logged difference metrics are non-negative - confirm.
    return (noisy_value - corrected_value) / noisy_value


def get_results(test_set, experiments, *, run_tables=None, algorithm_names=None, metric_names=None):
    """Compute, per algorithm, the relative improvement of 'corrected' over 'noisy' training.

    For every algorithm and experiment the run table stored under
    ``'<experiment>_<algorithm>'`` is filtered to the rows whose
    ``tags.test_set`` equals ``test_set``; the first row tagged
    ``tags.train_set == 'noisy'`` is compared with the first row tagged
    ``'corrected'`` for each metric (column ``metrics.<name>``).

    Args:
        test_set: Value of ``tags.test_set`` to select.
        experiments: Iterable of experiment name prefixes.
        run_tables: Mapping ``'<experiment>_<algorithm>' -> DataFrame`` of runs.
            Defaults to the module-level ``runs``.
        algorithm_names: Algorithm identifiers. Defaults to the module-level
            ``algorithms``.
        metric_names: Metric names. Defaults to the module-level ``metrics``.

    Returns:
        dict mapping each algorithm to a float DataFrame indexed by experiment
        with one column per metric. Experiments with no run table, or with no
        noisy/corrected run for ``test_set``, are reported on stdout and left
        out. An algorithm with no usable experiment maps to an empty frame.
    """
    # Fall back to the notebook-level state only when the caller passes nothing.
    run_tables = runs if run_tables is None else run_tables
    algorithm_names = algorithms if algorithm_names is None else algorithm_names
    metric_names = metrics if metric_names is None else metric_names

    results = {}

    for alg in algorithm_names:
        # Keyed by experiment so a repeated experiment name overwrites its row.
        rows = {}

        for exp in experiments:
            key = f'{exp}_{alg}'
            if key not in run_tables:
                print(f'{key} must be repeated')
                continue

            run = run_tables[key]
            on_test_set = run['tags.test_set'] == test_set
            noisy = run.loc[(run['tags.train_set'] == 'noisy') & on_test_set]
            corrected = run.loc[(run['tags.train_set'] == 'corrected') & on_test_set]

            if noisy.empty or corrected.empty:
                # Without both runs there is nothing to compare; indexing [0]
                # below would raise IndexError, so skip like a missing experiment.
                print(f'{key} must be repeated (no noisy/corrected run for test set {test_set!r})')
                continue

            rows[exp] = [
                _relative_improvement(
                    metric,
                    noisy[f'metrics.{metric}'].values[0],
                    corrected[f'metrics.{metric}'].values[0],
                )
                for metric in metric_names
            ]

        # Build the frame once; float dtype keeps describe() numeric even when empty.
        results[alg] = pd.DataFrame(
            list(rows.values()),
            index=list(rows.keys()),
            columns=metric_names,
            dtype=float,
        )

    return results

In [7]:
def store_results_by_alg(results, path):
    """Write one CSV per algorithm into ``<path>/correction alg results``.

    Args:
        results: Mapping from algorithm identifier to a DataFrame. It must
            contain an entry for every name in the module-level ``algorithms``.
        path: Directory under which the output folder is created.

    Raises:
        KeyError: If ``results`` has no entry for one of the ``algorithms``.
    """
    out_dir = f'{path}/correction alg results'
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: there is no
    # window between check and creation, and missing parents are created too.
    os.makedirs(out_dir, exist_ok=True)

    for alg in algorithms:
        results[alg].to_csv(f'{out_dir}/{alg}.csv')

In [8]:
def store_aggregated_results(results, path):
    """Write one CSV per metric into ``<path>/aggregated results``.

    Each file has one row per algorithm (module-level ``algorithms``) and the
    columns ``mean``, ``std``, ``min`` and ``max``, taken from ``describe()``
    of that algorithm's column for the metric.

    Args:
        results: Mapping from algorithm identifier to a DataFrame with one
            column per name in the module-level ``metrics``.
        path: Directory under which the output folder is created.
    """
    out_dir = f'{path}/aggregated results'
    # Race-free replacement for the exists()/mkdir() pair; also creates parents.
    os.makedirs(out_dir, exist_ok=True)

    stat_names = ['mean', 'std', 'min', 'max']

    for metric in metrics:
        agg_results = pd.DataFrame(columns=stat_names)
        for alg in algorithms:
            # describe() also yields count and quartiles; keep only the four statistics.
            agg_results.loc[alg] = results[alg][metric].describe()[stat_names].values
        agg_results.to_csv(f'{out_dir}/{metric}.csv')

In [9]:
def store_results(results, path):
    """Write all result tables for one evaluation setting under ``path``.

    Produces ``<path>/<stat>_results.csv`` for each of mean, std, min and max
    (rows: algorithms, columns: metrics), then delegates to
    ``store_results_by_alg`` and ``store_aggregated_results`` for the
    per-algorithm and per-metric files.

    Args:
        results: Mapping from algorithm identifier to a DataFrame with one
            column per name in the module-level ``metrics``.
        path: Output directory; created (with parents) if it does not exist.
    """
    # Race-free replacement for the exists()/mkdir() pair; also creates parents.
    os.makedirs(path, exist_ok=True)

    # Summarise each (algorithm, metric) column once instead of once per statistic.
    summaries = {
        alg: {metric: results[alg][metric].describe() for metric in metrics}
        for alg in algorithms
    }

    for op in ['mean', 'std', 'min', 'max']:
        df = pd.DataFrame(columns=metrics)
        for alg in algorithms:
            df.loc[alg] = [summaries[alg][metric][op] for metric in metrics]
        df.to_csv(f'{path}/{op}_results.csv')

    store_results_by_alg(results, path)
    store_aggregated_results(results, path)

In [10]:
# Export the comparison once per test set the models were evaluated on.
for test_set in ['noisy', 'corrected']:
    path = f'fairness_results/{test_set} test set'
    # A single makedirs call creates 'fairness_results' and the per-test-set
    # folder, and does nothing when they already exist.
    os.makedirs(path, exist_ok=True)

    results_fair = get_results(test_set, experiments)
    store_results(results_fair, path)