In [None]:
#save_results ISNetDANN

In [6]:
import os
import json
from pathlib import Path
import pickle
import pandas as pd

In [7]:
# Row labels copied from the 'overall' section of the test / validation results.
# The order here is the column order in the exported CSV.
_OVERALL_METRICS = (
    'AUROC', 'balanced_acc_opt', 'FNR_opt', 'FPR_opt', 'TNR_opt', 'TPR_opt',
    'TP_opt', 'TN_opt', 'FP_opt', 'FN_opt', 'brier',
)

# (output column prefix, row label in the per-attribute results) for every
# per-subgroup metric. Note that 'balanced_acc_opt' is exported without '_opt'.
_SUBGROUP_METRICS = (
    ('AUROC', 'AUROC'),
    ('balanced_acc', 'balanced_acc_opt'),
    ('TN_opt', 'TN_opt'),
    ('FN_opt', 'FN_opt'),
    ('TP_opt', 'TP_opt'),
    ('FP_opt', 'FP_opt'),
    ('FNR_opt', 'FNR_opt'),
    ('FPR_opt', 'FPR_opt'),
    ('TPR_opt', 'TPR_opt'),
    ('TNR_opt', 'TNR_opt'),
    ('prevalence_opt', 'prevalence_opt'),
    ('pred_prevalence_opt', 'pred_prevalence_opt'),
    ('brier', 'brier'),
)

# Number of demographic subgroups exported for each value of args['attr'].
# Any other attribute exports no per-subgroup columns.
_SUBGROUP_COUNTS = {'sex': 2, 'ethnicity': 4, 'age': 4, 'sex_ethnicity': 8}

# Keys of args.json repeated in every exported row to identify the run.
_RUN_ID_KEYS = ('store_name', 'seed', 'hparams_seed', 'task', 'attr')


def _run_id_columns(args):
    """Return the columns that identify a run, taken from its args.json."""
    return {key: args[key] for key in _RUN_ID_KEYS}


def _overall_columns(overall, suffix):
    """Return '<metric>_<suffix>' columns for one 'overall' results mapping."""
    frame = pd.DataFrame.from_dict(overall, orient='index')
    return {f'{metric}_{suffix}': frame.loc[metric].values[0] for metric in _OVERALL_METRICS}


def _subgroup_columns(per_attr_frame, attr):
    """Return '<metric>_attr_<group>' columns for every subgroup of ``attr``."""
    columns = {}
    for group in range(_SUBGROUP_COUNTS.get(attr, 0)):
        for prefix, row_label in _SUBGROUP_METRICS:
            columns[f'{prefix}_attr_{group}'] = per_attr_frame.loc[row_label][group]
    return columns


def _lr_columns(lr_frame, c0, c1):
    """Return '<metric>_va' / '<metric>_te' columns of a logistic-regression result frame."""
    columns = {}
    for metric in ('accuracy', 'balanced_acc', 'AUROC', f'class_{c0}_AUROC', f'class_{c1}_AUROC'):
        for split in ('va', 'te'):
            columns[f'{metric}_{split}'] = lr_frame.loc[metric][split]
    return columns


def process_results(attr_name: str, base_dir: Path, results_dir: Path, fairness_classes=(0, 1)):
    """Collect the evaluation results of every finished run into two CSV files.

    A run is any directory below ``base_dir`` that contains a ``done_eval``
    marker file. For each run the function reads ``args.json`` next to the
    marker and ``base_dir / args['store_name'] / 'final_results_eval.pkl'``.

    Two tables are built, one row per run, sorted by ``task``:

    * test results: overall test ('te') and validation ('va') metrics, the
      fairness gap, and the test metrics of each demographic subgroup;
    * ``lr_cond_avg`` results: accuracy, balanced accuracy and AUROC on the
      'va' and 'te' splits, read from ``results['lr_cond_avg']``.

    The fairness gap is ``(metric[c0] - metric[c1]) * 100`` on the
    per-attribute test results, where ``metric`` is ``FPR_opt`` when the task
    is "No Finding" and ``FNR_opt`` for every other task.

    The unconditional logistic-regression results (``results['lr_uncond']``)
    are currently not exported.

    Parameters
    ----------
    attr_name : str
        Label used in the CSV file names and in the printed summary.
    base_dir : Path
        Directory searched recursively for ``done_eval`` markers.
    results_dir : Path
        Output directory; created if missing.
    fairness_classes : tuple, default (0, 1)
        ``(c0, c1)`` subgroup labels used for the fairness gap and for the
        ``class_<c>_AUROC`` rows of the logistic-regression results.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(df_test, df_lr_cond_avg)``. Both are empty, and no file is
        written, when no finished run is found under ``base_dir``.
    """
    c0, c1 = fairness_classes  # the gap is computed as c0 minus c1

    run_dirs = []
    test_results = []
    lr_cond_avg_results = []

    # Sorted so the row order of the CSVs does not depend on filesystem order.
    for done_marker in sorted(base_dir.glob('**/done_eval')):
        run_dir = done_marker.parent
        run_dirs.append(str(run_dir))

        with (run_dir / 'args.json').open('r') as args_file:
            args = json.load(args_file)

        # NOTE(review): the pickle is located through args['store_name'], not
        # through run_dir; the two are presumably the same directory - confirm.
        results_path = base_dir / args['store_name'] / 'final_results_eval.pkl'
        # SECURITY: pickle.load can execute arbitrary code; only point this
        # function at result files produced by trusted training runs.
        with open(results_path, "rb") as results_file:
            results = pickle.load(results_file)

        # Per-attribute test results: one column per subgroup, one row per metric.
        df_te_attr = pd.DataFrame(results['te']['per_attribute'])

        # Fairness gap between the two selected subgroups, scaled by 100.
        fair_metric = 'FPR_opt' if args['task'] == "No Finding" else 'FNR_opt'
        fair_row = df_te_attr.loc[fair_metric]
        fair_gap = (fair_row[c0] - fair_row[c1]) * 100

        entry = _run_id_columns(args)
        entry.update(_overall_columns(results['te']['overall'], 'te'))
        entry.update(_overall_columns(results['va']['overall'], 'va'))
        entry.update({
            'fair_gap': fair_gap,
            'fair_metric': fair_metric,
            'fair_gap_desc': f'{fair_metric} class {c0} - class {c1}',
        })
        # The number of subgroups depends on the attribute the run was trained for.
        entry.update(_subgroup_columns(df_te_attr, args['attr']))
        test_results.append(entry)

        # Logistic regression conditional by class (averaged) - to avoid clinical confounders.
        lr_entry = _run_id_columns(args)
        lr_entry.update(_lr_columns(pd.DataFrame(results['lr_cond_avg']), c0, c1))
        lr_cond_avg_results.append(lr_entry)

    if not run_dirs:
        # An empty frame has no 'task' column, so sorting it would raise KeyError.
        print(f"No 'done_eval' markers found under {base_dir}; no CSV written.")
        print(f'Total runs for {attr_name}:', 0)
        return pd.DataFrame(), pd.DataFrame()

    # Build the tables; a stable sort keeps runs of the same task in path order.
    df_test = pd.DataFrame(test_results).sort_values(by='task', kind='stable')
    df_lr_cond_avg = pd.DataFrame(lr_cond_avg_results).sort_values(by='task', kind='stable')

    # CSV export
    results_dir.mkdir(parents=True, exist_ok=True)
    df_test.to_csv(results_dir / f'test_results_all_{attr_name}.csv', index=False)
    df_lr_cond_avg.to_csv(results_dir / f'lr_cond_avg_results_all_{attr_name}.csv', index=False)

    print(f"CSV save in: {results_dir}")
    print(f'Total runs for {attr_name}:', len(run_dirs))

    return df_test, df_lr_cond_avg


In [None]:
#========================SEX===================================

In [10]:
# Sex attribute labels: 0 = female, 1 = male.
# Collects the finished runs found under the grid_sex_ISNetDANN training directory.
df_test_results_sex, df_lr_results_sex = process_results(
    'sex',
    Path('/home/lchanch/model_training/ISNetDANN/train/grid_sex_ISNetDANN/'),
    Path('/home/lchanch/models/ISNetDANN/eval/'),
    fairness_classes=(0, 1),
)

CSV save in: /home/lchanch/models/ISNetDANN/eval
Total runs for sex: 32


In [9]:
# Independent runs (grid_sex_ISNetDANN_hp8_final training directory).
# Sex attribute labels: 0 = female, 1 = male.
# NOTE(review): this rebinds df_test_results_sex / df_lr_results_sex, the same
# names assigned by the previous process_results call in this notebook.
df_test_results_sex, df_lr_results_sex = process_results(
    'sex',
    Path('/home/lchanch/model_training/ISNetDANN/train/grid_sex_ISNetDANN_hp8_final/'),
    Path('/home/lchanch/models/ISNetDANN/eval/independet_runs/'),
    fairness_classes=(0, 1),
)

CSV save in: /home/lchanch/models/ISNetDANN/eval/independet_runs
Total runs for sex: 10


In [None]:
#========================SEX===================================

In [None]:
#========================AGE===================================