In [1]:
import numpy as np
import pandas as pd
import os
import glob
import torch
import torch.nn.functional as F
import joblib
import itertools
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import warnings
import string
from sklearn.metrics import roc_auc_score, average_precision_score, brier_score_loss, recall_score, precision_score
from prediction_utils.util import df_dict_concat, yaml_read
from matplotlib.ticker import FormatStrFormatter

In [2]:
project_dir = '/share/pi/nigam/projects/spfohl/cohorts/admissions/mimic_omop/'
experiment_name_baseline = 'baseline_tuning_fold_1_10'
experiment_name_fair = 'fair_tuning_fold_1_10'
tasks = ['los_icu_3days', 'los_icu_7days', 'mortality_hospital', 'mortality_icu']
cohort_path = os.path.join(project_dir, 'cohort', 'cohort.parquet')
row_id_map_path = os.path.join(
    project_dir, 'merged_features_binary/features_sparse/features_row_id_map.parquet'
)
result_path = os.path.join(project_dir, 'experiments', 'merged_results_fold_1_10')
os.makedirs(result_path, exist_ok=True)

In [3]:
attributes = ['gender_concept_name', 'age_group', 'race_eth']

In [4]:
cohort = pd.read_parquet(cohort_path)
row_id_map = pd.read_parquet(row_id_map_path)
cohort = cohort.merge(row_id_map)

### Generate the cohort table

In [5]:
### Cohort table
cohort_df_long = (
    cohort
    .melt(
        id_vars = ['person_id'] + attributes,
        value_vars = tasks,
        var_name = 'task',
        value_name = 'labels'
    )
    .melt(
        id_vars = ['person_id', 'task', 'labels'],
        value_vars = attributes,
        var_name = 'attribute',
        value_name = 'group'
    )
)

In [6]:
cohort_statistics_df = (
    cohort_df_long
    .groupby(['task', 'attribute', 'group'])
    .agg(
        prevalence=('labels', 'mean'),
    )
    .reset_index()
    .groupby('attribute')
    .apply(lambda x: x.pivot_table(index = 'group', columns = 'task', values = 'prevalence'))
    .reset_index()
)

group_size_df = (
    cohort_df_long
    .groupby(['task', 'attribute', 'group'])
    .agg(
        size = ('labels', lambda x: x.shape[0])
    )
    .reset_index()
    .drop(columns = 'task')
    .drop_duplicates()
)

cohort_statistics_df = cohort_statistics_df.merge(group_size_df)
cohort_statistics_df = (
    cohort_statistics_df
    .set_index(['attribute', 'group'])
    [['size'] + tasks]
)

In [7]:
cohort_statistics_df

Unnamed: 0_level_0,Unnamed: 1_level_0,size,los_icu_3days,los_icu_7days,mortality_hospital,mortality_icu
attribute,group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
age_group,[15-30),1345,0.274349,0.049071,0.038662,0.023792
age_group,[30-45),2621,0.27356,0.049981,0.054178,0.033193
age_group,[45-55),3865,0.296507,0.050453,0.074256,0.042173
age_group,[55-65),5358,0.307577,0.052445,0.076894,0.045539
age_group,[65-75),5620,0.327758,0.057117,0.096085,0.055694
age_group,[75-91),7361,0.356473,0.05828,0.14047,0.079337
gender_concept_name,FEMALE,11108,0.325981,0.056806,0.101548,0.059327
gender_concept_name,MALE,15062,0.313703,0.052583,0.088899,0.050724
race_eth,Other,7639,0.325173,0.057861,0.105773,0.062443
race_eth,White,18531,0.316335,0.052938,0.089526,0.05105


In [8]:
## Write to Latex
table_path = './../figures/mimic_omop/icu_admission_cohort/'
os.makedirs(table_path, exist_ok=True)
with open(os.path.join(table_path, 'cohort_table.txt'), 'w') as fp:
    (
        cohort_statistics_df
        .reset_index().drop(columns='attribute').set_index(['group'])
        .to_latex(
            fp, 
            float_format = '%.3g', 
            index_names = False, 
            index=True
        )
    )

### Get the results

In [9]:
def get_result_df_baseline(base_path, filename='result_df_group_standard_eval.parquet'):
    """
    Gets the results for training the baseline models
    """
    selected_models_path = os.path.join(
        base_path, 
        'config',
        'selected_models', '**', '*.yaml'
    )
    selected_models_dict = {
        filename.split('/')[-2]: filename.split('/')[-1]
        for filename in glob.glob(selected_models_path, recursive=True)
    }
    paths = [
        glob.glob(
            os.path.join(
                base_path,
                'performance',
                task, 
                config_filename, 
                '**', 
                filename
            ),
            recursive=True
        )
        for task, config_filename in selected_models_dict.items()
    ]
    paths = list(itertools.chain(*paths))
    result_df_baseline = df_dict_concat(
        {
            tuple(filename.split('/'))[-4:-1]:
            pd.read_parquet(filename)
            for filename in paths
        },
        ['task2', 'config_filename', 'fold_id']
    ).drop(columns='task2')
    return result_df_baseline

In [10]:
result_df_baseline = get_result_df_baseline(
    os.path.join(
        project_dir,
        'experiments',
        experiment_name_baseline,  
    )
)

In [11]:
result_df_baseline.task.unique()

array(['los_icu_3days', 'mortality_hospital', 'los_icu_7days',
       'mortality_icu'], dtype=object)

In [12]:
result_df_baseline.task.unique()

array(['los_icu_3days', 'mortality_hospital', 'los_icu_7days',
       'mortality_icu'], dtype=object)

In [13]:
result_df_calibration_baseline = get_result_df_baseline(
    os.path.join(
        project_dir,
        'experiments',
        experiment_name_baseline,  
    ),
    filename='calibration_result.parquet'
)
id_vars = ['fold_id', 'phase', 'config_filename', 'task', 'attribute', 'group']
result_df_calibration_baseline = result_df_calibration_baseline.melt(
    id_vars = id_vars,
    value_vars = set(result_df_calibration_baseline.columns) - set(id_vars),
    var_name = 'metric',
    value_name = 'performance'
).query('metric != "brier"')

In [14]:
result_df_calibration_baseline.metric.unique()

array(['calib_group_error', 'brier_signed', 'calib_group_error_signed',
       'calib_error_signed', 'calib_error'], dtype=object)

In [15]:
# Import fair_ova metrics
result_df_ova_baseline = get_result_df_baseline(
    os.path.join(
        project_dir,
        'experiments',
        experiment_name_baseline,  
    ),
    filename='result_df_group_fair_ova.parquet'
)
# id_vars = ['fold_id', 'phase', 'config_filename', 'task', 'attribute', 'group']
# result_df_ova_baseline = result_df_ova_baseline.melt(
#     id_vars = id_vars,
#     value_vars = set(result_df_ova_baseline.columns) - set(id_vars),
#     var_name = 'metric',
#     value_name = 'performance'
# )


In [16]:
result_df_baseline = pd.concat([result_df_baseline, result_df_calibration_baseline, result_df_ova_baseline], ignore_index=True)

In [17]:
result_df_baseline

Unnamed: 0,config_filename,fold_id,metric,phase,task,attribute,group,performance,performance_overall
0,42.yaml,7,auc,test,los_icu_3days,age_group,[15-30),0.763242,0.723223
1,42.yaml,7,auc,test,los_icu_3days,age_group,[30-45),0.815010,0.723223
2,42.yaml,7,auc,test,los_icu_3days,age_group,[45-55),0.700487,0.723223
3,42.yaml,7,auc,test,los_icu_3days,age_group,[55-65),0.767783,0.723223
4,42.yaml,7,auc,test,los_icu_3days,age_group,[65-75),0.711882,0.723223
...,...,...,...,...,...,...,...,...,...
15173,42.yaml,9,xauc_0,val,mortality_icu,age_group,[15-30),0.955650,
15174,42.yaml,9,xauc_0,val,mortality_icu,gender_concept_name,MALE,0.927233,
15175,42.yaml,9,xauc_0,val,mortality_icu,gender_concept_name,FEMALE,0.923364,
15176,42.yaml,9,xauc_0,val,mortality_icu,race_eth,White,0.921987,


In [18]:
def flatten_multicolumns(df):
    """
    Converts multi-index columns into single colum
    """
    df.columns = ['_'.join([el for el in col if el != '']).strip() for col in df.columns.values if len(col) > 1]
    return df

In [19]:
# result_df_baseline.performance.isna()

In [22]:
result_df_baseline_mean = (
    result_df_baseline
    .groupby(list(set(result_df_baseline.columns) - set(['fold_id', 'performance', 'performance_overall'])))
    [['performance', 'performance_overall']]
#     [['performance']]
    .agg(['mean', 'std', 'sem'])
#     .agg('max')
    .reset_index()
)
result_df_baseline_mean = result_df_baseline_mean.rename(
    columns={
        'performance': 'performance_baseline',
        'performance_overall': 'performance_overall_baseline'
    }
)
result_df_baseline_mean = flatten_multicolumns(result_df_baseline_mean)

In [23]:
result_df_baseline_mean

Unnamed: 0,attribute,metric,task,config_filename,phase,group,performance_baseline_mean,performance_baseline_std,performance_baseline_sem,performance_overall_baseline_mean,performance_overall_baseline_std,performance_overall_baseline_sem
0,age_group,auc,los_icu_3days,42.yaml,test,[15-30),0.763269,0.007879,0.002492,0.72107,0.001976,0.000625
1,age_group,auc,los_icu_3days,42.yaml,test,[30-45),0.805604,0.005258,0.001663,0.72107,0.001976,0.000625
2,age_group,auc,los_icu_3days,42.yaml,test,[45-55),0.704552,0.004427,0.001400,0.72107,0.001976,0.000625
3,age_group,auc,los_icu_3days,42.yaml,test,[55-65),0.771511,0.004677,0.001479,0.72107,0.001976,0.000625
4,age_group,auc,los_icu_3days,42.yaml,test,[65-75),0.707298,0.006437,0.002035,0.72107,0.001976,0.000625
...,...,...,...,...,...,...,...,...,...,...,...,...
1515,race_eth,xauc_ova_1,mortality_hospital,42.yaml,val,White,0.921265,0.008574,0.002711,,,
1516,race_eth,xauc_ova_1,mortality_icu,42.yaml,test,Other,0.935799,0.002706,0.000856,,,
1517,race_eth,xauc_ova_1,mortality_icu,42.yaml,test,White,0.938338,0.001101,0.000348,,,
1518,race_eth,xauc_ova_1,mortality_icu,42.yaml,val,Other,0.932707,0.016092,0.005089,,,


In [25]:
def get_result_df_fair(base_path=None, filename='result_df_group_standard_eval.parquet', paths=None):
    if paths is None:
        performance_path = os.path.join(
            base_path,
            'performance',
        )
        paths = glob.glob(os.path.join(performance_path, '**', filename), recursive=True)
    result_df_fair = df_dict_concat(
        {
            tuple(file_name.split('/'))[-5:-1]:
            pd.read_parquet(file_name)
            for file_name in paths
        },
        ['task2', 'sensitive_attribute', 'config_filename', 'fold_id']
    ).drop(columns='task2')
    return result_df_fair

In [26]:
# Fair results
result_df_fair = get_result_df_fair(
    os.path.join(
        project_dir,
        'experiments',
        experiment_name_fair
    )
)

In [None]:
# # List config_filenames without ten results
# (
#     result_df_fair
#     .groupby(
#         list(set(result_df_fair.columns) - set(['fold_id', 'performance', 'performance_overall']))
#     )
#     .agg(lambda x: len(x))
#     .query("fold_id != 10")
#     .reset_index()
#     .config_filename
#     .sort_values()
#     .unique()
# )

In [27]:
result_df_calibration_fair = get_result_df_fair(
    os.path.join(
        project_dir,
        'experiments',
        experiment_name_fair
    ),
    filename='calibration_result.parquet'
)

id_vars = ['fold_id', 'phase', 'config_filename', 'task', 'sensitive_attribute', 'attribute', 'group']
result_df_calibration_fair = result_df_calibration_fair.melt(
    id_vars = id_vars,
    value_vars = set(result_df_calibration_fair.columns) - set(id_vars),
    var_name = 'metric',
    value_name = 'performance'
).query('metric != "brier"')

In [28]:
result_df_ova_fair = get_result_df_fair(
    os.path.join(
        project_dir,
        'experiments',
        experiment_name_fair
    ),
    filename='result_df_group_fair_ova.parquet'
)

# id_vars = ['fold_id', 'phase', 'config_filename', 'task', 'sensitive_attribute', 'attribute', 'group']
# result_df_ova_fair = result_df_ova_fair.melt(
#     id_vars = id_vars,
#     value_vars = set(result_df_ova_fair.columns) - set(id_vars),
#     var_name = 'metric',
#     value_name = 'performance'
# )

In [29]:
result_df_ova_fair

Unnamed: 0,sensitive_attribute,config_filename,fold_id,phase,task,attribute,group,metric,performance
0,gender_concept_name,8.yaml,7,test,los_icu_3days,age_group,[45-55),emd_ova,0.015198
1,gender_concept_name,8.yaml,7,test,los_icu_3days,age_group,[30-45),emd_ova,0.040534
2,gender_concept_name,8.yaml,7,test,los_icu_3days,age_group,[75-91),emd_ova,0.033915
3,gender_concept_name,8.yaml,7,test,los_icu_3days,age_group,[65-75),emd_ova,0.009370
4,gender_concept_name,8.yaml,7,test,los_icu_3days,age_group,[55-65),emd_ova,0.008732
...,...,...,...,...,...,...,...,...,...
1438915,race_eth,49.yaml,9,val,mortality_icu,age_group,[15-30),xauc_ova_0,0.885464
1438916,race_eth,49.yaml,9,val,mortality_icu,gender_concept_name,MALE,xauc_ova_0,0.874771
1438917,race_eth,49.yaml,9,val,mortality_icu,gender_concept_name,FEMALE,xauc_ova_0,0.858424
1438918,race_eth,49.yaml,9,val,mortality_icu,race_eth,White,xauc_ova_0,0.869127


In [30]:
result_df_fair = pd.concat([result_df_fair, result_df_calibration_fair, result_df_ova_fair], ignore_index=True)

In [31]:
result_df_fair_mean = (
    result_df_fair
    .groupby(list(set(result_df_fair.columns) - set(['fold_id', 'performance', 'performance_overall'])))
    [['performance', 'performance_overall']]
    .agg(['mean', 'std', 'sem'])
    .reset_index()
)
result_df_fair_mean = flatten_multicolumns(result_df_fair_mean)

In [32]:
ci_func = lambda x: x * 1.96
result_df_fair_mean = result_df_fair_mean.assign(
    performance_CI = lambda x: ci_func(x['performance_sem']),
    performance_overall_CI = lambda x: ci_func(x['performance_overall_sem']),
)

In [33]:
def label_fair_mode(df):
    df['fair_mode'] = (
    df['regularization_metric']
    .where(~df['regularization_metric'].str.match('mmd'), 
           df['regularization_metric'].astype(str) + '_' + df['mmd_mode'].astype(str), 
           axis=0)
    )
    df['fair_mode'] = (
        df['fair_mode']
        .where(~df['fair_mode'].str.match('mean_prediction'), 
               df['fair_mode'].astype(str) + '_' + df['mean_prediction_mode'].astype(str), 
               axis=0
              )
    )
    return df

In [34]:
def get_fair_config_df(base_path):
    config_path = os.path.join(
        base_path,
        'config',
    )
    fair_config_files = glob.glob(
        os.path.join(config_path, '**', '*.yaml'),
        recursive=True
    )
    fair_config_dict_dict = {
        tuple(file_name.split('/'))[-2:]:
        yaml_read(file_name)
        for file_name in fair_config_files
    }

    fair_config_df = df_dict_concat(
        {
            key: pd.DataFrame(value, index=[key])
            for key, value in fair_config_dict_dict.items()
        },
        ['task', 'config_filename']
    )
    fair_config_df = label_fair_mode(fair_config_df)[['task', 'config_filename', 'fair_mode', 'lambda_group_regularization']]
    return fair_config_df

In [35]:
fair_config_df = get_fair_config_df(
    os.path.join(
        project_dir,
        'experiments',
        experiment_name_fair
    )
)

In [36]:
fair_config_df

Unnamed: 0,task,config_filename,fair_mode,lambda_group_regularization
0,los_icu_3days,8.yaml,mean_prediction_conditional_pos,0.002783
1,los_icu_3days,37.yaml,mean_prediction_unconditional,0.464159
2,los_icu_3days,36.yaml,mean_prediction_conditional,0.464159
3,los_icu_3days,0.yaml,mean_prediction_conditional,0.001000
4,los_icu_3days,35.yaml,mmd_conditional_pos,0.166810
...,...,...,...,...
235,mortality_icu,40.yaml,mmd_unconditional,0.464159
236,mortality_icu,57.yaml,mmd_conditional,10.000000
237,mortality_icu,52.yaml,mmd_unconditional,3.593814
238,mortality_icu,5.yaml,mmd_conditional_pos,0.001000


In [37]:
result_df_fair_mean.task.unique()

array(['los_icu_3days', 'los_icu_7days', 'mortality_hospital',
       'mortality_icu'], dtype=object)

In [38]:
result_df = pd.merge(result_df_baseline_mean.drop(columns='config_filename'), result_df_fair_mean,
                    how='outer', indicator=True).merge(fair_config_df)
assert result_df_fair_mean.shape[0] == result_df.shape[0]
result_df.head()

Unnamed: 0,attribute,metric,task,phase,group,performance_baseline_mean,performance_baseline_std,performance_baseline_sem,performance_overall_baseline_mean,performance_overall_baseline_std,...,performance_std,performance_sem,performance_overall_mean,performance_overall_std,performance_overall_sem,performance_CI,performance_overall_CI,_merge,fair_mode,lambda_group_regularization
0,age_group,auc,los_icu_3days,test,[15-30),0.763269,0.007879,0.002492,0.72107,0.001976,...,0.00593,0.001875,0.720133,0.002311,0.000731,0.003675,0.001432,both,mean_prediction_conditional,0.001
1,age_group,auc,los_icu_3days,test,[15-30),0.763269,0.007879,0.002492,0.72107,0.001976,...,0.004465,0.001412,0.720593,0.002171,0.000686,0.002768,0.001345,both,mean_prediction_conditional,0.001
2,age_group,auc,los_icu_3days,test,[15-30),0.763269,0.007879,0.002492,0.72107,0.001976,...,0.008329,0.002634,0.720754,0.001851,0.000585,0.005162,0.001147,both,mean_prediction_conditional,0.001
3,age_group,auc,los_icu_3days,test,[30-45),0.805604,0.005258,0.001663,0.72107,0.001976,...,0.006672,0.00211,0.720133,0.002311,0.000731,0.004136,0.001432,both,mean_prediction_conditional,0.001
4,age_group,auc,los_icu_3days,test,[30-45),0.805604,0.005258,0.001663,0.72107,0.001976,...,0.003029,0.000958,0.720593,0.002171,0.000686,0.001877,0.001345,both,mean_prediction_conditional,0.001


In [39]:
result_df.query('_merge == "right_only"')

Unnamed: 0,attribute,metric,task,phase,group,performance_baseline_mean,performance_baseline_std,performance_baseline_sem,performance_overall_baseline_mean,performance_overall_baseline_std,...,performance_std,performance_sem,performance_overall_mean,performance_overall_std,performance_overall_sem,performance_CI,performance_overall_CI,_merge,fair_mode,lambda_group_regularization


In [40]:
result_df.metric.unique()

array(['auc', 'auprc', 'brier', 'brier_signed', 'calib_error',
       'calib_error_signed', 'calib_group_error',
       'calib_group_error_signed', 'emd_ova', 'emd_ova_0', 'emd_ova_1',
       'loss_bce', 'mean_prediction', 'mean_prediction_0',
       'mean_prediction_1', 'xauc_0', 'xauc_1', 'xauc_ova_0',
       'xauc_ova_1'], dtype=object)

In [41]:
result_df = result_df.query('phase == "test"')

In [42]:
result_df = result_df.drop(columns = '_merge')

In [43]:
result_df.to_csv(os.path.join(result_path, 'group_results.csv'), index=False)