In [1]:
import pandas as pd
import os

In [4]:
base_path = "/om/user/shobhita/data/6.864/"
pf_race_nodro_path = base_path + "task_phenotype_first_outputs_dro_False_group_ethnicity_to_use_freezebert_True/"
pf_race_dro_path = base_path + "task_phenotype_first_outputs_dro_True_group_ethnicity_to_use_freezebert_True/"
pf_gender_nodro_path = base_path + "task_phenotype_first_outputs_dro_False_group_gender_freezebert_True/"
pf_gender_dro_path = base_path + "task_phenotype_first_outputs_dro_True_group_gender_freezebert_True/"

In [8]:
os.listdir(pf_race_nodro_path)

['phenotype_first_baseline_clinical_BERT_1_epoch_512_Cardiac dysrhythmias',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Diabetes mellitus without complication',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Other liver diseases',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Complications of surgical procedures or medical care',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Essential hypertension',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_any_acute',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Chronic obstructive pulmonary disease and bronchiectasis',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Other lower respiratory disease',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Disorders of lipid metabolism',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Septicemia (except in labor)',
 'phenotype_first_baseline_clinical_BERT_1_epoch_512_Coronary atherosclerosis and other heart disease',
 'phenotype_first_baseline_clin

In [9]:
def to_dict(results, reverse=True):
    results_df_dict = []
    for col, col_results in results.items():
        col_results["Task"] = col
        col_results["Improved?"] = abs(col_results["DRO"]) < abs(col_results["NO DRO"]) if not reverse else abs(col_results["NO DRO"]) < abs(col_results["DRO"]) 
        results_df_dict.append(col_results)
    results = pd.DataFrame(results_df_dict)
    return results

def get_gender_results(as_df=False):
    results = {}

    for using_dro, path in zip(["DRO", "NO DRO"], [pf_gender_dro_path, pf_gender_nodro_path]):
        for col in os.listdir(path):
            full_path = path + col + "/group_metrics.pkl"
            result = pd.read_pickle(full_path)
            name = col.split("_")[-1]
            accuracy_result = result['group metrics'][0]['acc'] - result['group metrics'][1]['acc']
            if name in results:
                results[name][using_dro] = accuracy_result
            else:
                results[name] = {}
                results[name][using_dro] = accuracy_result
    if as_df:
        results = to_dict(results)
    return results

In [29]:
from statistics import mean

def get_race_results(option="all", as_df=False):
    results = {}

    for using_dro, path in zip(["DRO", "NO DRO"], [pf_race_dro_path, pf_race_nodro_path]):
        for col in os.listdir(path):
            full_path = path + col + "/group_metrics.pkl"
            result = pd.read_pickle(full_path)
            name = col.split("_")[-1]
            if option == "white_v_nonwhite":
                accuracy_result = result['group metrics'][0]['acc'] - mean([result['group metrics'][x]['acc'] for x in range(1, 4)])
            elif option == "white_v_black":
                accuracy_result = result['group metrics'][0]['acc'] - result['group metrics'][1]['acc']
            elif option == ""
            else:
                accuracy_result = {k: v['acc'] for k, v in result['group metrics'].items()}
            
            if name in results:
                results[name][using_dro] = accuracy_result
            else:
                results[name] = {}
                results[name][using_dro] = accuracy_result
    if as_df: 
        if option == "white_v_nonwhite" or option == "white_v_black":
            return to_dict(results)
            
        elif option == "worst_group":
            results = {col: {"DRO": min(list(col_res["DRO"].values())), "NO DRO": min(list(col_res["NO DRO"].values()))} for col, col_res in results.items()}
            return to_dict(results, reverse=True)
    return results

In [11]:
gender_results = get_gender_results(as_df=True)

In [12]:
gender_results

Unnamed: 0,DRO,NO DRO,Task,Improved?
0,-0.041388,-0.021768,Cardiac dysrhythmias,True
1,0.007284,0.008937,Diabetes mellitus without complication,False
2,-0.009468,-0.009714,Other liver diseases,False
3,0.005836,0.006662,Complications of surgical procedures or medica...,False
4,0.015202,-0.005365,Essential hypertension,True
5,-0.029859,-0.026585,acute,True
6,0.001045,-0.002444,Chronic obstructive pulmonary disease and bron...,False
7,0.010325,0.009499,Other lower respiratory disease,True
8,-0.003466,-0.000343,Disorders of lipid metabolism,True
9,0.013923,0.00716,Septicemia (except in labor),True


In [13]:
percent_improved = len(gender_results[gender_results["Improved?"] == True])/len(gender_results)
mean_bias = gender_results["NO DRO"].abs().mean()
std_bias = gender_results["NO DRO"].abs().std()

In [14]:
print("percent improved ", percent_improved)
print("mean bias (in either direction) ", mean_bias)
print("std bias ", std_bias)

percent improved  0.6071428571428571
mean bias (in either direction)  0.014388496226150602
std bias  0.01161428140118907


In [30]:
race_results = get_race_results(option="white_v_nonwhite", as_df=True)

In [31]:
race_results

Unnamed: 0,DRO,NO DRO,Task,Improved?
0,-0.042824,-0.046423,Cardiac dysrhythmias,False
1,0.039155,0.04514,Diabetes mellitus without complication,False
2,0.00165,0.00077,Other liver diseases,True
3,-0.036728,-0.036218,Complications of surgical procedures or medica...,True
4,0.027876,0.002248,Essential hypertension,True
5,-0.005488,0.012952,acute,False
6,-0.057835,-0.05497,Chronic obstructive pulmonary disease and bron...,True
7,-0.011631,-0.007121,Other lower respiratory disease,True
8,-0.003418,0.013766,Disorders of lipid metabolism,False
9,0.02111,0.035568,Septicemia (except in labor),False


In [33]:
percent_improved = len(race_results[race_results["Improved?"] == True])/len(race_results)
mean_bias = race_results["NO DRO"].abs().mean()
std_bias = race_results["NO DRO"].abs().std()

In [34]:
print("percent improved ", percent_improved)
print("mean bias (in either direction) ", mean_bias)
print("std bias ", std_bias)

percent improved  0.5
mean bias (in either direction)  0.023954165319191274
std bias  0.01818793115067145


In [35]:
race_results = get_race_results(option="white_v_black", as_df=True)

In [37]:
race_results

Unnamed: 0,DRO,NO DRO,Task,Improved?
0,-0.02689,-0.029071,Cardiac dysrhythmias,False
1,0.020384,0.019986,Diabetes mellitus without complication,True
2,0.025717,0.016326,Other liver diseases,True
3,-0.016445,-0.018771,Complications of surgical procedures or medica...,False
4,0.049531,-0.017421,Essential hypertension,True
5,-0.040996,-0.02823,acute,True
6,-0.035518,-0.029816,Chronic obstructive pulmonary disease and bron...,True
7,-0.015934,-0.01497,Other lower respiratory disease,True
8,-0.027058,-0.016305,Disorders of lipid metabolism,True
9,0.037327,0.058688,Septicemia (except in labor),False


In [38]:
percent_improved = len(race_results[race_results["Improved?"] == True])/len(race_results)
mean_bias = race_results["NO DRO"].abs().mean()
std_bias = race_results["NO DRO"].abs().std()
print("percent improved ", percent_improved)
print("mean bias (in either direction) ", mean_bias)
print("std bias ", std_bias)

percent improved  0.6428571428571429
mean bias (in either direction)  0.03481723348565098
std bias  0.03887844955910475


In [25]:
race_results = get_race_results(option="worst_group", as_df=True)

In [26]:
percent_improved = len(race_results[race_results["Improved?"] == True])/len(race_results)
mean_bias = race_results["NO DRO"].abs().mean()
std_bias = race_results["NO DRO"].abs().std()
print("percent improved ", percent_improved)
print("mean bias (in either direction) ", mean_bias)
print("std bias ", std_bias)

percent improved  0.32142857142857145
mean bias (in either direction)  0.8135475701651812
std bias  0.10519679683262602


In [34]:
race_results = get_race_results(option="all", as_df=True)

In [35]:
race_results

{'Cardiac dysrhythmias': {'DRO': {0: 0.7348119575699132,
   1: 0.7617021276595745,
   2: 0.7733333333333333,
   3: 0.7978723404255319,
   4: 0.8305084745762712,
   5: 0.7100977198697068},
  'NO DRO': {0: 0.7251687560270009,
   1: 0.7361702127659574,
   2: 0.7866666666666666,
   3: 0.7978723404255319,
   4: 0.7966101694915254,
   5: 0.6938110749185668}},
 'Diabetes mellitus without complication': {'DRO': {0: 0.793635486981678,
   1: 0.7914893617021277,
   2: 0.8133333333333334,
   3: 0.7021276595744681,
   4: 0.7457627118644068,
   5: 0.749185667752443},
  'NO DRO': {0: 0.8095467695274832,
   1: 0.7914893617021277,
   2: 0.8266666666666667,
   3: 0.6914893617021277,
   4: 0.7457627118644068,
   5: 0.7654723127035831}},
 'Other liver diseases': {'DRO': {0: 0.9334619093539055,
   1: 0.9276595744680851,
   2: 0.92,
   3: 0.9361702127659575,
   4: 0.8983050847457628,
   5: 0.9771986970684039},
  'NO DRO': {0: 0.940212150433944,
   1: 0.9063829787234042,
   2: 0.92,
   3: 0.9468085106382979,

{'group metrics': {0: {'auc': 0.7040496910697018,
   'acc': 0.7148102815177478,
   'auprc': 0.490976659700489},
  1: {'auc': 0.7456522733429735,
   'acc': 0.756198347107438,
   'auprc': 0.5077437430117916}},
 'acc': 0.7324191279887482,
 'auprc': 0.4945367652846202,
 'log loss': 0.5344687637916137,
 'auc': 0.7188779855081026}

In [55]:
import pickle

base_path = "/om/user/shobhita/data/6.864/"
output_path = "/om/user/shobhita/data/6.864/metric_results/"
pf_race_nodro_path = base_path + "task_phenotype_first_outputs_dro_False_group_ethnicity_to_use_freezebert_True/"
pf_race_dro_path = base_path + "task_phenotype_first_outputs_dro_True_group_ethnicity_to_use_freezebert_True/"
pf_gender_nodro_path = base_path + "task_phenotype_first_outputs_dro_False_group_gender_freezebert_True/"
pf_gender_dro_path = base_path + "task_phenotype_first_outputs_dro_True_group_gender_freezebert_True/"

for path, name in zip([pf_race_nodro_path, pf_race_dro_path, pf_gender_nodro_path, pf_gender_dro_path], ["race_false", "race_true", "gender_false", "gender_true"]):
    for col in os.listdir(path):
        full_path = path + col + "/group_metrics.pkl"
        disease = col.split("_")[-1]
        new_path = output_path + f"{name}_{disease}.pkl"
        result = pd.read_pickle(full_path)
        with open(new_path, "wb") as handle:
            pickle.dump(result, handle)
        print(f"Done with {name}, {col}")

Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Cardiac dysrhythmias
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Diabetes mellitus without complication
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Other liver diseases
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Complications of surgical procedures or medical care
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Essential hypertension
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_any_acute
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Chronic obstructive pulmonary disease and bronchiectasis
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Other lower respiratory disease
Done with race_false, phenotype_first_baseline_clinical_BERT_1_epoch_512_Disorders of lipid metabolism
Done with race_false, phenotype_first_baseline_clinical_B

Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Complications of surgical procedures or medical care
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Essential hypertension
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_any_acute
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Chronic obstructive pulmonary disease and bronchiectasis
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Other lower respiratory disease
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Disorders of lipid metabolism
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Septicemia (except in labor)
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Coronary atherosclerosis and other heart disease
Done with gender_true, phenotype_first_baseline_clinical_BERT_1_epoch_512_Pneumonia (except that caused by tuberculosis or se

In [53]:
new_path

'/om/user/shobhita/data/6.864/metric_results/race_false_Cardiac dysrhythmias.pkl'