# Analyze results

Load numeric results from `results` folder (metrics evaluated in test set)

In [1]:
import os
import pandas as pd
import json

In [2]:
%run utils.py

In [3]:
RESULTS_FOLDER = os.path.join(BASE_DIR, "results")
RESULTS_FOLDER

'/mnt/data/chest-x-ray-8/results'

In [4]:
# First 8 diseases
diseases_8 = ALL_DISEASES[:8]
diseases_8

['Atelectasis',
 'Cardiomegaly',
 'Effusion',
 'Infiltration',
 'Mass',
 'Nodule',
 'Pneumonia',
 'Pneumothorax']

## Load all results into df

In [5]:
available_names = os.listdir(RESULTS_FOLDER)
available_names

['1120_200238_resnet_v3_Cardiomegaly.json',
 '1219_140800_resnet_v3_Nodule.json',
 '0109_224853_resnet_v3_Nodule.json',
 'paper14.json',
 'paper8.json',
 '0114_143957_Cardiomegaly_label0.json',
 '0108_232310_resnet_v3_Pneumothorax.json',
 '0107_132136_resnet_v3_Cardiomegaly.json',
 '1218_184642_resnet_v3_Effusion.json',
 '1219_203708_resnet_v3_Pneumonia.json',
 '0127_141448_v4_Atelectasis.json',
 '0124_164749_densenet_v4_all.json',
 '1220_022543_resnet_v3_Mass.json',
 '1202_231001_resnet_v3_all.json',
 '1124_021931_resnet_v3_Pneumothorax.json',
 '0114_152248_Cardiomegaly_label1.json',
 '0121_141726_all_random.json',
 '0115_223657_resnet_v3_os_Cardiomegaly.json',
 '0117_004226_resnet_v3_os_Nodule.json',
 '1103_132133_Cardiomegaly.json',
 '0110_182856_resnet_v3_os_Cardiomegaly.json',
 '0127_135730_v4_Cardiomegaly.json',
 '1119_115517_resnet_v2_Cardiomegaly.json',
 '0122_231522_resnet_v3_all.json',
 '1109_001107_resume_resnet_all.json',
 '1112_170236_resnet_v1_Cardiomegaly.json',
 '1217_1

In [6]:
results_dict = {}

for name in available_names:
    fname = os.path.join(RESULTS_FOLDER, name)
    if not os.path.isfile(fname):
        continue
        
    with open(fname, "r") as f:
        d = json.load(f)
    
    if name.endswith(".json"):
        name = name[:-5]
    results_dict[name] = d

len(results_dict)

34

In [7]:
results_df = pd.DataFrame.from_dict(results_dict, orient="index")
print(results_df.shape)
results_df.head()

(34, 67)


Unnamed: 0,cm_Effusion,recall_Effusion,acc_Effusion,roc_auc_Effusion,prec_Effusion,cm_Nodule,recall_Nodule,prec_Nodule,acc_Nodule,roc_auc_Nodule,...,acc_Pleural_Thickening,prec_Pneumonia,cm_Cardiomegaly,cm_Hernia,cm_Mass,cm_Pneumothorax,cm_Atelectasis,cm_Fibrosis,cm_Infiltration,cm_Pneumonia
0106_121119_resnet_Nodule,,,,,,"[[22691, 1282], [1472, 151]]",0.093038,0.105373,0.892405,0.57252,...,,,,,,,,,,
0107_132136_resnet_v3_Cardiomegaly,,,,,,,,,,,...,,,"[[23701, 826], [701, 368]]",,,,,,,
0108_232310_resnet_v3_Pneumothorax,,,,,,,,,,,...,,,,,,"[[21992, 939], [2038, 627]]",,,,
0109_224853_resnet_v3_Nodule,,,,,,"[[23032, 941], [1422, 201]]",0.123845,0.176007,0.907681,0.641915,...,,,,,,,,,,
0110_182856_resnet_v3_os_Cardiomegaly,,,,,,,,,,,...,,,"[[24239, 288], [915, 154]]",,,,,,,


## Filter ROC-AUC metrics

In [18]:
# TODO: create two functions, one for must and one for may
def filter_has(must=None, may=None, columns=results_df.columns):
    has_must = lambda x: must is None or all(s in x for s in must)
    has_may = lambda x: may is None or any(s in x for s in may)
    return list(sorted(filter(lambda x: has_must(x) and has_may(x), columns)))

In [9]:
cols_roc_auc_8 = filter_has(["roc_auc"], diseases_8)
cols_roc_auc_8

['roc_auc_Atelectasis',
 'roc_auc_Cardiomegaly',
 'roc_auc_Effusion',
 'roc_auc_Infiltration',
 'roc_auc_Mass',
 'roc_auc_Nodule',
 'roc_auc_Pneumonia',
 'roc_auc_Pneumothorax']

In [10]:
filtered_results = results_df[cols_roc_auc_8]
filtered_results

Unnamed: 0,roc_auc_Atelectasis,roc_auc_Cardiomegaly,roc_auc_Effusion,roc_auc_Infiltration,roc_auc_Mass,roc_auc_Nodule,roc_auc_Pneumonia,roc_auc_Pneumothorax
0106_121119_resnet_Nodule,,,,,,0.57252,,
0107_132136_resnet_v3_Cardiomegaly,,0.823725,,,,,,
0108_232310_resnet_v3_Pneumothorax,,,,,,,,0.779539
0109_224853_resnet_v3_Nodule,,,,,,0.641915,,
0110_182856_resnet_v3_os_Cardiomegaly,,0.748706,,,,,,
0114_143957_Cardiomegaly_label0,,0.5,,,,,,
0114_152248_Cardiomegaly_label1,,0.5,,,,,,
0115_223657_resnet_v3_os_Cardiomegaly,,0.761915,,,,,,
0117_004226_resnet_v3_os_Nodule,,,,,,0.60111,,
0117_230624_resnet_v3_os_Effusion,,,0.756816,,,,,


### Filter models with all diseases

In [11]:
cols_roc_auc_14 = filter_has(["roc_auc"], ALL_DISEASES)
df_14 = results_df[cols_roc_auc_14]
df_14 = df_14.loc[(df_14.index.str.contains("all")) | (df_14.index.str.contains("paper"))]

In [12]:
df_14

Unnamed: 0,roc_auc_Atelectasis,roc_auc_Cardiomegaly,roc_auc_Consolidation,roc_auc_Edema,roc_auc_Effusion,roc_auc_Emphysema,roc_auc_Fibrosis,roc_auc_Hernia,roc_auc_Infiltration,roc_auc_Mass,roc_auc_Nodule,roc_auc_Pleural_Thickening,roc_auc_Pneumonia,roc_auc_Pneumothorax
0121_141726_all_random,0.50477,0.488521,0.494939,0.505463,0.497304,0.495828,0.510674,0.531336,0.498664,0.497903,0.505588,0.495765,0.498893,0.502233
0122_231522_resnet_v3_all,0.69428,0.78086,0.66881,0.785772,0.762095,0.768534,0.7267,0.765208,0.611678,0.661172,0.688137,0.671929,0.642739,0.772084
0123_121505_resnet_v3_all,0.717299,0.809163,0.700908,0.79728,0.786126,0.771707,0.73884,0.722831,0.609738,0.644045,0.656555,0.699836,0.657282,0.820873
0124_164749_densenet_v4_all,0.600755,0.610403,0.615557,0.714922,0.660002,0.573084,0.586563,0.632837,0.589129,0.555879,0.508598,0.588095,0.564818,0.591071
0127_135448_v4_all,0.5,0.514419,0.521823,0.41931,0.5,0.478107,0.5,0.639139,0.5,0.544236,0.538986,0.559358,0.49358,0.535491
1109_001107_resume_resnet_all,0.419551,0.823317,0.67025,0.726651,0.760288,0.598294,0.54147,0.46479,0.659808,0.546926,0.471328,0.518571,0.504083,0.579473
1112_170223_resnet_v1_all,0.495002,0.605583,0.583074,0.504427,0.626553,0.401838,0.560363,0.661837,0.574919,0.536431,0.510046,0.568674,0.5749,0.448227
1202_231001_resnet_v3_all,0.599348,0.520659,0.516307,0.65948,0.482248,0.457896,0.445364,0.472068,0.517088,0.514737,0.551762,0.534136,0.493206,0.606396
1217_150033_resnet_v3_all,0.501409,0.514842,0.616539,0.450106,0.596423,0.522878,0.538117,0.495748,0.497259,0.60136,0.524439,0.521002,0.577265,0.520727
paper14,0.7003,0.81,0.7032,0.8052,0.7585,0.833,0.7859,0.8717,0.6614,0.6933,0.6687,0.6835,0.658,0.7993


## Filter papers

In [13]:
papers = results_df[cols_roc_auc_8]
papers = papers.loc[papers.index.str.contains("paper")]
papers

Unnamed: 0,roc_auc_Atelectasis,roc_auc_Cardiomegaly,roc_auc_Effusion,roc_auc_Infiltration,roc_auc_Mass,roc_auc_Nodule,roc_auc_Pneumonia,roc_auc_Pneumothorax
paper14,0.7003,0.81,0.7585,0.6614,0.6933,0.6687,0.658,0.7993
paper8,0.7069,0.8141,0.7362,0.6128,0.5609,0.7164,0.6333,0.7891


## Precision y recall on multitask

In [14]:
cols_prec_recall_8 = filter_has(None, ["prec", "recall"], filter_has(None, diseases_8))
prec_recall_8 = results_df[cols_prec_recall_8]
prec_recall_8.loc["0123_121505_resnet_v3_all"]

prec_Atelectasis       0.162546
prec_Cardiomegaly      0.152203
prec_Effusion          0.259942
prec_Infiltration      0.238823
prec_Mass              0.105631
prec_Nodule            0.076466
prec_Pneumonia         0.049052
prec_Pneumothorax      0.320139
recall_Atelectasis     0.898140
recall_Cardiomegaly    0.539757
recall_Effusion        0.938815
recall_Infiltration    0.999836
recall_Mass            0.483982
recall_Nodule          0.856439
recall_Pneumonia       0.079279
recall_Pneumothorax    0.622139
Name: 0123_121505_resnet_v3_all, dtype: float64

In [15]:
a = prec_recall_8[~prec_recall_8.index.str.contains("all")]
b = a[~a.index.str.contains("label")]
b

Unnamed: 0,prec_Atelectasis,prec_Cardiomegaly,prec_Effusion,prec_Infiltration,prec_Mass,prec_Nodule,prec_Pneumonia,prec_Pneumothorax,recall_Atelectasis,recall_Cardiomegaly,recall_Effusion,recall_Infiltration,recall_Mass,recall_Nodule,recall_Pneumonia,recall_Pneumothorax
0106_121119_resnet_Nodule,,,,,,0.105373,,,,,,,,0.093038,,
0107_132136_resnet_v3_Cardiomegaly,,0.308208,,,,,,,,0.344247,,,,,,
0108_232310_resnet_v3_Pneumothorax,,,,,,,,0.400383,,,,,,,,0.235272
0109_224853_resnet_v3_Nodule,,,,,,0.176007,,,,,,,,0.123845,,
0110_182856_resnet_v3_os_Cardiomegaly,,0.348416,,,,,,,,0.14406,,,,,,
0115_223657_resnet_v3_os_Cardiomegaly,,0.346715,,,,,,,,0.177736,,,,,,
0117_004226_resnet_v3_os_Nodule,,,,,,0.173575,,,,,,,,0.082563,,
0117_230624_resnet_v3_os_Effusion,,,0.412328,,,,,,,,0.481108,,,,,
0127_135730_v4_Cardiomegaly,,0.077411,,,,,,,,0.159963,,,,,,
0127_141448_v4_Atelectasis,0.156545,,,,,,,,0.52516,,,,,,,


In [16]:
cols = filter_has(None, ["recall", "roc_auc", "prec"], filter_has(None, diseases_8))
results_df[cols].loc["0123_121505_resnet_v3_all"]

prec_Atelectasis        0.162546
prec_Cardiomegaly       0.152203
prec_Effusion           0.259942
prec_Infiltration       0.238823
prec_Mass               0.105631
prec_Nodule             0.076466
prec_Pneumonia          0.049052
prec_Pneumothorax       0.320139
recall_Atelectasis      0.898140
recall_Cardiomegaly     0.539757
recall_Effusion         0.938815
recall_Infiltration     0.999836
recall_Mass             0.483982
recall_Nodule           0.856439
recall_Pneumonia        0.079279
recall_Pneumothorax     0.622139
roc_auc_Atelectasis     0.717299
roc_auc_Cardiomegaly    0.809163
roc_auc_Effusion        0.786126
roc_auc_Infiltration    0.609738
roc_auc_Mass            0.644045
roc_auc_Nodule          0.656555
roc_auc_Pneumonia       0.657282
roc_auc_Pneumothorax    0.820873
Name: 0123_121505_resnet_v3_all, dtype: float64

In [17]:
cols = filter_has(["cm"], diseases_8)
results_df[cols].loc["0122_231522_resnet_v3_all"]

cm_Atelectasis     NaN
cm_Cardiomegaly    NaN
cm_Effusion        NaN
cm_Infiltration    NaN
cm_Mass            NaN
cm_Nodule          NaN
cm_Pneumonia       NaN
cm_Pneumothorax    NaN
Name: 0122_231522_resnet_v3_all, dtype: object