In [None]:
import pandas as pd
import glob
import os
from pathlib import Path

# Get repository root directory.
# The root is identified by the presence of a `data` entry; the search starts at the
# current working directory and walks upwards, so the notebook can be launched from
# the root itself or from any (possibly nested) sub-directory.
def find_repo_root(start, marker='data'):
    """Return the closest ancestor of ``start`` (inclusive) that contains ``marker``.

    Parameters
    ----------
    start : pathlib.Path
        Directory the search begins from.
    marker : str, default 'data'
        Name of the entry whose presence identifies the repository root.

    Returns
    -------
    pathlib.Path
        The first of ``start``, ``start.parent``, ``start.parent.parent``, ... that
        has ``marker`` inside it. When no candidate qualifies, ``start.parent`` is
        returned, i.e. the notebook is assumed to live one level below the root
        (e.g. in ``dev_notebooks``).
    """
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    # Fallback: assume we're in dev_notebooks and go up one level
    return start.parent


cwd = Path.cwd()
REPO_ROOT = find_repo_root(cwd)

In [None]:
# Investigate how good the tools are at predicting the labels.
# For every prediction CSV: drop the columns that are constantly 0.5, binarise the
# remaining scores at 0.5 and measure how often they agree with the labels in Train.csv.
PRED_SUFFIX = '_train.csv'  # prediction files are named '<tool_name>_train.csv'

tr_df = pd.read_csv(REPO_ROOT / 'data' / 'openi' / 'labels' / 'Train.csv')
# Label ids get a '.jpg' extension so they can be looked up with the `filename`
# index of the prediction files below.
tr_df['id'] = tr_df['id'].map(lambda image_id: str(image_id) + '.jpg')
tr_df = tr_df.set_index('id')

# glob returns matches in arbitrary order; sort so `tool_metrics` is built in a
# reproducible order across runs and machines.
tool_pred_csvs = sorted(
    glob.glob(str(REPO_ROOT / 'data' / 'openi' / 'predictions' / ('*' + PRED_SUFFIX)))
)

tool_metrics = {}
for tool in tool_pred_csvs:
    # Path(...).name handles both '/' and '\\' separators, unlike splitting on '/'.
    tool_name = Path(tool).name[:-len(PRED_SUFFIX)]
    preds_df = pd.read_csv(tool).set_index('filename')
    # Check which columns have all values equal to 0.5
    cols_all_05 = [col for col in preds_df.columns if (preds_df[col] == 0.5).all()]
    preds_df = preds_df.drop(columns=cols_all_05)
    # Binarise: a score strictly above 0.5 counts as a positive prediction.
    preds_df = (preds_df > 0.5).astype(int)

    # Ground truth aligned to the same rows/columns as the predictions.
    labels = tr_df.loc[preds_df.index, preds_df.columns]
    # A mismatch (e.g. duplicated ids in Train.csv) would make the element-wise
    # comparison meaningless, so fail loudly instead of reporting a wrong number.
    assert labels.shape == preds_df.shape, (
        f'label/prediction shape mismatch for {tool_name}: {labels.shape} vs {preds_df.shape}'
    )
    # Fraction of (image, label) cells where prediction and ground truth agree.
    acc = (labels.to_numpy() == preds_df.to_numpy()).mean()

    tool_metrics[tool_name] = {
        'cols_doesnt_predict': cols_all_05,
        'acc': acc,
    }

In [69]:
# Display the per-tool results: 'acc' (agreement with the labels) and
# 'cols_doesnt_predict' (columns whose predictions are constantly 0.5).
tool_metrics

{'resnet_mgca_pt_openi': {'cols_doesnt_predict': ['Lung Lesion',
   'Fracture',
   'Lung Opacity',
   'Enlarged Cardiomediastinum'],
  'acc': np.float64(0.9494270435446907)},
 'densenet121_res224_chex': {'cols_doesnt_predict': ['Infiltration',
   'Emphysema',
   'Fibrosis',
   'Pleural_Thickening',
   'Nodule',
   'Mass',
   'Hernia'],
  'acc': np.float64(0.6384054448225571)},
 'densenet121_res224_all': {'cols_doesnt_predict': [],
  'acc': np.float64(0.6884135472370766)},
 'densenet_medical_mae_pt_openi': {'cols_doesnt_predict': ['Lung Lesion',
   'Fracture',
   'Lung Opacity',
   'Enlarged Cardiomediastinum'],
  'acc': np.float64(0.9497326203208556)},
 'densenet_mocov2_pt_openi': {'cols_doesnt_predict': ['Lung Lesion',
   'Fracture',
   'Lung Opacity',
   'Enlarged Cardiomediastinum'],
  'acc': np.float64(0.9493506493506494)},
 'densenet121_res224_mimic_nb': {'cols_doesnt_predict': ['Infiltration',
   'Emphysema',
   'Fibrosis',
   'Pleural_Thickening',
   'Nodule',
   'Mass',
   'Her

In [61]:
# Check which columns have all values equal to 0.5.
# `preds_df` cannot be scanned here: once the evaluation loop has finished it has
# already had its constant columns dropped and been thresholded to 0/1, so the scan
# would always come back empty on a fresh run. Use the list that the loop recorded
# for the most recently evaluated tool instead (dicts preserve insertion order).
last_tool_name = list(tool_metrics)[-1]
cols_all_05 = tool_metrics[last_tool_name]['cols_doesnt_predict']
cols_all_05

['Lung Lesion', 'Fracture', 'Lung Opacity', 'Enlarged Cardiomediastinum']

In [57]:
# Re-display the per-tool metrics.
# NOTE(review): the saved output of this cell lists only 'acc' per tool, whereas the
# evaluation loop also stores 'cols_doesnt_predict' — the output appears to come from
# an earlier run; refresh it with Restart & Run All or remove this duplicate cell.
tool_metrics

{'resnet_mgca_pt_openi': {'acc': np.float64(0.9206773618538324)},
 'densenet121_res224_chex': {'acc': np.float64(0.7776589423648247)},
 'densenet121_res224_all': {'acc': np.float64(0.6884135472370766)},
 'densenet_medical_mae_pt_openi': {'acc': np.float64(0.9209150326797385)},
 'densenet_mocov2_pt_openi': {'acc': np.float64(0.9206179441473559)},
 'densenet121_res224_mimic_nb': {'acc': np.float64(0.6451574569221628)},
 'densenet121_res224_nih': {'acc': np.float64(0.6611408199643494)},
 'resnet_biovil_pt_openi': {'acc': np.float64(0.920736779560309)},
 'densenet121_res224_mimic_ch': {'acc': np.float64(0.5779560308972074)},
 'densenet121_res224_rsna': {'acc': np.float64(0.9075460487225193)},
 'evax_base_cxr__pt_openi': {'acc': np.float64(0.9202020202020202)},
 'densenet121_res224_pc': {'acc': np.float64(0.6995246583481878)},
 'evax_small_chexpert_pt_openi': {'acc': np.float64(0.919964349376114)},
 'evax_tiny_cxr__pt_openi': {'acc': np.float64(0.9211527035056447)},
 'evax_small_cxr__pt_ope