## Imports

In [None]:
import os
import pandas as pd
import json
import re

In [None]:
%run ../utils/__init__.py
%run ../utils/files.py
%run ../metrics/__init__.py

In [None]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

## Choose task

In [None]:
# Task whose results are analysed; keep exactly one assignment uncommented.
# The notebook sections suggest seg / rg / cls stand for segmentation,
# report generation and classification (presumably; confirm against utils).
# TASK = 'seg'
TASK = 'rg'
# TASK = 'cls'

In [None]:
# Columns that identify one result row. For the 'rg' task the rows are
# additionally keyed by the 'free' column.
KEY_COLS = ['run_name', 'dataset_type'] + (['free'] if TASK == 'rg' else [])
KEY_COLS

## Functions

In [None]:
# Locate the selected task's folder inside the workspace, and the 'results'
# folder inside it.
TASK_FOLDER = _get_task_folder(TASK)
BASE_FOLDER = os.path.join(WORKSPACE_DIR, TASK_FOLDER)
RESULTS_FOLDER = os.path.join(WORKSPACE_DIR, TASK_FOLDER, 'results')

In [None]:
def get_suffix(filename):
    """Return the suffix of a '...metrics-<suffix>.json' filename.

    The suffix is the run of word characters between 'metrics-' and '.json'
    (e.g. 'free' for 'chexpert-metrics-free.json'). An empty string is
    returned when the filename does not follow that pattern, which includes
    suffixes containing characters other than [A-Za-z0-9_] such as '-'.
    """
    # Raw string: '\w' and '\.' in a plain literal are invalid escape
    # sequences and trigger a warning on recent Python versions.
    match = re.search(r'.*metrics-(?P<suffix>\w*)\.json', filename)
    return '' if match is None else match.group('suffix')

In [None]:
# Substrings looked for in a results filename to decide its metric type;
# files matching none of them are treated as 'base' by load_results().
METRIC_TYPES = ['chexpert', 'grad-cam', 'mirqi']

In [None]:
def load_results():
    """Load every metrics JSON under RESULTS_FOLDER into one wide DataFrame.

    Each sub-folder of RESULTS_FOLDER (except 'debug') is treated as a run.
    Every '*json' file inside it is read as a dict keyed by dataset type and
    turned into rows carrying 'dataset_type' and 'run_name' (plus 'free',
    taken from the filename suffix, when TASK == 'rg'). Files are grouped by
    metric type (first entry of METRIC_TYPES contained in the filename, or
    'base'), and the per-type tables are outer-merged on KEY_COLS.

    Returns:
        (df, results_by_metric_type): the merged DataFrame with KEY_COLS
        first, and a dict mapping metric type to its un-merged DataFrame.
        When no results file is found, df is an empty DataFrame that only
        has the KEY_COLS columns.
    """
    frames_by_metric_type = {}

    for run_name in os.listdir(RESULTS_FOLDER):
        if run_name == 'debug':
            continue

        folder = os.path.join(RESULTS_FOLDER, run_name)
        if not os.path.isdir(folder):
            # A stray file next to the run folders would make listdir() raise
            continue

        for filename in os.listdir(folder):
            filepath = os.path.join(folder, filename)
            if not os.path.isfile(filepath) or not filename.endswith('json'):
                continue

            metric_type = next(
                (met for met in METRIC_TYPES if met in filename),
                'base', # Default if no specific metric_type is found
            )

            with open(filepath, 'r') as f:
                results_dict = json.load(f)

            results_df = pd.DataFrame.from_dict(results_dict, orient='index')
            results_df.reset_index(inplace=True)
            results_df.rename(columns={'index': 'dataset_type'}, inplace=True)
            results_df['run_name'] = run_name
            if TASK == 'rg':
                results_df['free'] = get_suffix(filename)

            frames_by_metric_type.setdefault(metric_type, []).append(results_df)

    # DataFrame.append was removed in pandas 2.0; concatenate once per type
    results_by_metric_type = {
        metric_type: pd.concat(frames, ignore_index=True)
        for metric_type, frames in frames_by_metric_type.items()
    }

    df = None
    cols_in_order = list(KEY_COLS)
    for results in results_by_metric_type.values():
        cols_in_order += [col for col in results.columns if col not in cols_in_order]

        if df is None:
            df = results
        else:
            df = df.merge(results, on=KEY_COLS, how='outer')

    if df is None:
        # Nothing was loaded: return an empty table instead of failing on None
        return pd.DataFrame(columns=cols_in_order), results_by_metric_type

    return df[cols_in_order], results_by_metric_type

In [None]:
def filter_results(dataset_type=None, metrics=None,
                   metrics_contain=None, free=None,
                   contains=None, doesnt_contain=None,
                   drop=None, drop_na_rows=False, drop_key_cols=False):
    """Return a filtered copy of the global RESULTS_DF (never modified here).

    Args:
        dataset_type: one 'dataset_type' value (str), or a list/tuple of
            values, to keep.
        metrics: metric column names to keep next to KEY_COLS. Ignored when
            metrics_contain is given.
        metrics_contain: substring; keeps KEY_COLS plus every column whose
            name contains it.
        free: when not None, keeps the rows whose 'free' column equals
            'free' (truthy value) or 'notfree' (falsy value).
        contains: regex, or list/tuple of regexes that must all match,
            searched in 'run_name'.
        doesnt_contain: regex, or list/tuple of regexes; rows whose
            'run_name' matches any of them are removed.
        drop: single regex; rows whose 'run_name' matches are removed.
        drop_na_rows: drop rows with a NaN in any remaining column.
        drop_key_cols: drop the KEY_COLS columns except 'run_name'.

    Returns:
        A new DataFrame. Columns that are entirely NaN after filtering are
        always removed.
    """
    def _as_patterns(value):
        # A lone string is one pattern; other types are ignored, as before
        if isinstance(value, str):
            return [value]
        if isinstance(value, (list, tuple)):
            return value
        return ()

    df = RESULTS_DF

    if dataset_type:
        if isinstance(dataset_type, str):
            df = df[df['dataset_type'] == dataset_type]
        elif isinstance(dataset_type, (list, tuple)):
            df = df[df['dataset_type'].isin(set(dataset_type))]

    if free is not None:
        free_str = 'free' if free else 'notfree'
        df = df.loc[df['free'] == free_str]

    if contains:
        for pattern in _as_patterns(contains):
            df = df.loc[df['run_name'].str.contains(pattern)]

    if doesnt_contain:
        for pattern in _as_patterns(doesnt_contain):
            df = df.loc[~df['run_name'].str.contains(pattern)]

    if drop:
        df = df.loc[~df['run_name'].str.contains(drop)]

    if metrics_contain:
        columns = KEY_COLS + [c for c in df.columns if metrics_contain in c]
        df = df[columns]
    elif metrics:
        # list() so that a tuple of metric names is accepted as well
        columns = KEY_COLS + list(metrics)
        df = df[columns]

    # dropna is deliberately NOT done in place: with no filter applied `df`
    # is still the global RESULTS_DF, and an in-place drop would delete
    # rows/columns from it for every later call.
    if drop_na_rows:
        df = df.dropna(axis=0, how='any')

    # Drop cols with all na
    df = df.dropna(axis=1, how='all')

    if drop_key_cols:
        columns = [c for c in df.columns if c == 'run_name' or c not in KEY_COLS]
        df = df[columns]

    return df

## Load results

In [None]:
# RESULTS_DF is the merged table used by filter_results(); `debug` receives the
# second return value of load_results() (the per-metric-type tables).
RESULTS_DF, debug = load_results()
print(len(RESULTS_DF))
RESULTS_DF.head(2)

In [None]:
# Distinct leading tokens of the column names ('-' and '_' both act as
# separators), as a quick overview of which metric families were loaded.
{col.replace('-', '_').split('_')[0] for col in RESULTS_DF.columns}

## Segmentation

In [None]:
def add_macro_avg_column(target_col):
    """Add RESULTS_DF[target_col] as the row-wise mean of its three sub-columns.

    The sub-columns are the columns of the global RESULTS_DF whose name
    starts with target_col, excluding target_col itself; exactly three are
    required. RESULTS_DF is modified in place.

    Raises:
        AssertionError: if the number of sub-columns is not three.
    """
    # Excluding target_col itself lets the cell be re-run: otherwise the
    # column added by a previous run would count as a fourth match.
    matching_cols = [
        c for c in RESULTS_DF.columns
        if c.startswith(target_col) and c != target_col
    ]
    assert len(matching_cols) == 3, f'Matching cols not 3: {matching_cols}'
    RESULTS_DF[target_col] = RESULTS_DF[matching_cols].mean(axis=1)
    print(f'Calculated col {target_col}')

In [None]:
# Add the averaged 'n-shapes-gen' and 'n-holes-gen' columns to RESULTS_DF.
add_macro_avg_column('n-shapes-gen')
add_macro_avg_column('n-holes-gen')

In [None]:
organs = ('heart', 'left lung', 'right lung')
SEG_METRICS = []


def _add_metric(metric_name, macro=True):
    """Append metric_name (only if macro) and its per-organ names to SEG_METRICS."""
    if macro:
        SEG_METRICS.append(metric_name)
    for organ in organs:
        SEG_METRICS.append(f'{metric_name}-{organ}')


_add_metric('iou')
# _add_metric('dice')
_add_metric('n-shapes-gen')
_add_metric('n-holes-gen')
SEG_METRICS

In [None]:
# (regex, replacement) pairs applied in order by rename_runs(); the
# commented-out pairs are kept as ready-to-enable alternatives.
replace_strs = [
    # (r'^\d{4}_\d{6}_', ''),
    (r'jsrt_scan_', ''),
#     ('most-similar-image', '1nn'),
#     ('_lr[\d\.]+', ''),
#     ('_size256', ''),
#     (r'_\d{4}_\d{6}_.*', ''),
#     ('dummy-', ''),
#     ('common', 'top'),
#     ('-v2', ''),
#     (r'top-(\w)\w+-(\d+)', r'top-\1-\2'),
#     ('_densenet-121', ''),
]


def rename_runs(run_name):
    """Shorten a run name by applying every replace_strs substitution in order."""
    for pattern, replacement in replace_strs:
        run_name = re.sub(pattern, replacement, run_name)
    return run_name

In [None]:
# Test-split rows restricted to SEG_METRICS, excluding runs matching
# '1105_180035', sorted ascending by 'n-shapes-gen' then 'n-holes-gen' and
# indexed by the shortened run name.
filter_results(
    metrics=SEG_METRICS,
    dataset_type='test',
    drop='1105_180035',
).sort_values(
    ['n-shapes-gen', 'n-holes-gen'],
    ascending=True,
).set_index('run_name').rename(index=rename_runs)

## Report generation

In [None]:
# NLP_METRICS = ['bleu1', 'bleu2', 'bleu3', 'bleu4', 'bleu', 'rougeL', 'ciderD']
# CHEXPERT_METRICS = ['recall', 'prec', 'f1'] # 'acc', 'roc_auc', 
# Columns named '<f1|recall|prec>-<something>', leaving out the '-woNF' ones.
CHEXPERT_DISEASE_METRICS = [
    col
    for col in RESULTS_DF.columns
    if col.startswith(('f1-', 'recall-', 'prec-')) and not col.endswith('-woNF')
]
# CHEXPERT_RUNTIME_METRICS = [col for col in RESULTS_DF.columns if col.startswith('chex')]
# VAR_METRICS = [c for c in RESULTS_DF.columns if 'distinct' in c]
# MIRQI_METRICS = [c for c in RESULTS_DF.columns if 'MIRQI' in c]
MIRQI_METRICS_v1 = ['MIRQI-r', 'MIRQI-p', 'MIRQI-f']

In [None]:
# Metric columns shown in the report-generation summary table; commented-out
# entries are alternatives that can be switched back on.
ESSENTIAL_METRICS = [
    'bleu', 'ciderD', 'rougeL',
    # 'chex_f1', 'chex_acc', # 'chex_recall', 'chex_prec', # Runtime-chexpert
    # 'MIRQI-v2-f',
    # 
    # Holistic-chexpert:
    'acc', 'f1', 'prec', 'recall', 'f1-woNF', 'prec-woNF', 'recall-woNF',
    # 'pr_auc', 'pr_auc-woNF',
    # 'acc',
    # *CHEXPERT_DISEASE_METRICS,
    *MIRQI_METRICS_v1,
]

In [None]:
# Display floats with three decimals.
pd.options.display.float_format = '{:.3f}'.format

In [None]:
# (regex, replacement) pairs applied in order by rename_runs() to shorten
# report-generation run names.
replace_strs = [
    (r'^\d{4}_\d{6}_', ''),
    (r'_precnn-\d{4}-\d{6}', ''),
    ('mimic-cxr_', ''),
    # ('most-similar-image', '1nn'),
    # (r'_lr[\d\.]+', ''),
    # (r'_lr-emb[\d\.]+', ''),
    ('_size256', ''),
    ('-v2', ''),
    ('_front', ''),
    (r'__[\w\-]*', ''),
]


def rename_runs(run_name):
    """Shorten a run name by applying every replace_strs substitution in order."""
    for pattern, replacement in replace_strs:
        run_name = re.sub(pattern, replacement, run_name)
    return run_name

In [None]:
# Test-split, free=True rows of the runs whose name contains 'mimic-cxr',
# restricted to ESSENTIAL_METRICS and shown indexed by shortened run name.
res = filter_results(
    # contains='^01\d{2}_\d{6}_(h-|lstm)',
    # contains=r'dummy|tpl|__bug-v4|0510_235335|paper', # __best-iu
    # contains='__att',
    contains='mimic-cxr',
    # doesnt_contain='dummy-baseline|mimic',
    dataset_type='test',
    free=True,
    metrics=ESSENTIAL_METRICS, # CHEXPERT_RUNTIME_METRICS,
    drop_key_cols=True,
    # drop_na_rows=True,
)
# res = res.replace(r'^\d{4}_\d{6}_(.*)', r'\1', regex=True)
# res = res.sort_values('run_name')
res = res.set_index('run_name').rename(
    index=rename_runs,
)
res.sort_index()

### Compare runtime chexpert vs holistic chexpert

In [None]:
def subtract_cols(df, cols_a, cols_b, drop_na_rows=True, key_cols=None):
    """Return the key columns of df next to the differences cols_a - cols_b.

    Args:
        df: DataFrame holding the key columns, cols_a and cols_b.
        cols_a: list of minuend column names; also names the output columns.
        cols_b: list of subtrahend column names, paired with cols_a by position.
        drop_na_rows: drop the rows that have a NaN in any output column.
        key_cols: identifying columns copied to the output; defaults to the
            global KEY_COLS.

    Returns:
        A new DataFrame with the same index as df (minus dropped rows).
    """
    if key_cols is None:
        key_cols = KEY_COLS

    # The difference is computed on raw arrays, so it must be given df's
    # index explicitly: with a default RangeIndex, concat(axis=1) would align
    # on labels and pair the differences with the wrong rows (or with none)
    # whenever df has been filtered.
    differences = pd.DataFrame(
        df[cols_a].to_numpy() - df[cols_b].to_numpy(),
        columns=cols_a,
        index=df.index,
    )
    result = pd.concat([df[key_cols], differences], axis=1)

    if drop_na_rows:
        result = result.dropna(axis=0, how='any')

    return result

In [None]:
metric = 'f1'

# Columns prefixed 'chex_<metric>' vs. columns prefixed '<metric>'.
# NOTE(review): subtract_cols() pairs these two lists by position, so they need
# the same length and order; nothing here enforces that — confirm.
runtime_chexpert = [c for c in RESULTS_DF.columns if c.startswith(f'chex_{metric}')]
holistic_chexpert = [c for c in RESULTS_DF.columns if c.startswith(metric)]

In [None]:
# Leave out the runs whose name contains 'dummy'.
df = RESULTS_DF
df = df.loc[~df['run_name'].str.contains('dummy')]
len(df)

In [None]:
# Distinct run names that remain.
set(df['run_name'])

In [None]:
# Replace df with the runtime-minus-holistic differences.
df = subtract_cols(df, runtime_chexpert, holistic_chexpert)
df.head()

In [None]:
# Summary statistics of the differences.
df.describe()

In [None]:
# Display the full table.
df

In [None]:
from collections import Counter

In [None]:
# Load the outputs of one run twice, with free=True and free=False, to compare them.
run_name = '0112_154506_lstm-v2_lr0.001_densenet-121-v2_noes'
# NOTE: this rebinds `debug`, which earlier held load_results()' second return value.
debug = False
d1 = load_rg_outputs(run_name, debug=debug, free=True)
d2 = load_rg_outputs(run_name, debug=debug, free=False)
len(d1), len(d2)

In [None]:
# Number of rows per filename in each of the two outputs.
c1 = Counter(d1['filename'])
c2 = Counter(d2['filename'])
len(c1), len(c2)

In [None]:
# Report every filename whose row count differs between the two outputs.
# The merged dict yields c1's filenames first and then those only present in
# c2, which a loop over c1 alone would never report (a Counter returns 0 for
# a missing key).
for fname in {**c1, **c2}:
    v1 = c1[fname]
    v2 = c2[fname]
    if v1 != v2:
        print('Wrong: ', fname, v1, v2)

In [None]:
# Peek at the free=False outputs.
d2.head()

In [None]:
# Dataset types present in the free=False outputs.
set(d2['dataset_type'])

### Pretty-print (latex)

In [None]:
# (regex, replacement) pairs applied in order by rename_runs() to produce the
# short run names used in the LaTeX table. Every pattern containing a
# backslash is a raw string, so that '\d' and '\.' reach the regex engine
# without Python warning about invalid escape sequences.
replace_strs = [
    (r'^\d{4}_\d{6}_', ''),
    ('most-similar-image', '1nn'),
    (r'_lr[\d\.]+', ''),
    ('_size256', ''),
    (r'_\d{4}_\d{6}_.*', ''),
    ('dummy-', ''),
    ('common', 'top'),
    ('-v2', ''),
    (r'top-(\w)\w+-(\d+)', r'top-\1-\2'),
    ('_densenet-121', ''),
]

def rename_runs(run_name):
    """Shorten a run name by applying every replace_strs substitution in order."""
    s = run_name
    for target, replace_with in replace_strs:
        s = re.sub(target, replace_with, s)
    return s

In [None]:
# NOTE(review): within this notebook CHEXPERT_METRICS and MIRQI_METRICS only
# appear in commented-out assignments; unless a %run'd module defines them this
# line raises NameError — confirm.
columns = ['bleu', 'rougeL', 'ciderD'] + CHEXPERT_METRICS + MIRQI_METRICS

In [None]:
# Test-split, free=True rows of the runs matched by `contains`, without the
# three runs listed in `drop` and without rows that have a NaN in `columns`.
df = filter_results(dataset_type='test',
                    free=True,
                    metrics=columns,
                    contains='(?=_lstm-att-v2.*densenet|_lstm-v2.*densenet|dummy)',
                    drop='0915_173951|0915_174222|0916_104739',
                    drop_na_rows=True,
                   )

In [None]:
# Display the selection before it is formatted.
df

In [None]:
def shorten_cols(s):
    """Abbreviate a column name by replacing 'MIRQI-v2' with 'v2'."""
    return s.replace('MIRQI-v2', 'v2')

In [None]:
# Print the table as LaTeX: rows indexed by shortened run name and sorted,
# column names shortened, three decimals, one left-aligned column followed by
# one centred column per metric.
print(df.set_index('run_name').rename(
    index=rename_runs,
    columns=shorten_cols,
).sort_index().to_latex(
    columns=[shorten_cols(c) for c in columns],
    float_format='%.3f',
    column_format='l' + 'c' * len(columns),
))

## Classification

In [None]:
# Scratch selection of classification runs; commented lines are earlier choices.
# contains = 'covid-x'
# contains = 'cxr14'
# contains = 'e0'
# contains = '0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid'
# contains = '0717_101812_covid-x_densenet-121_lr1e-06_os-max2_aug-covid'
run_name = '0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid' # WINNER

# NOTE: the second assignment wins; the first value is overwritten immediately.
contains = '0717_101812_covid-x_densenet-121_lr1e-06_os-max2_aug-covid'
contains = 'covid-uc'

In [None]:
# Rebinds ESSENTIAL_METRICS (also assigned in the report-generation section)
# to the classification metrics.
ESSENTIAL_METRICS = [
    'acc', 'roc_auc', 'hamming', # 'prec', 'recall',
]

In [None]:
# Columns shown in the classification table.
metrics = [
    'acc', 'roc_auc', 'prec', 'recall', 'roc_auc_Cardiomegaly', 'roc_auc_Pneumonia',
    'recall_Cardiomegaly', 'recall_Pneumonia',
    'iobb-masks', 'iobb-masks-Cardiomegaly', 'iobb-masks-Pneumonia',
]

In [None]:
# Display floats with three decimals.
pd.options.display.float_format = '{:.3f}'.format

In [None]:
def simplify_names(s):
    """Reduce a '<MMDD>_<HHMMSS>_cxr14_<model>_lr...' run name to '<model>'.

    Names that do not follow that pattern are kept whole. '_hint' is
    appended whenever the original name contains 'hint'.
    """
    suffix = '_hint' if 'hint' in s else ''
    return re.sub(r'^\d{4}_\d{6}_cxr14_(.*)_lr.*', r'\1', s) + suffix

In [None]:
# Validation rows of the cxr14 runs whose name starts with 01/12/02, without
# the runs matched by `doesnt_contain`, sorted by descending 'roc_auc'.
d = filter_results(
    contains=r'^(01|12|02)\w+_cxr14_',
    doesnt_contain=[
        r'_Card',
        r'_Pneu',
        '1027_144914', # really bad results
    ],
    # NOTE: ('val') is a plain string, not a 1-tuple; filter_results handles it as a str.
    dataset_type=('val'), # -bbox
    # metrics_contain='iou',
    metrics=metrics, # ESSENTIAL_METRICS,
    drop_key_cols=True,
    # drop_na_rows=True,
).sort_values('roc_auc', ascending=False)
# d['run_name'] = d['run_name'].apply(simplify_names)
d

## Report-generation: results at different report lengths

In [None]:
# Candidate report-length limits (presumably max words / max sentences; None
# presumably means no limit — confirm).
vals_words = [20, 25, 27, 33, 44, None]
vals_sents = [3, 4, 5, 6, None]

In [None]:
# NOTE(review): this cell looks stale — load_results() as defined in this
# notebook takes no arguments, and create_results_df is not defined in this
# notebook; confirm whether a %run'd module provides other versions.
max_words = vals_words[0]
suffix = f'max-words-{max_words}' if max_words else ''
all_results = load_results(suffix)
results_df_test = create_results_df(all_results, 'test')
results_df_test

## Save baseline results to file

In [None]:
import json
import os
import numpy as np

In [None]:
%run ../datasets/common/constants.py
%run ../utils/__init__.py
%run ../utils/files.py

In [None]:
def _save_metrics(folder, filename, results_dict):
    """Write results_dict as 2-space-indented JSON to folder/filename and print the path."""
    target_path = os.path.join(folder, filename)
    with open(target_path, 'w') as out_file:
        json.dump(results_dict, out_file, indent=2)
    print(f'Saved dict to {target_path}')

In [None]:
def save_mirqi_metrics(folder, results):
    """Save results as 'mirqi-metrics-free.json' inside folder."""
    _save_metrics(folder=folder, filename='mirqi-metrics-free.json', results_dict=results)


def save_chexpert_metrics(folder, results):
    """Save results as 'chexpert-metrics-free.json' inside folder."""
    _save_metrics(folder=folder, filename='chexpert-metrics-free.json', results_dict=results)


def save_runtime_metrics(folder, results):
    """Save results as 'metrics-free.json' inside folder."""
    _save_metrics(folder=folder, filename='metrics-free.json', results_dict=results)

In [None]:
def calculate_avg_woNF(metrics, prefixes, diseases=CHEXPERT_DISEASES, verbose=False):
    """Average per-disease metrics while leaving out the 'No Finding' entry.

    Args:
        metrics: dict holding a '<prefix>-<disease>' value for every prefix
            and every disease other than 'no finding' (case-insensitive).
        prefixes: one prefix (str) or an iterable of prefixes.
        diseases: disease names used to build the keys.
        verbose: print the keys averaged for each prefix.

    Returns:
        Dict mapping '<prefix>-woNF' to the mean of the selected values.
    """
    prefix_list = (prefixes,) if isinstance(prefixes, str) else prefixes

    averages = {}
    for prefix in prefix_list:
        selected_keys = []
        for disease in diseases:
            if disease.lower() == 'no finding':
                continue
            selected_keys.append(f'{prefix}-{disease}')

        average = np.mean([metrics[key] for key in selected_keys])

        if verbose:
            print(f'Prefix={prefix}, averaging: {selected_keys}')
        averages[f'{prefix}-woNF'] = average

    return averages

### Paper MIRQI

In [None]:
# Folder that receives this paper baseline's metrics files (passed to the save_* helpers).
folder = get_results_folder(RunId('paper_iu-x-ray_zhang-et-al-mirqi', False, 'rg'), save_mode=True)

In [None]:
# Hard-coded baseline values (presumably transcribed from the paper — confirm).
# 'bleu' is the mean of bleu1..bleu4.
bleu1, bleu2, bleu3, bleu4 = 0.441, 0.291, 0.203, 0.147
runtime_results = {
    'test': {
        'bleu1': bleu1, 'bleu2': bleu2, 'bleu3': bleu3, 'bleu4': bleu4,
        'bleu': np.mean([bleu1, bleu2, bleu3, bleu4]),
        'ciderD': 0.304,
        'rougeL': 0.367,
    }
}
mirqi_results = {
    'test': {
        'MIRQI-r': 0.483,
        'MIRQI-p': 0.490,
        'MIRQI-f': 0.478,
    }
}

In [None]:
# Write the MIRQI and runtime metrics files into `folder`.
save_mirqi_metrics(folder, mirqi_results)
save_runtime_metrics(folder, runtime_results)

### Lovelace et al

In [None]:
# Folder that receives this paper baseline's metrics files (passed to the save_* helpers).
folder = get_results_folder(RunId('paper_mimic-cxr_lovelace-et-al', False, 'rg'), save_mode=True)

In [None]:
# Using their transformer w/fine-tuning ablation
bleu1, bleu2, bleu3, bleu4 = 0.415, 0.272, 0.193, 0.146
runtime_results = {
    'test': {
        'bleu1': bleu1, 'bleu2': bleu2, 'bleu3': bleu3, 'bleu4': bleu4,
        'bleu': np.mean([bleu1, bleu2, bleu3, bleu4]),
        'ciderD': 0.316, # not sure if Cider-D or Cider
        'rougeL': 0.318,
    }
}
_values = {
    'f1': 22.8,
    'prec': 33.3,
    'recall': 21.7,

    'f1-Atelectasis': 32.2,
    'f1-Cardiomegaly': 43.3,
    'f1-Consolidation': 7.3,
    'f1-Edema': 29.8,
    'f1-Enlarged Cardiomediastinum': 5.9,
    'f1-Fracture': 0,
    'f1-Lung Lesion': 1.4,
    'f1-Lung Opacity': 17.1,
    'f1-No Finding': 54.1,
    'f1-Pleural Effusion': 48.0,
    'f1-Pleural Other': 0.9,
    'f1-Pneumonia': 3.9,
    'f1-Pneumothorax': 9.8,
    'f1-Support Devices': 66.0,

    'prec-Atelectasis': 43.0,
    'prec-Cardiomegaly': 46.9,
    'prec-Consolidation': 15.7,
    'prec-Edema': 37.6,
    'prec-Enlarged Cardiomediastinum': 12.3,
    'prec-Fracture': 0,
    'prec-Lung Lesion': 23.8,
    'prec-Lung Opacity': 64.0,
    'prec-No Finding': 39.0,
    'prec-Pleural Effusion': 71.2,
    'prec-Pleural Other': 16.1,
    'prec-Pneumonia': 7,
    'prec-Pneumothorax': 12.9,
    'prec-Support Devices': 77.0,

    'recall-Atelectasis': 25.8,
    'recall-Cardiomegaly': 40.2,
    'recall-Consolidation': 4.8,
    'recall-Edema': 24.6,
    'recall-Enlarged Cardiomediastinum': 3.9,
    'recall-Fracture': 0,
    'recall-Lung Lesion': 0.7,
    'recall-Lung Opacity': 9.9,
    'recall-No Finding': 88.2,
    'recall-Pleural Effusion': 36.2,
    'recall-Pleural Other': 0.5,
    'recall-Pneumonia': 2.7,
    'recall-Pneumothorax': 7.8,
    'recall-Support Devices': 57.8,
}
woNF = calculate_avg_woNF(_values, ['f1', 'recall', 'prec'])
_values.update(woNF)
chexpert_results = {
    'test': {
        k: value / 100
        for k, value in _values.items()
    },
}
woNF

In [None]:
# Write the chexpert and runtime metrics files into `folder`.
save_chexpert_metrics(folder, chexpert_results)
save_runtime_metrics(folder, runtime_results)

### Boag et al

#### 1NN

In [None]:
# Folder that receives this paper baseline's metrics files (passed to the save_* helpers).
folder = get_results_folder(RunId('paper_mimic-cxr_boag-et-al-1nn', False, 'rg'), save_mode=True)

In [None]:
# Using their 1-NN model
bleu1, bleu2, bleu3, bleu4 = 0.305, 0.171, 0.098, 0.057
runtime_results = {
    'test': {
        'bleu1': bleu1, 'bleu2': bleu2, 'bleu3': bleu3, 'bleu4': bleu4,
        'bleu': np.mean([bleu1, bleu2, bleu3, bleu4]),
        'ciderD': 0.755, # not sure if Cider-D or Cider
    }
}
_values = {
    'acc': 0.818,
    'prec': 0.253,
    'f1': 0.258,

    'f1-Support Devices': 0.527,
    'f1-Lung Opacity': 0.417,
    'f1-Cardiomegaly': 0.445,
    'f1-Atelectasis': 0.375,
    'f1-No Finding': 0.455,
    'f1-Pleural Effusion': 0.532,
    'f1-Edema': 0.286,
    'f1-Enlarged Cardiomediastinum': 0.142,
    'f1-Pneumonia': 0.08,
    'f1-Pneumothorax': 0.111,
    'f1-Fracture': 0.060,
    'f1-Lung Lesion': 0.062,
    'f1-Consolidation': 0.085,
    'f1-Pleural Other': 0.039,
}
woNF = calculate_avg_woNF(_values, 'f1')
_values.update(woNF)
chexpert_results = {
    'test': _values,
}
woNF

In [None]:
# Write the chexpert and runtime metrics files into `folder`.
save_chexpert_metrics(folder, chexpert_results)
save_runtime_metrics(folder, runtime_results)

#### cnn-rnn-beam

In [None]:
# Folder that receives this paper baseline's metrics files (passed to the save_* helpers).
folder = get_results_folder(RunId('paper_mimic-cxr_boag-et-al-cnn-rnn-beam', False, 'rg'), save_mode=True)

In [None]:
# Using their CNN-RNN-beam
bleu1, bleu2, bleu3, bleu4 = 0.305, 0.201, 0.137, 0.092
runtime_results = {
    'test': {
        'bleu1': bleu1, 'bleu2': bleu2, 'bleu3': bleu3, 'bleu4': bleu4,
        'bleu': np.mean([bleu1, bleu2, bleu3, bleu4]),
        'ciderD': 0.850, # not sure if Cider-D or Cider
    }
}
_values = {
    'acc': 0.837, 
    'prec': 0.304,
    'f1': 0.186,

    'f1-Support Devices': 0.613,
    'f1-Lung Opacity': 0.077,
    'f1-Cardiomegaly': 0.390,
    'f1-Atelectasis': 0.146,
    'f1-No Finding': 0.407,
    'f1-Pleural Effusion': 0.473,
    'f1-Edema': 0.271,
    'f1-Enlarged Cardiomediastinum': 0.134,
    'f1-Pneumonia': 0.03,
    'f1-Pneumothorax': 0.043,
    'f1-Fracture': 0.001,
    'f1-Lung Lesion': 0.001, # less than that
    'f1-Consolidation': 0.014,
    'f1-Pleural Other': 0.001, # less than that
}
woNF = calculate_avg_woNF(_values, ['f1'], CHEXPERT_DISEASES)
_values.update(woNF)
chexpert_results = {
    'test': _values,
}
woNF

In [None]:
# Write the chexpert and runtime metrics files into `folder`.
save_chexpert_metrics(folder, chexpert_results)
save_runtime_metrics(folder, runtime_results)

### Liu et al

#### CCR

In [None]:
# Folder that receives this paper baseline's metrics files (passed to the save_* helpers).
folder = get_results_folder(RunId('paper_mimic-cxr_liu-et-al-ccr', False, 'rg'), save_mode=True)

In [None]:
# Using their CCR ablation
bleu1, bleu2, bleu3, bleu4 = 0.294, 0.190, 0.134, 0.094
runtime_results = {
    'test': {
        'bleu1': bleu1, 'bleu2': bleu2, 'bleu3': bleu3, 'bleu4': bleu4,
        'bleu': np.mean([bleu1, bleu2, bleu3, bleu4]),
        'ciderD': 0.956,
        'rougeL': 0.284,
    }
}
_values = {
    'acc': 0.868,
    'prec': 0.313,
    'recall': 0.126,

    'prec-No Finding': 0.491,
    'prec-Enlarged Cardiomediastinum': 0.202,
    'prec-Cardiomegaly': 0.678,
    'prec-Lung Lesion': 0,
    'prec-Lung Opacity': 0.640,
    'prec-Edema': 0.280,
    'prec-Consolidation': 0.037,
    'prec-Pneumonia': 0,
    'prec-Atelectasis': 0.476,
    'prec-Pneumothorax': 0.039,
    'prec-Pleural Effusion': 0.683,
    'prec-Pleural Other': 0,
    'prec-Fracture': 0,
    'prec-Support Devices': 0.849,
}
woNF = calculate_avg_woNF(_values, ['prec'], CHEXPERT_DISEASES, verbose=False)
_values.update(woNF)
chexpert_results = {
    'test': _values,
}
woNF

In [None]:
# Write the chexpert and runtime metrics files into `folder`.
save_chexpert_metrics(folder, chexpert_results)
save_runtime_metrics(folder, runtime_results)

#### Full

In [None]:
# Folder that receives this paper baseline's metrics files (passed to the save_* helpers).
folder = get_results_folder(RunId('paper_mimic-cxr_liu-et-al-full', False, 'rg'), save_mode=True)

In [None]:
# Using their full model.
# NOTE(review): this comment used to read "CCR ablation", apparently copied
# from the CCR cell, while the section heading and run id say "full" —
# confirm the numbers below are the full model's.
bleu1, bleu2, bleu3, bleu4 = 0.313, 0.206, 0.146, 0.103
runtime_results = {
    'test': {
        'bleu1': bleu1, 'bleu2': bleu2, 'bleu3': bleu3, 'bleu4': bleu4,
        'bleu': np.mean([bleu1, bleu2, bleu3, bleu4]),
        'ciderD': 1.046,
        'rougeL': 0.306,
    }
}
_values = {
    'acc': 0.867,
    'prec': 0.309,
    'recall': 0.134,

    'prec-No Finding': 0.405,
    'prec-Enlarged Cardiomediastinum': 0.167,
    'prec-Cardiomegaly': 0.704,
    'prec-Lung Lesion': 0,
    'prec-Lung Opacity': 0.460,
    'prec-Edema': 0,
    'prec-Consolidation': 0,
    'prec-Pneumonia': 0.4,
    'prec-Atelectasis': 0.521,
    'prec-Pneumothorax': 0.098,
    'prec-Pleural Effusion': 0.689,
    'prec-Pleural Other': 0,
    'prec-Fracture': 0,
    'prec-Support Devices': 0.880,
}
# Only per-disease precision values are available, so only 'prec-woNF' is computed.
woNF = calculate_avg_woNF(_values, ['prec'], CHEXPERT_DISEASES, verbose=False)
woNF

In [None]:
# Merge the woNF average into the values and wrap them under the 'test' split.
_values.update(woNF)

chexpert_results = {
    'test': _values,
}

In [None]:
# Write the chexpert and runtime metrics files into `folder`.
save_chexpert_metrics(folder, chexpert_results)
save_runtime_metrics(folder, runtime_results)