In [1]:
import os
import pandas as pd
import json

In [2]:
%run ../utils/__init__.py
%run ../metrics/__init__.py

In [3]:
classification = False

In [4]:
base_folder = os.path.join(WORKSPACE_DIR,
                           'classification' if classification else 'report_generation')
results_folder = os.path.join(base_folder, 'results')
# runs_folder = os.path.join(base_folder, 'runs')
# run_names = os.listdir(results_folder)
# _tb_run_names = os.listdir(runs_folder)
# run_names = [r for r in run_names if r in _tb_run_names or 'dummy' in r]
# run_names = list(run_names)
# if 'debug' in run_names: run_names.remove('debug')
# run_names

In [9]:
def load_results(chexpert=False):
    all_results = []

    for run_name in os.listdir(results_folder): # run_names:
        if run_name == 'debug':
            continue
            
        if not run_name.startswith('09'):
            continue

        folder = os.path.join(results_folder, run_name)
        for filename in os.listdir(folder):
            if not filename.endswith('json'):
                continue

            is_chexpert_file = filename.startswith('chexpert')
            if not chexpert and is_chexpert_file:
                continue
            elif chexpert and not is_chexpert_file:
                continue
            
            filepath = os.path.join(folder, filename)

            if not os.path.isfile(filepath):
                continue

            with open(filepath, 'r') as f:
                results = json.load(f)

            additional_name = filename
            additional_name = additional_name.replace('chexpert-', '')
            additional_name = additional_name.replace('metrics', '')
            additional_name = additional_name.replace('.json', '')
            all_results.append((run_name, additional_name, results))

    return all_results

In [17]:
all_results = load_results(chexpert=False)
len(all_results)

58

In [40]:
def create_results_df(all_results):
    df_content = []
    for run_name, additional_name, results in all_results:
        for key, values in results.items():
            df_content.append({
                'run_name': f'{run_name}-{additional_name}',
                'dataset_type': key,
                **values,
            })

#     run_names, results_dicts = zip(*[
#         (f'{run}-{name}', results_dict[key])
#         for run, name, results_dict in results if key in results_dict
#     ])
    
    # df = pd.DataFrame(results_dicts, index=run_names)
    df = pd.DataFrame(df_content)
    return df

In [41]:
RESULTS_DF = create_results_df(all_results)
RESULTS_DF.head()

Unnamed: 0,run_name,dataset_type,loss,word_loss,stop_loss,bleu1,bleu2,bleu3,bleu4,bleu,rougeL,ciderD,distinct_words,distinct_sentences,word_acc
0,0915_174026_dummy-common-sentences-100--free,train,-1.0,-1.0,-1.0,0.340114,0.207458,0.137165,0.095464,0.19505,0.27658,0.152633,109.0,296.0,
1,0915_174026_dummy-common-sentences-100--free,val,-1.0,-1.0,-1.0,0.330119,0.201151,0.133937,0.093666,0.189718,0.272445,0.160992,109.0,277.0,
2,0915_174026_dummy-common-sentences-100--free,test,-1.0,-1.0,-1.0,0.346689,0.21085,0.13844,0.095769,0.197937,0.280916,0.165551,109.0,222.0,
3,0915_174026_dummy-common-sentences-100--free,train-normal,,,,0.404256,0.257186,0.176439,0.126318,0.24105,0.310559,0.221799,,,
4,0915_174026_dummy-common-sentences-100--free,train-abnormal,,,,0.286127,0.169578,0.109083,0.074254,0.15976,0.253867,0.10425,,,


In [50]:
def filter_results(dataset_type=None, metrics=None, contains=None):
    df = RESULTS_DF
    
    if dataset_type:
        if isinstance(dataset_type, str):
            df = df[df['dataset_type'] == dataset_type]
        elif isinstance(dataset_type, (list, tuple)):
            dataset_type = set(dataset_type)
            df = df[df['dataset_type'].isin(dataset_type)]
    
    if contains:
        filter_contains = lambda d, s: d.loc[d['run_name'].str.contains(s)]
        if isinstance(contains, (list, tuple)):
            for c in contains:
                df = filter_contains(df, c)
        elif isinstance(contains, str):
            df = filter_contains(df, contains)
        
    if metrics:
        columns = ['run_name', 'dataset_type']
        columns += metrics
        df = df[columns]
    
    return df

### Report generation

In [22]:
NLP_METRICS = ['bleu1', 'bleu2', 'bleu3', 'bleu4', 'bleu', 'rougeL', 'ciderD']
CHEXPERT_METRICS = ['recall', 'prec', 'f1', 'roc_auc']

In [52]:
filter_results(['test-normal', 'test', 'test-abnormal'], metrics=NLP_METRICS, contains='-free')

Unnamed: 0,run_name,dataset_type,bleu1,bleu2,bleu3,bleu4,bleu,rougeL,ciderD
2,0915_174026_dummy-common-sentences-100--free,test,0.346689,0.210850,0.138440,9.576881e-02,0.197937,0.280916,0.165551
7,0915_174026_dummy-common-sentences-100--free,test-normal,0.423569,0.271667,0.187014,1.342417e-01,0.254123,0.316438,0.227408
8,0915_174026_dummy-common-sentences-100--free,test-abnormal,0.302018,0.178054,0.113370,7.643432e-02,0.167469,0.261473,0.130919
20,0915_172915_dummy-common-words-10--free,test,0.312055,0.107749,0.024166,8.550974e-07,0.110993,0.243604,0.042768
25,0915_172915_dummy-common-words-10--free,test-normal,0.255797,0.096562,0.023035,1.040850e-06,0.093849,0.247808,0.064311
...,...,...,...,...,...,...,...,...,...
479,0919_092820_lstm-v2_lr0.0001_resnet-50_size256...,test-normal,0.495187,0.328123,0.231719,1.716516e-01,0.306670,0.397355,0.386726
480,0919_092820_lstm-v2_lr0.0001_resnet-50_size256...,test-abnormal,0.361030,0.228941,0.156155,1.119964e-01,0.214531,0.320588,0.220597
494,0919_032831_h-lstm-att-v2_lr0.0001_precnn_size...,test,0.389193,0.244593,0.168024,1.214219e-01,0.230808,0.349173,0.151545
499,0919_032831_h-lstm-att-v2_lr0.0001_precnn_size...,test-normal,0.392209,0.268442,0.197605,1.520707e-01,0.252582,0.405952,0.176805


In [52]:
metrics = [
    col
    for col in RESULTS_DF.columns
    # if col.startswith('recall')
    # if col == 'recall-No Finding'
    if 'No Finding' in col
]
# CHEXPERT_METRICS
results_df_test[metrics].sort_index()

Unnamed: 0,prec-No Finding,recall-No Finding,f1-No Finding,roc_auc-No Finding
0915_172709_dummy-constant---free,0.361702,1.0,0.53125,0.5
0915_172915_dummy-common-words-10---free,0.361702,1.0,0.53125,0.5
0915_173110_dummy-common-words-50---free,0.356164,0.191176,0.248804,0.497672
0915_173307_dummy-common-words-100---free,0.36,0.165441,0.2267,0.499387
0915_173524_dummy-common-sentences-10---free,0.361702,1.0,0.53125,0.5
0915_173609_dummy-random---free,0.416084,0.4375,0.426523,0.544792
0915_173754_dummy-common-sentences-50---free,0.358006,0.871324,0.507495,0.492953
0915_173951_dummy-most-similar-image_densenet-121---free,0.399381,0.474265,0.433613,0.535049
0915_174026_dummy-common-sentences-100---free,0.356375,0.852941,0.502709,0.490012
0915_174222_dummy-most-similar-image_0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid---free,0.4,0.455882,0.426117,0.534191


In [22]:
import re

In [27]:
remove_timestamp = lambda s: re.sub(r'^\d{4}_\d{6}_', '', s)

In [28]:
s = '0915_174026_dummy-common-sentences-100---free'
remove_timestamp(s)

'dummy-common-sentences-100---free'

In [31]:
# print_columns = [
#     c
#     for c in results_df_test.columns
#     if c != 'loss' and 'cm' not in c and c != 'word_acc'
# ]
print(results_df_test.rename(index=remove_timestamp).sort_index().to_latex(
    columns=METRICS,
    float_format='%.3f',
))

\begin{tabular}{lrrrrrrr}
\toprule
{} &  bleu1 &  bleu2 &  bleu3 &  bleu4 &  bleu &  rougeL &  ciderD \\
\midrule
dummy-common-sentences-10---free                   &  0.275 &  0.170 &  0.113 &  0.078 & 0.159 &   0.294 &   0.152 \\
dummy-common-sentences-100---free                  &  0.347 &  0.211 &  0.138 &  0.096 & 0.198 &   0.281 &   0.166 \\
dummy-common-sentences-50---free                   &  0.333 &  0.206 &  0.137 &  0.097 & 0.193 &   0.283 &   0.188 \\
dummy-common-words-10---free                       &  0.312 &  0.108 &  0.024 &  0.000 & 0.111 &   0.244 &   0.043 \\
dummy-common-words-100---free                      &  0.349 &  0.090 &  0.016 &  0.000 & 0.114 &   0.206 &   0.072 \\
dummy-common-words-50---free                       &  0.375 &  0.102 &  0.019 &  0.000 & 0.124 &   0.224 &   0.075 \\
dummy-constant---free                              &  0.455 &  0.312 &  0.223 &  0.165 & 0.289 &   0.357 &   0.293 \\
dummy-most-similar-image\_0717\_120222\_covid-x\_de... &  0.

In [74]:
np.mean([0.369, 0.246, 0.171, 0.115])

0.22525

### Classification

In [64]:
# contains = 'covid-x'
# contains = 'cxr14'
contains = 'e0'
# contains = '0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid'
# contains = '0717_101812_covid-x_densenet-121_lr1e-06_os-max2_aug-covid'
contains = '0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid' # WINNER

contains = '0717_101812_covid-x_densenet-121_lr1e-06_os-max2_aug-covid'
contains = 'covid-uc'

In [71]:
results_df_train = results_df_train.loc[results_df_train.index.str.contains(contains)]
results_df_val = results_df_val.loc[results_df_val.index.str.contains(contains)]
results_df_test = results_df_test.loc[results_df_test.index.str.contains(contains)]

In [62]:
results_df_all

Unnamed: 0,loss,acc,prec_covid,prec_Non-COVID,prec_normal,recall_covid,recall_Non-COVID,recall_normal,spec_covid,spec_Non-COVID,spec_normal,cm
0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid---covid-uc_size256_frontal,3.348938,0.393496,0.050633,0.2,0.941964,0.666667,0.202703,0.408124,0.492386,0.889094,0.867347,"[[15, 8, 1], [44, 19, 11], [229, 83, 205]]"
0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid---covid-uc_size512_frontal,4.755333,0.269919,0.046083,0.282051,0.950704,0.833333,0.148649,0.261122,0.299492,0.948244,0.928571,"[[20, 4, 0], [55, 12, 7], [342, 35, 140]]"
0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid---covid-uc_size512,2.975181,0.234428,0.038241,0.17734,0.961783,0.714286,0.367347,0.199472,0.411696,0.787261,0.952381,"[[18, 10, 0], [49, 42, 7], [406, 185, 166]]"
0717_120222_covid-x_densenet-121_lr1e-06_os_aug-covid---covid-uc_size256,2.332392,0.322763,0.03304,0.164773,0.952569,0.535714,0.295918,0.318362,0.48655,0.812739,0.904762,"[[11, 17, 0], [52, 35, 11], [332, 189, 236]]"


In [111]:
results_df_train.sort_values(['acc', 'recall_covid', 'prec_covid'],
                           ascending=False).dropna(axis=1, how='all')

KeyError: 'recall_covid'

In [72]:
results_df_val.sort_values(['acc', 'recall_covid', 'prec_covid'],
                           ascending=False).dropna(axis=1, how='all')

Unnamed: 0,loss,acc,prec_covid,prec_normal,recall_covid,recall_normal,spec_covid,spec_normal,cm,prec_Non-COVID,recall_Non-COVID,spec_Non-COVID
0722_193534_covid-uc_densenet-121_lr0.0001_os-max10_aug_size256--,0.421872,0.897727,0.0,0.916667,0.0,0.974684,0.987952,0.222222,"[[0, 0, 5], [0, 2, 2], [1, 1, 77]]",0.666667,0.5,0.988095
0722_201835_covid-uc_densenet-121_lr0.0001_os-max10_aug_pre-covid-x--,1.011396,0.772727,0.181818,0.969697,0.8,0.810127,0.783133,0.777778,"[[4, 0, 1], [2, 0, 2], [13, 3, 63]]",0.0,0.0,1.0


In [67]:
results_df_test

Unnamed: 0,loss,acc,prec_covid,prec_pneumonia,prec_normal,recall_covid,recall_pneumonia,recall_normal,spec_covid,spec_pneumonia,...,cm_Infiltration,cm_Mass,cm_Nodule,cm_Pneumothorax,cm_Consolidation,cm_Edema,cm_Emphysema,cm_Fibrosis,cm_Pleural_Thickening,cm_Hernia


In [66]:
results_df_test.sort_values(['acc', 'recall_covid', 'prec_covid'],
                            ascending=False).dropna(axis=1, how='all')

In [36]:
print_columns = [
    'acc',
    'prec_covid','prec_pneumonia','prec_normal',
    'recall_covid','recall_pneumonia', 'recall_normal',
    'spec_covid','spec_pneumonia', 'spec_normal'
#     'acc_Pneumonia', 'prec_Pneumonia', 'recall_Pneumonia'
]
# print_columns = [c for c in results_df_test.columns if c != 'loss' and 'cm' not in c]

In [37]:
print((results_df_test*100).to_latex(columns=print_columns,
                               float_format='%.2f',
                              ))

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} &   acc &  prec\_covid &  prec\_pneumonia &  prec\_normal &  recall\_covid &  recall\_pneumonia &  recall\_normal &  spec\_covid &  spec\_pneumonia &  spec\_normal \\
\midrule
0717\_101812\_covid-x\_densenet-121\_lr1e-06\_os-max... & 93.73 &       93.94 &           91.12 &        95.53 &         93.00 &             93.27 &          94.12 &       99.59 &           94.52 &        94.38 \\
\bottomrule
\end{tabular}



### Report-generation: results at different report lengths

In [32]:
vals_words = [20, 25, 27, 33, 44, None]
vals_sents = [3, 4, 5, 6, None]

In [40]:
max_words = vals_words[0]
suffix = f'max-words-{max_words}' if max_words else ''
all_results = load_results(suffix)
results_df_test = create_results_df(all_results, 'test')
results_df_test

Unnamed: 0,loss,word_acc,bleu1,bleu2,bleu3,bleu4,bleu,rougeL,ciderD
0717_041434_lstm_lr0.0001_densenet-121,5.579509,0.149129,0.478934,0.325275,0.256634,0.2183,0.319786,0.395486,1.354237
0716_211601_lstm-att_lr0.0001_densenet-121,7.382415,0.068293,0.442359,0.28249,0.199632,0.150046,0.268632,0.358223,0.775244
0717_015057_h-lstm_lr0.0001_densenet-121,4.548621,0.162518,0.328184,0.19551,0.12641,0.082401,0.183126,0.334782,0.256378
0716_234501_h-lstm-att_lr0.0001_densenet-121,5.157984,0.132504,0.298223,0.17906,0.115951,0.075574,0.167202,0.315891,0.101786
