## Import

In [None]:
%env CUDA_VISIBLE_DEVICES=0

In [None]:
import torch

In [None]:
import pandas as pd
pd.options.display.max_columns = None

In [None]:
%run ../utils/__init__.py
config_logging(logging.INFO)

In [None]:
# Use the GPU when one is visible to this process; otherwise fall back to the CPU
# so the notebook still runs on a machine without CUDA.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE

## Evaluate models in subsets

TODO: move this evaluation into a standalone script.

In [None]:
# Execute project files so their definitions land in the notebook namespace.
# `-n` runs the file with __name__ set to the file's name instead of '__main__',
# so any `if __name__ == '__main__':` section in the training script is skipped.
%run -n train_report_generation.py
%run datasets/__init__.py
%run models/checkpoint/__init__.py
%run training/report_generation/flat.py
%run training/report_generation/hierarchical.py
%run models/report_generation/__init__.py

In [None]:
def eval_in_subset(run_name, compiled_model, debug=True, max_n_words=None, max_n_sentences=None,
                   device='cuda', batch_size=50):
    """Evaluate a compiled report-generation model on length-limited IU X-ray subsets.

    Builds the train/val/test ``IUXRayDataset`` splits with the model's vocabulary,
    restricts each split with ``create_report_dataset_subset``, wraps them in the
    dataloader flavour matching the decoder (hierarchical or flat) and hands
    everything to ``evaluate_and_save``.

    Args:
        run_name: Name of the run; forwarded to ``evaluate_and_save``.
        compiled_model: Object exposing ``.model`` and ``.metadata``; the metadata
            must contain ``'vocab'`` and ``'decoder_kwargs'['decoder_name']``.
        debug: Forwarded to ``evaluate_and_save``.
        max_n_words: Forwarded to ``create_report_dataset_subset``; also used to
            build the results suffix.
        max_n_sentences: Forwarded to ``create_report_dataset_subset``; used for
            the suffix only when ``max_n_words`` is falsy.
        device: Forwarded to ``evaluate_and_save``.
        batch_size: Batch size used for the three dataloaders.

    Returns:
        None.
    """
    # Create datasets
    vocab = compiled_model.metadata['vocab']
    train_dataset = IUXRayDataset('train', vocab=vocab)
    val_dataset = IUXRayDataset('val', vocab=vocab)
    test_dataset = IUXRayDataset('test', vocab=vocab)

    # Prepare subsets
    subset_kwargs = {
        'max_n_words': max_n_words,
        'max_n_sentences': max_n_sentences,
    }

    train_subset = create_report_dataset_subset(train_dataset, **subset_kwargs)
    val_subset = create_report_dataset_subset(val_dataset, **subset_kwargs)
    test_subset = create_report_dataset_subset(test_dataset, **subset_kwargs)

    # Decide hierarchical
    decoder_name = compiled_model.metadata['decoder_kwargs']['decoder_name']
    hierarchical = is_decoder_hierarchical(decoder_name)
    if hierarchical:
        create_dataloader = create_hierarchical_dataloader
    else:
        create_dataloader = create_flat_dataloader

    # Create dataloaders
    train_dataloader = create_dataloader(train_subset, batch_size=batch_size)
    val_dataloader = create_dataloader(val_subset, batch_size=batch_size)
    test_dataloader = create_dataloader(test_subset, batch_size=batch_size)

    # Create a suffix. When both limits are given, the word limit names the results.
    if max_n_words:
        suffix = f'max-words-{max_n_words}'
    elif max_n_sentences:
        suffix = f'max-sentences-{max_n_sentences}'
    else:
        # No limit requested: previously `suffix` was left unbound here and the call
        # below raised UnboundLocalError.
        # NOTE(review): assumes evaluate_and_save treats '' as "no suffix" - confirm.
        suffix = ''

    evaluate_and_save(run_name,
                      compiled_model.model,
                      train_dataloader,
                      val_dataloader,
                      test_dataloader,
                      hierarchical=hierarchical,
                      debug=debug,
                      device=device,
                      suffix=suffix,
                     )

In [None]:
# Word limits passed as `max_n_words` to eval_in_subset; commented-out entries are skipped.
# NOTE(review): the trailing percentages presumably give the share of reports that fit
# within each limit - confirm.
eval_n_words = [
    20 , # --> 15%
    25 , # --> 26%
    27 , # --> 33%
    33 , # --> 50%
#     39 , # --> 66%
#     41 , # --> 70%
    44 , # --> 75%
#     47 , # --> 80%
#     58 , # --> 90%
    # None, # --> 100%
]

In [None]:
# Sentence limits passed as `max_n_sentences` to eval_in_subset; commented-out entries
# are skipped.
# NOTE(review): the trailing numbers look like cumulative percentages of reports with
# at most that many sentences - confirm.
eval_n_sentences = [
#     1, # 1.2324835387472564
#     2, # 4.761100793516799
    3, # 25.730204288367382
    4, # 55.10720918453487
    5, # 76.66722944453824
    6, # 89.39726489954415
#     7, # 95.03629917271653
#     8, # 97.6194496032416
#     9, # 98.86881647813608
#     10, # 99.42596657099443
#     11, # 99.71298328549722
#     12, # 99.89869998311667
#     13, # 99.96623332770555
#     17, # 99.98311666385278
#     18, # 100
]

In [None]:
# Names of the runs to evaluate; commented-out entries are skipped.
run_names = [
#     '0717_041434_lstm_lr0.0001_densenet-121',
    '0716_211601_lstm-att_lr0.0001_densenet-121', # 33 and 34 are missing
#     '0717_015057_h-lstm_lr0.0001_densenet-121',
#     '0716_234501_h-lstm-att_lr0.0001_densenet-121',
]
# Passed as `debug=` when loading the model and when evaluating each subset.
debug = False

In [None]:
# For each selected run: load the model once, then evaluate it on every
# word-limited subset followed by every sentence-limited subset.
for run_name in run_names:
    compiled_model = load_compiled_model_report_generation(
        run_name,
        debug=debug,
        multiple_gpu=True,
        device=DEVICE,
    )

    # Subsets limited by number of words
    for n_words in tqdm(eval_n_words):
        eval_in_subset(
            run_name,
            compiled_model,
            debug=debug,
            max_n_words=n_words,
            max_n_sentences=None,
            device=DEVICE,
        )

    # Subsets limited by number of sentences
    for n_sentences in tqdm(eval_n_sentences):
        eval_in_subset(
            run_name,
            compiled_model,
            debug=debug,
            max_n_words=None,
            max_n_sentences=n_sentences,
            device=DEVICE,
        )

## Debug chexpert-labeler

In [None]:
%run ../utils/files.py
%run ../metrics/__init__.py
%run ../metrics/report_generation/chexpert.py
# %run -n ../eval_report_generation_chexpert_labeler.py

In [None]:
# Identify the run whose outputs are debugged in the cells below.
# NOTE(review): the positional arguments are presumably (name/timestamp, debug flag,
# task) - confirm against RunId's definition.
run_id = RunId('0428_133057', True, 'rg')
run_id.full_name

In [None]:
# Load this run's saved outputs; the cells below use the 'generated' column.
df = load_rg_outputs(run_id, free=False)
print(len(df))
df.head()

In [None]:
# Load the ground-truth table for mimic-cxr; the cells below rely on its
# 'Reports' and 'filename' columns.
gt_with_labels = _load_all_gt_labels('mimic-cxr')
gt_with_labels.head()

In [None]:
# Number of ground-truth rows as loaded
print(len(gt_with_labels))

In [None]:
# Collapse the ground-truth table to one row per distinct report text
# (for each column, the first non-null value within the group is kept).
# The drop is not done in place and tolerates a missing column, so re-running this
# cell - when 'filename' is already gone and 'Reports' is already the index - works
# instead of raising KeyError.
gt_with_labels = gt_with_labels.drop(columns='filename', errors='ignore')
gt_with_labels = gt_with_labels.groupby('Reports').first()
print(len(gt_with_labels))
gt_with_labels.head()

In [None]:
# Inner-join the ground-truth table with the generated texts on exact string equality,
# keeping only generated reports that also occur among the ground-truth reports.
annotated_from_gt = pd.merge(
    gt_with_labels,
    df[['generated']],
    how='inner',
    left_on='Reports',
    right_on='generated',
)
print(len(annotated_from_gt))
annotated_from_gt.head()

In [None]:
# Rows of df whose generated text found no match in the ground-truth join
missing_df = df[~df['generated'].isin(annotated_from_gt['generated'].unique())]
print(len(missing_df))
missing_df.head()

In [None]:
# Distinct generated texts among the unmatched rows (array of unique values)
unique_missing_reports = missing_df['generated'].unique()
print(len(unique_missing_reports))
unique_missing_reports.shape

In [None]:
# Single-column frame holding each unmatched generated report once
df_unique = pd.DataFrame({'gen-unique': unique_missing_reports})
print(len(df_unique))
df_unique.head()

In [None]:
# Run the labeler over the 'gen-unique' column, split into 3 batches.
# NOTE(review): fill_empty=0 / fill_uncertain=1 presumably replace blank and uncertain
# labeler outputs with those values - confirm in the helper.
gen = _apply_labeler_to_column_in_batches(
    df_unique, 'gen-unique', n_batches=3, fill_empty=0, fill_uncertain=1,
    caller_id='eval-notebook',
)
gen.shape

In [None]:
# Attach the labeler output `gen` to the unique-report frame.
# NOTE(review): 'gen' is presumably used to name the added label columns - confirm
# in _concat_df_matrix.
df_unique_2 = _concat_df_matrix(df_unique, gen, 'gen')
print(len(df_unique_2))
df_unique_2.head()

In [None]:
# Preview df
df.head()

In [None]:
# Join the freshly labeled unique reports back onto df by generated text.
# NOTE(review): this is an inner join against a frame built only from the unmatched
# reports, so rows that did match the ground truth are presumably dropped here.
# Re-running the cell merges again into the already-merged df.
df = df.merge(df_unique_2, how='inner', left_on='generated', right_on='gen-unique')

In [None]:
%%time

# Run the full labeler pipeline on df in 3 batches and time it.
# Note that df is overwritten with the result.
df = apply_labeler_to_df(df,
                         batches=3,
                         caller_id='eval-notebook',
                         dataset_name='mimic-cxr',
                        )
df.head()

### Debug `batches` implementation

In [None]:
%%time

# Labeler output computed in 3 batches; compared against the single-batch run below.
df_batches = apply_labeler_to_df(df,
                         batches=3,
                         caller_id='eval-notebook',
                         dataset_name='mimic-cxr',
                        )
df_batches.head()

In [None]:
# Reference labeler output computed with a single batch
df_whole = apply_labeler_to_df(df,
                         batches=1,
                         caller_id='eval-notebook',
                         dataset_name='mimic-cxr',
                        )
df_whole.head()

In [None]:
# Columns to compare: every column whose name ends in '-gt' or '-gen'
cols = [c for c in df_whole.columns if c.endswith(('-gt', '-gen'))]
len(cols)

In [None]:
# Extract the compared columns of both runs as NumPy arrays
arr_batches = df_batches[cols].to_numpy()
arr_whole = df_whole[cols].to_numpy()
arr_batches.shape

In [None]:
# The batched and single-batch runs must yield identical label matrices.
# NOTE(review): `==` compares elementwise and NaN != NaN, so this assumes the
# compared columns contain no NaN values - confirm.
assert arr_batches.shape == arr_whole.shape
assert (arr_batches == arr_whole).all()