# Imports

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

In [None]:
import matplotlib
# Use a white background for every matplotlib figure.
matplotlib.rcParams['figure.facecolor'] = 'white'

In [None]:
# Do not truncate columns when displaying DataFrames.
pd.options.display.max_columns = None

In [None]:
from medai.datasets.iu_xray import DATASET_DIR as IU_DIR
from medai.datasets.mimic_cxr import DATASET_DIR as MIMIC_DIR

# Load utils

In [None]:
# Dataset selector: 'iu' or 'mimic_cxr'. Any value containing 'mimic' picks
# the MIMIC directory; everything else falls back to the IU directory.
USE_DATASET = 'iu'
if 'mimic' in USE_DATASET:
    dataset_dir = MIMIC_DIR
else:
    dataset_dir = IU_DIR

## Load vocab and stuff

In [None]:
# Execute the project scripts inside the notebook namespace so the names they
# define become globals here. Later cells use CHEXPERT_LABELS, load_vocab and
# ReportReader, presumably provided by these files (TODO confirm which defines which).
%run ../datasets/common/constants.py
%run ../datasets/vocab/__init__.py
%run ../utils/nlp.py

In [None]:
# Load the vocabulary from the dataset's 'reports' folder ('v4' is passed as
# the second argument, presumably a vocab version tag) and build the reader
# used to convert report text throughout the notebook.
VOCAB = load_vocab(
    os.path.join(dataset_dir, 'reports'),
    'v4',
)
REPORT_READER = ReportReader(VOCAB)
len(VOCAB)

## Load holistic chexpert

In [None]:
# Load the reports CSV that carries CheXpert labels; its first column is the index.
fpath = os.path.join(dataset_dir, 'reports', 'reports_with_chexpert_labels.csv')
df = pd.read_csv(fpath, index_col=0)
# Collapse values to binary across the whole frame: -1 becomes 1, -2 becomes 0.
# NOTE(review): presumably -1 = uncertain and -2 = not mentioned — confirm.
df.replace(-1, 1, inplace=True)
df.replace(-2, 0, inplace=True)
df.head(3)

In [None]:
# Convert every report text with REPORT_READER.text_to_idx, keeping row order.
REPORTS_LIST = list(map(REPORT_READER.text_to_idx, df['Reports']))
len(REPORTS_LIST)

In [None]:
def add_suffix(col):
    """Return `col` with '-gt' appended when it is listed in CHEXPERT_LABELS.

    Any other column name is returned unchanged.
    """
    return f'{col}-gt' if col in CHEXPERT_LABELS else col


# Rename the label columns of `df` in place so they carry the '-gt' suffix.
df.rename(columns=add_suffix, inplace=True)
df.head()

In [None]:
# Bring the chexpert metric helpers into the notebook namespace. Later cells call
# labels_with_suffix and _calculate_metrics, presumably defined here (TODO confirm).
%run ../metrics/report_generation/chexpert.py
# -n: run without setting __name__ to '__main__', so a main guard in the script is skipped.
%run -n ../eval_report_generation_chexpert_labeler.py

In [None]:
def _compute_metrics_vs_holistic(labels):
    """Compare generated labels against the ground-truth columns of the global `df`.

    Args:
        labels: one row of labels per row of `df`; a torch.Tensor is moved to
            CPU and converted to numpy, anything else is handed to pandas as is.

    Returns:
        A tuple `(full_df, metrics)`: `full_df` is `df` with the generated labels
        appended as columns named by `labels_with_suffix('gen')`, and `metrics`
        is whatever `_calculate_metrics(full_df)` returns.

    Raises:
        AssertionError: if the number of label rows differs from `len(df)`.
    """
    gen_columns = labels_with_suffix('gen')

    assert len(labels) == len(df)

    # pandas needs host-side data, so unwrap tensors before building the frame.
    generated = labels.cpu().numpy() if isinstance(labels, torch.Tensor) else labels
    generated_df = pd.DataFrame(generated, index=df.index, columns=gen_columns)
    full_df = pd.concat([df, generated_df], axis=1)

    # Concatenating on the shared index must not add or drop rows.
    assert len(full_df) == len(generated)

    metrics = _calculate_metrics(full_df)
    return full_df, metrics

# Compare light vs holistic

## Calculate light-labeler chexpert

In [None]:
# Load the light labeler implementation into the notebook namespace.
%run ../metrics/report_generation/labeler_correctness/light_labeler.py

In [None]:
labeler = ChexpertLightLabeler(VOCAB)

In [None]:
%%time

# Label every report in REPORTS_LIST; %%time reports how long the cell takes.
labels = labeler(REPORTS_LIST)
labels.shape

In [None]:
# Map -2 to 0 and -1 to 1 in place, the same remapping applied to `df` when it was loaded.
labels[labels == -2] = 0
labels[labels == -1] = 1
labels

In [None]:
# _compute_metrics_vs_holistic returns (full_df, metrics), so unpack both levels;
# unpacking the 2-tuple straight into six names raises ValueError.
full_df, (acc, precision, recall, f1, roc_auc, pr_auc) = _compute_metrics_vs_holistic(labels)
acc, precision, recall, f1, roc_auc, pr_auc

## Calculate with full-labeler

In [None]:
# Load the full labeler implementation; nlp.py was already run above and is executed again here.
%run ../metrics/report_generation/labeler_correctness/full_labeler.py
%run ../utils/nlp.py

In [None]:
# Rebinds `labeler`, replacing the light labeler from the previous section.
labeler = ChexpertFullLabeler(VOCAB)
labeler

In [None]:
%%time

# Label every report in REPORTS_LIST with the full labeler and time the call.
labels = labeler(REPORTS_LIST)
labels.shape

In [None]:
# Map -2 to 0 and -1 to 1 in place, the same remapping applied to `df` when it was loaded.
labels[labels == -2] = 0
labels[labels == -1] = 1
labels

In [None]:
# _compute_metrics_vs_holistic returns (full_df, metrics), so unpack both levels;
# unpacking the 2-tuple straight into six names raises ValueError.
full_df, (acc, precision, recall, f1, roc_auc, pr_auc) = _compute_metrics_vs_holistic(labels)
acc, precision, recall, f1, roc_auc, pr_auc

# Compare lighter vs holistic

In [None]:
# Load the lighter labeler (abn_match variant) into the notebook namespace.
%run ../metrics/report_generation/abn_match/chexpert.py

In [None]:
# Rebinds `labeler` again, this time to the lighter labeler on CPU.
labeler = ChexpertLighterLabeler(VOCAB, device='cpu')

In [None]:
%%time

# Label every report; the result is used as a torch tensor below (.size(), torch.cat).
labels = labeler(REPORTS_LIST)
labels.size()

In [None]:
# When the labeler emits only 13 columns, prepend an all-zero column so the
# tensor has 14 columns, matching the 14 ground-truth label columns.
# NOTE(review): the variable name suggests the missing column is 'No Finding' — confirm.
if labels.size(1) == 13:
    nf_column = torch.zeros(labels.size(0), 1, device=labels.device)
    labels = torch.cat([nf_column, labels], dim=1)
labels.size()

In [None]:
# Score the lighter labeler against the holistic labels; keep full_df for error analysis below.
full_df, (acc, precision, recall, f1, roc_auc, pr_auc) = _compute_metrics_vs_holistic(labels)
len(acc)

In [None]:
# Per-label precision / recall / F1 and their means, skipping index 0.
# NOTE(review): index 0 is presumably the 'No Finding' column padded with zeros above — confirm.
precision[1:], precision[1:].mean(), recall[1:], recall[1:].mean(), f1[1:], f1[1:].mean()

In [None]:
labeler.diseases

In [None]:
# Error analysis for the first disease: keep the rows where the ground truth
# is 1 but the generated label is 0.
target = labeler.diseases[0]
colgt = f'{target}-gt'
colgen = f'{target}-gen'
d = full_df
d = d[((d[colgt] == 1) & (d[colgen] == 0))]
d = d[['Reports', colgt, colgen]]
print(len(d))
d.head(2)

In [None]:
# First ten report texts from that selection.
l = list(d['Reports'])
l[:10]

# Debug Lighter labeler

In [None]:
# Run the abn_match chexpert script again (same file as in the previous section).
%run ../metrics/report_generation/abn_match/chexpert.py

In [None]:
# Use the VOCAB loaded at the top of the notebook; a lowercase `vocab` is not
# defined in any visible cell, so the original call raised NameError.
l = ChexpertLighterLabeler(VOCAB)
l

In [None]:
# reports = [
#     'heart is enlarged .',
#     'heart is not enlarged .',
#     'heart is upper limit',
# ]
# NOTE(review): `sentences` is not defined in any visible cell; it must already
# exist in the notebook namespace (presumably a list of sentence strings) — confirm.
sample_sentences = sentences[-50:-45]
# The same five sentences, once joined into a single report and once kept separate.
reports_as_one = [' '.join(sample_sentences)]
reports_as_many = list(sample_sentences)
for r in reports_as_many:
    print(r)
# REPORT_READER is the reader built from VOCAB above; `reader` is not defined
# in any visible cell.
reports_as_one = [REPORT_READER.text_to_idx(r) for r in reports_as_one]
reports_as_many = [REPORT_READER.text_to_idx(r) for r in reports_as_many]

In [None]:
# Labels when the sentences are fed as one joined report...
l(reports_as_one)

In [None]:
# ...versus when each sentence is fed as its own report.
l(reports_as_many)

In [None]:
# Label one hand-written report and pair each output value with its disease name.
# REPORT_READER is the reader built from VOCAB above; `reader` is not defined
# in any visible cell.
report = REPORT_READER.text_to_idx('heart size is large , no pneumothorax')
res = l([report]).tolist()[0]
list(zip(res, l.diseases))

## All reports/sentences

TODO: merge this section with the "Compare lighter vs holistic" code above.

In [None]:
# Switch for the next cell: True loads the reports CSV, False loads the sentences CSV.
FULL_REPORTS = False

In [None]:
# Pick the CSV file and the name of its text column according to FULL_REPORTS.
if FULL_REPORTS:
    name = 'reports_with_chexpert_labels.csv'
    TARGET_COL = 'Reports'
else:
    name = 'sentences_with_chexpert_labels.csv'
    TARGET_COL = 'sentence'
# NOTE(review): this always reads from IU_DIR and ignores USE_DATASET / dataset_dir —
# confirm that is intended.
fpath = os.path.join(IU_DIR, 'reports', name)
# Only the reports file is read with its first column as the index.
df = pd.read_csv(fpath, index_col=0 if FULL_REPORTS else None)
# Remap -2 to 0 on the freshly loaded frame. This used to run before the read,
# so it changed the previous `df` and the new frame kept its -2 values.
df.replace(-2, 0, inplace=True)
df.head(2)

In [None]:
# Convert every text in the selected column with the notebook's reader.
# REPORT_READER is the reader built from VOCAB above; `reader` is not defined
# in any visible cell.
texts = [REPORT_READER.text_to_idx(s) for s in df[TARGET_COL]]
len(texts)

In [None]:
# Load the lighter labeler from the labeler_correctness package; the next cell
# instantiates ChexpertLighterLabeler, presumably the definition from this file (TODO confirm).
%run ../metrics/report_generation/labeler_correctness/lighter_labeler/__init__.py

In [None]:
# Use the VOCAB loaded at the top of the notebook; a lowercase `vocab` is not
# defined in any visible cell, so the original call raised NameError.
l = ChexpertLighterLabeler(VOCAB, device='cpu')

In [None]:
%%time

# Label every converted text and time the call.
labels = l(texts)
labels.size()

In [None]:
# Store the labeler output next to the CSV labels, one 'lighter-<disease>' column per disease.
cols = [f'lighter-{d}' for d in l.diseases]
df[cols] = labels.cpu().numpy()
df.head(2)

In [None]:
# CSV labels for the labeler's diseases, as an int8 tensor on CPU.
gt_labels = torch.tensor(
    df[l.diseases].to_numpy().astype(np.int8),
    device='cpu',
)
gt_labels.size()

In [None]:
# Per-disease confusion counts, summed over the rows (dim 0).
tp = (labels.eq(1) & gt_labels.eq(1)).sum(dim=0)
fp = (labels.eq(1) & gt_labels.eq(0)).sum(dim=0)
tn = (labels.eq(0) & gt_labels.eq(0)).sum(dim=0)
fn = (labels.eq(0) & gt_labels.eq(1)).sum(dim=0)
tuple(count.size() for count in (tp, fp, tn, fn))

In [None]:
# Per-disease precision, recall and F1 from the confusion counts.
# NOTE(review): a disease with tp + fp == 0 or tp + fn == 0 gives a zero denominator —
# check how the installed torch version handles integer-tensor division here.
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)
precision, recall, f1

In [None]:
l.diseases

In [None]:
l.diseases

In [None]:
# Error analysis for the first disease: rows where the CSV label is 1 but the
# lighter labeler output is 0, sorted so the shortest texts come first.
disease = l.diseases[0]
print(disease)
lighter_disease = f'lighter-{disease}'
show_cols = [TARGET_COL, 'No Finding', disease, lighter_disease]
d = df
d = d.loc[((d[disease] == 1) & (d[lighter_disease] == 0))]
d = d[show_cols].sort_values(TARGET_COL, key=lambda x: x.str.len())
print(len(d))
d.head(1)

In [None]:
# Distinct texts among those rows.
list(d[TARGET_COL].unique())

# Debug medical-labeler

Stub labelers that return numpy arrays and torch tensors, used to exercise the metric with both kinds of output.

In [None]:
class NPLabeler:
    """Stub labeler that returns an all-zero numpy array with 14 columns."""

    use_numpy = True

    def __call__(self, reports):
        """Return a `(len(reports), 14)` array of zeros; report contents are ignored."""
        n_reports = len(reports)
        return np.zeros(shape=(n_reports, 14))
    
class TorchLabeler:
    """Stub labeler that returns an all-zero torch tensor with 14 columns."""

    use_numpy = False

    def __call__(self, reports):
        """Return a `(len(reports), 14)` tensor of zeros; report contents are ignored."""
        n_reports = len(reports)
        return torch.zeros((n_reports, 14))

In [None]:
TorchLabeler().__class__.__name__

In [None]:
# Toy integer sequences standing in for ground-truth and generated reports
# (two reports each, of different lengths).
reports_gt = [
    [1, 2, 3, 4],
    [1, 2, 3, 5, 6, 7, 4],
]
reports_gen = [
    [1, 2, 3, 4, 6],
    [1, 2],
]

In [None]:
# Load the metric implementation; the next cell instantiates MedicalLabelerCorrectness,
# presumably defined in this file (TODO confirm).
%run ../metrics/report_generation/labeler_correctness/metric.py

In [None]:
# Build the metric around the numpy stub labeler. Use the GPU when one is
# available and fall back to CPU otherwise, so this debug cell also runs on
# CPU-only machines (the device used to be hard-coded to 'cuda').
l = MedicalLabelerCorrectness(
    NPLabeler(),
    device='cuda' if torch.cuda.is_available() else 'cpu',
)
l.reset()

In [None]:
# Feed one batch to the metric as a (generated, ground-truth) pair.
l.update((reports_gen, reports_gt))

In [None]:
res = l.compute()
res

In [None]:
# Mean of the 'acc' entry of the result.
res['acc'].mean().item()

In [None]:
# 'acc' value at index 3 only.
res['acc'][3].item()