# Imports

In [None]:
%env CUDA_VISIBLE_DEVICES=1

In [None]:
import os

import numpy as np
import torch

# Debug medical-labeler

Exercise the metric with stub labelers that return NumPy arrays and torch tensors.

In [None]:
class NPLabeler:
    """Stub labeler that answers with NumPy arrays.

    Calling an instance returns an all-zero array with one row per report
    and 14 columns.
    """

    # Class-level flag; True for this NumPy-returning stub.
    use_numpy = True

    def __call__(self, reports):
        """Return a zero-filled array of shape ``(len(reports), 14)``."""
        n_reports = len(reports)
        return np.zeros(shape=(n_reports, 14))
    
class TorchLabeler:
    """Stub labeler that answers with torch tensors.

    Calling an instance returns an all-zero tensor with one row per report
    and 14 columns.
    """

    # Class-level flag; False for this torch-returning stub.
    use_numpy = False

    def __call__(self, reports):
        """Return a zero-filled tensor of shape ``(len(reports), 14)``."""
        n_reports = len(reports)
        return torch.zeros((n_reports, 14))

In [None]:
TorchLabeler().__class__.__name__

In [None]:
# Toy reports written as lists of integers (`gt` presumably = ground truth).
reports_gt = [[1, 2, 3, 4], [1, 2, 3, 5, 6, 7, 4]]
# Toy counterpart reports; handed to the metric together with the lists above
# as the pair (reports_gen, reports_gt).
reports_gen = [[1, 2, 3, 4, 6], [1, 2]]

In [None]:
%run ../metrics/report_generation/labeler_correctness/metric.py

In [None]:
l = MedicalLabelerCorrectness(NPLabeler(), device='cuda')
l.reset()

In [None]:
l.update((reports_gen, reports_gt))

In [None]:
res = l.compute()
res

In [None]:
res['acc'].mean().item()

In [None]:
res['acc'][3].item()

# Debug Lighter labeler

## Utils

In [None]:
%run ../datasets/vocab/__init__.py
%run ../utils/nlp.py

In [None]:
from medai.datasets.iu_xray import DATASET_DIR as IU_DIR
import pandas as pd

In [None]:
vocab = load_vocab('iu_xray')
reader = ReportReader(vocab)
len(vocab)

In [None]:
fpath = os.path.join(IU_DIR, 'reports', 'sentences_with_chexpert_labels.csv')
df = pd.read_csv(fpath)
df.head(2)

In [None]:
sentences = df['sentence']
len(sentences)

## Run lighter labeler

In [None]:
%run ../metrics/report_generation/labeler_correctness/lighter_labeler/__init__.py

### One sample

In [None]:
l = ChexpertLighterLabeler(vocab)

In [None]:
# Alternative hand-written inputs, kept for quick manual experiments:
# reports = [
#     'heart is enlarged .',
#     'heart is not enlarged .',
#     'heart is upper limit',
# ]

# Take five sentences near the end of the series, explicitly by position.
sample_sentences = sentences.iloc[-50:-45]
for sentence in sample_sentences:
    print(sentence)

# Feed the same text two ways: joined into a single report, and as one report
# per sentence. Each name is bound once, directly to the converted output of
# `reader.text_to_idx`, so it never holds raw text at any point.
reports_as_one = [reader.text_to_idx(' '.join(sample_sentences))]
reports_as_many = [reader.text_to_idx(sentence) for sentence in sample_sentences]

In [None]:
l(reports_as_one)

In [None]:
l(reports_as_many)

In [None]:
# Label one hand-written sentence and pair every output value with the
# corresponding entry of `l.diseases`.
report = reader.text_to_idx(
    'no heart size is normal large , lungs are pneumothorax'
)
batch_output = l([report])
res = batch_output.tolist()[0]
list(zip(res, l.diseases))

### All reports/sentences

In [None]:
FULL_REPORTS = False

In [None]:
# Pick the CSV file and its text column for the chosen granularity.
if FULL_REPORTS:
    name = 'reports_with_chexpert_labels.csv'
    TARGET_COL = 'Reports'
else:
    name = 'sentences_with_chexpert_labels.csv'
    TARGET_COL = 'sentence'
fpath = os.path.join(IU_DIR, 'reports', name)
df = pd.read_csv(fpath, index_col=0 if FULL_REPORTS else None)
# Map -2 to 0 on the frame that was just loaded. The replacement used to run
# before the read, so it only touched the stale frame, which was then
# overwritten, and the fresh frame kept its -2 values.
# NOTE(review): -2 is presumably the labeler's "not mentioned" code - confirm.
df = df.replace(-2, 0)
df.head(2)

In [None]:
texts = [reader.text_to_idx(s) for s in df[TARGET_COL]]
len(texts)

In [None]:
%run ../metrics/report_generation/labeler_correctness/lighter_labeler/__init__.py

In [None]:
l = ChexpertLighterLabeler(vocab)

In [None]:
%%time

labels = l(texts)
labels.size()

In [None]:
# Add one 'lighter-<disease>' column per entry of `l.diseases`, filled with
# the labeler output moved to host memory.
cols = [f'lighter-{name}' for name in l.diseases]
predictions = labels.cpu().numpy()
df[cols] = predictions
df.head(2)

In [None]:
# Reference labels: the dataframe columns named after `l.diseases`, as int8.
gt_labels = df[l.diseases].to_numpy().astype(np.int8)
# Place them on the same device as the predicted `labels` instead of a
# hard-coded 'cuda', so the element-wise comparisons that follow never mix
# devices and the notebook also runs where `labels` is a CPU tensor.
gt_labels = torch.tensor(gt_labels, device=labels.device)
gt_labels.size()

In [None]:
# Boolean masks, computed once, for predicted / reference values of 1 and 0.
pred_pos, pred_neg = labels == 1, labels == 0
gt_pos, gt_neg = gt_labels == 1, gt_labels == 0

# Confusion counts, summed over dim 0 (one count per label column).
tp = (pred_pos & gt_pos).sum(0)
fp = (pred_pos & gt_neg).sum(0)
tn = (pred_neg & gt_neg).sum(0)
fn = (pred_neg & gt_pos).sum(0)
tuple(count.size() for count in (tp, fp, tn, fn))

In [None]:
# The counts are integer tensors (sums of boolean masks). Cast them once so
# the ratios below are true float divisions on every torch version; `/` on
# integer tensors has meant floor division, an error, or true division
# depending on the release.
tp_f, fp_f, fn_f = tp.float(), fp.float(), fn.float()

# Precision / recall stay NaN where they are undefined (no predicted
# positives / no reference positives respectively).
precision = tp_f / (tp_f + fp_f)
recall = tp_f / (tp_f + fn_f)
# F1 straight from the counts: equal to 2PR / (P + R) wherever that is defined,
# and 0 instead of NaN when tp == 0 but fp or fn is not. It is still NaN when
# tp == fp == fn == 0.
f1 = 2 * tp_f / (2 * tp_f + fp_f + fn_f)
precision, recall, f1

In [None]:
l.diseases

In [None]:
# Inspect one disease: rows whose CSV column is 1 while the matching
# 'lighter-' column is 0.
disease = l.diseases[5]
print(disease)
lighter_disease = f'lighter-{disease}'
show_cols = [TARGET_COL, 'No Finding', disease, lighter_disease]

missed = (df[disease] == 1) & (df[lighter_disease] == 0)
# Keep only the columns of interest, shortest texts first.
d = df.loc[missed, show_cols].sort_values(
    TARGET_COL,
    key=lambda texts: texts.str.len(),
)
print(len(d))
d.head(1)

In [None]:
list(d[TARGET_COL].unique())