## Shared interest data files

In [1]:
import json
import os

import torch

In [8]:
# Shared-interest example data file for CXR14 (see the filename in the path).
# open() does not expand "~", so resolve the home directory explicitly here.
fpath = os.path.expanduser('~/software/shared-interest/data/examples/data_cxr14.json')

In [9]:
# Read the file at `fpath` and parse its contents as JSON; the context
# manager closes the handle once the text has been read.
with open(fpath) as json_file:
    data = json.loads(json_file.read())
# Number of top-level entries in the parsed object.
len(data)

9

In [10]:
# List the top-level keys of the loaded JSON object.
data.keys()

dict_keys(['fname', 'image', 'bbox', 'saliency', 'label', 'prediction', 'ground_truth_coverage', 'explanation_coverage', 'iou'])

In [13]:
# Collect the distinct Python types found among the values stored under 'label'.
{type(label) for label in data['label'].values()}

{str}

In [14]:
# Count the entries stored under the 'label' key.
len(data['label'])

5045

## Debug MedicalLabelerCorrectness

In [6]:
import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence

In [7]:
# Execute the labeler-correctness sources so the names they define become
# available in this notebook's namespace for the cells below.
# NOTE(review): presumably the order matters (cache and light_labeler before
# the package __init__) — confirm.
%run ../metrics/report_generation/labeler_correctness/cache.py
%run ../metrics/report_generation/labeler_correctness/light_labeler.py
%run ../metrics/report_generation/labeler_correctness/__init__.py

In [8]:
# Execute the vocab package's __init__.py in the notebook namespace.
%run ../datasets/vocab/__init__.py

In [9]:
# Execute the dataset module (iu_xray.py) and the NLP utilities (nlp.py) in
# the notebook namespace.
%run ../datasets/iu_xray.py
%run ../utils/nlp.py

In [10]:
# Build the dataset with the 'all' argument and check how many items it holds.
# NOTE(review): 'all' presumably selects every split — confirm in iu_xray.py.
dataset = IUXRayDataset('all')
len(dataset)

7426

In [11]:
# Reader built from the dataset vocabulary; used further down to turn token
# ids back into text.
report_reader = ReportReader(dataset.get_vocab())

In [12]:
# Labeler built from the same dataset vocabulary; the bare name on the last
# line displays its repr.
chexpert_labeler = ChexpertLightLabeler(dataset.get_vocab())
chexpert_labeler

<__main__.ChexpertLightLabeler at 0x7f21276e4080>

In [15]:
# Stack four sample reports into one batch. pad_sequence right-pads the
# shorter ones (with 0 by default) so every row has the same length.
sample_indices = [base + 2 for base in [100, 200, 300, 400]]
report_tensors = [torch.tensor(dataset[i].report) for i in sample_indices]
reports = pad_sequence(report_tensors, batch_first=True)
reports

tensor([[ 32,  11,  50,   4,  52,  10,  13,   4,  17,  28,   4,   0,   0,   0,
           0],
        [ 40,  13,  52,  10,   4, 151,  43, 571,   4,  32,  11,  50,   4,   0,
           0],
        [ 52,  10,  16,  13,   8,  32,  11,  50,   4,  17,  27,  23, 563,   4,
           0],
        [ 52,  10,  13,   8,  32,  11,  50,   4, 300, 727,  63, 135,  68, 312,
           4]])

In [9]:
# Metric under debug, constructed around the labeler created earlier; the
# bare name on the last line displays its repr.
metric = MedicalLabelerCorrectness(chexpert_labeler)
metric

<__main__.MedicalLabelerCorrectness at 0x7fbc2e576278>

In [18]:
# Sanity check: clear the metric, then feed the same batch as both elements
# of the update tuple so the two sides always agree, and read the result.
# NOTE(review): the tuple is presumably (generated, ground_truth) — confirm.
metric.reset()
metric.update((reports, reports))
metric.compute()

{'acc': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'prec': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'recall': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'spec': array([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'npv': array([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])}

In [11]:
# Decode every row of the padded batch back into text, one string per report.
list(map(report_reader.idx_to_text, reports))

['xxxx change . no active cardiopulmonary disease . thoracolumbar scoliosis . PAD PAD PAD PAD',
 'chest . no active disease . right shoulder . healing scapular fracture . PAD PAD',
 'the cardiac contours are normal . the lungs are clear . thoracic spondylosis . PAD',
 'heart size is normal and the lungs are clear . no nodules or masses .']