In [1]:
#-*- coding:utf-8 -*-

import os
import pandas as pd
import json
import numpy as np
from PIL import Image
import cv2
import metric
import pprint
import argparse
import pickle
from sklearn.metrics import accuracy_score,roc_curve,auc
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["font.size"] = 10


UINT8_MAX = np.iinfo(np.uint8).max
UINT16_MAX = np.iinfo(np.uint16).max

#from visual import make_overlay_image, make_overlay_image2
#import visual as visual_lib

In [2]:
def convert_dict_to_array(contour):
    arr = [[row["x"], row["y"]] for row in contour]
    arr = np.array(arr)
    return arr

In [3]:
def get_masks(contours, width, height, dtype=np.float32, mask_size=1000):
    assert isinstance(contours, list) or isinstance(contours, tuple)

    images = []
    for contour in contours:
        for conts in contour:
            images.append(get_mask(conts, width, height, dtype, mask_size=mask_size))

    return images

In [None]:
def get_mask(contour, width, height, dtype=np.float32, mask_size=1000):
    if width < height:
        mask_height = mask_size
        mask_width = int(mask_size * width / height)
    else:
        mask_width = mask_size
        mask_height = int(mask_size * height / width)

    image = np.zeros((mask_height, mask_width), np.uint8)

    assert len(contour) > 0

    for subcontour in contour:
        assert len(subcontour) > 0
        pts = np.array(
            [[[(x + width / 2) / width * mask_width, (y + height / 2) / height * mask_height]] for x, y in subcontour],
            np.int32)
        cv2.fillPoly(image, [pts], (255, 255, 255))
    return convert_image_type(image, dtype)

In [None]:
def convert_image_type(image, dtype=np.float32):
    if image.dtype == np.uint8:

        if dtype == np.float32:
            image = image.astype(np.float32)
            image /= UINT8_MAX
            return image
        elif dtype == np.uint8:
            return image
        else:
            raise TypeError('numpy.float32 or numpy.uint8 supported as a target dtype')

    elif image.dtype == np.uint16:

        if dtype == np.float32:
            image = image.astype(np.float32)
            image /= UINT16_MAX
            return image
        elif dtype == np.uint8:
            image = image.astype(np.float32)
            image *= UINT8_MAX / UINT16_MAX
            image = image.astype(np.uint8)
            return image
        elif dtype == np.uint16:
            return image
        else:
            raise TypeError('numpy.float32 or numpy.uint8 or numpy.uint16 supported as a target dtype')

    else:
        raise TypeError('numpy.uint8 or numpy.uint16 supported as an input dtype')

In [None]:
def get_human_output(pixel_array, human_data):
    _dict = eval(human_data['contour_list'])
    _ratings = eval(human_data['rating_list'])

    height, width = pixel_array.shape

    if _ratings:
        masks = []
        for _key in _dict.keys():
            _contour = _dict[_key]

            for _rating in _ratings:
                if _rating['contourId']  == _key:
                    lesion_rating = _rating['rating']

            arr = convert_dict_to_array(_contour)

            arr[..., :, 0] = arr[:, 0] * width - width / 2
            arr[..., :, 1] = arr[:, 1] * height - height / 2

            arr = arr.astype(np.int64)

            arr = np.expand_dims(arr, 0)
            arr = np.expand_dims(arr, 0)

            mask = sum(get_masks([arr], width=width, height=height, mask_size=max(width, height)))
            mask = mask * (float(lesion_rating)/5)
            masks.append(mask)

        final_mask = np.stack(masks, axis=0).max(0)

    else:
        final_mask = np.zeros((height, width), dtype=np.float32)

    return final_mask

In [7]:
def get_gt_final_mask(data):
    if 'abnormal_finding' in data.keys() and data['abnormal_finding']:
        masks = []
        for _dict in data['abnormal_finding']:
#             if _dict['label_text'] in mca_list:
            if _dict['label_text']:
                for contour_key in _dict['contour_list'].keys():
                    contour = _dict['contour_list'][contour_key]
                    arr = convert_dict_to_array(contour)
                    # print(width, height)
                    # print(arr)

                    # print(width, height)
                    # print(arr)

                    arr[..., :, 0] = arr[:, 0]
                    arr[..., :, 1] = arr[:, 1]
                    arr = arr.astype(np.int64)

                    arr = np.expand_dims(arr, 0)
                    arr = np.expand_dims(arr, 0)

                    mask = sum(get_masks([arr], width=width, height=height, mask_size=max(width, height)))
                    masks.append(mask)
            else:
                mask = np.zeros((height, width), dtype=np.float32)
                masks.append(mask)

        final_mask = np.stack(masks, axis=0).max(0)
    else:
        final_mask = np.zeros((height, width), dtype=np.float32)
        
    return final_mask

In [8]:
mapping_df = pd.read_csv('./data/brmh_1_mapping_table_respiratory.csv')
mapping_cases = mapping_df['case_no'].tolist()

interest_list = []
for i in range(2):
    interest_list.append('u{}_u{}'.format(str(i+2),str(i+14)))

#interest_list = ['u2/t2']
print(interest_list)

with open('jafroc_respiratory(opt_resp).txt', 'w') as csvfile:
    for interest_dir in interest_list:
#         print(interest_dir)
        gt_masks = []
        human_masks = []
        for index, file_name in enumerate(mapping_cases):
            if file_name.split('-')[0] == 'B':
                hospital_name = 'brmh'
            elif file_name.split('-')[0] == 'K':
                hospital_name = 'kyuh'
            elif file_name.split('-')[0] == 'G':
                hospital_name = 'gugh'
            else:
                raise ValueError('invalid hospital name')

            json_root_path = 'D:/lunit/data/review_result_20200705/{}-A1/{}/respiratory'.format(hospital_name.upper(), hospital_name)
            json_file = os.path.join(json_root_path, (file_name+'.dcm.json'))

#             heatmap_root_path = '/storage2/ctr/original/cxr/external_validation/BRMH-GIL-KONYANG/{}/respiratory'.format(hospital_name)

            with open(json_file, "r") as f:
                data = json.load(f)

#             handler = dicom_handler.get_handler(os.path.join(heatmap_root_path, (file_name+'.dcm')), modality='CXR')
#             pixel_array = handler.pixels
#             pixel_array = (pixel_array * 255).astype(np.uint8)
            height, width = data['height'], data['width']
            pixel_array = np.zeros((height,width))

#             mca_list = ['Nodule / Mass', 'Consolidation', 'Pneumothorax']
#             mca_list = ['Nodule / Mass']
            mca_list = ['Consolidation']
#             mca_list = ['Pneumothorax']
            gt_masks.append(get_gt_final_mask(data))

            human_root_path = 'D:/lunit/data/cxr_opt_respiratory'
            human_json_name = str(mapping_df['seq'].tolist()[index]) + '.json'
            human_json_full = os.path.join(human_root_path, interest_dir, 'without_AI', human_json_name)
            with open(human_json_full, "r") as f:
                human_data = json.load(f)

            human_masks.append(get_human_output(pixel_array, human_data))

        new_shape = (512, 512)
        human_outputs = [cv2.resize(np.asarray(human_mask), new_shape, interpolation=cv2.INTER_NEAREST) for human_mask in human_masks]

        gt_masks = [gt_mask.astype(bool) for gt_mask in gt_masks]


        resized_human_outputs = []
        for index, human_output in enumerate(human_outputs):
            resized_human_outputs.append(np.resize(human_output, gt_masks[index].shape))

        jafroc_value = metric.jafroc(resized_human_outputs, gt_masks)
        print("jafroc:\t{:.3f}".format(jafroc_value), file=csvfile)
        print("jafroc:\t{:.3f}".format(jafroc_value))

        jaf_ci = metric.bootstrap_jafroc_ci(resized_human_outputs, gt_masks, n_bootstraps=100, alpha=0.05, rng_seed=123)
        print(jaf_ci, file=csvfile)
        print(jaf_ci)
              
csvfile.close()

['u2_u14']
u2_u14
92 138
[0.6, 0.0, 0.0, 0.8, 0.0, 0.8, 0.4, 0.6, 0.8, 0.8, 0.4, 0.0, 0.8, 0.6, 0.6, 0.4, 0.0, 0.0, 0.8, 0.0, 0.2, 0.0, 0.6, 0.8, 0.0, 0.6, 0.0, 0.0, 0.0, 0.4, 0.6, 0.0, 0.6, 0.6, 0.4, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.4, 0.8, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.4, 0.0, 0.6, 0.2, 0.6, 0.0, 0.4, 0.6, 0.0, 0.4, 0.0, 0.0, 0.8, 0.6, 0.6, 0.4, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.2, 0.2, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0] [[0.8, 0.8], [1.0, 1.0, 1.0], [0.8, 0.8], [1.0, 1.0], [0.6], [0.0], [0.8, 0.8], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0], [1.0], [1.0, 1.0], [0.8], [1.0], [1.0], [0.6], [0.8], [0.8, 0.8], [0.8], [1.0], [1.0, 1.0], [1.0, 1.0], [1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [0.8, 0.8], [0.8], [0.6], [1.0], [1.0, 1.0], [1.0], [0.8, 0.8], [1.0, 1.0, 1.0, 1.0], [0.6], [1.0], [0.8], [1.0], [0.8, 0.8], [1.0], [0.4], [0.8, 0.8], [0.6], [1.0, 1.0], [1.0], [0.8], [1.0], [1.0, 1.0], [0.0], [0.8], [1.0, 1.0], [1.0, 1.0, 

In [9]:
mapping_df = pd.read_csv('./data/brmh_1_mapping_table_respiratory.csv')
mapping_cases = mapping_df['case_no'].tolist()

interest_list = []
for i in range(12):
    interest_list.append('u{}_u{}'.format(str(i+2),str(i+14)))

#interest_list = ['u2/t2']
print(interest_list)

with open('jafroc_respiratory(opt_resp).txt', 'w') as csvfile:
    for interest_dir in interest_list:
        print(interest_dir)
        gt_masks = []
        human_masks = []
        for index, file_name in enumerate(mapping_cases):
            if file_name.split('-')[0] == 'B':
                hospital_name = 'brmh'
            elif file_name.split('-')[0] == 'K':
                hospital_name = 'kyuh'
            elif file_name.split('-')[0] == 'G':
                hospital_name = 'gugh'
            else:
                raise ValueError('invalid hospital name')

            json_root_path = 'D:/lunit/data/review_result_20200705/{}-A1/{}/respiratory'.format(hospital_name.upper(), hospital_name)
            json_file = os.path.join(json_root_path, (file_name+'.dcm.json'))

#             heatmap_root_path = '/storage2/ctr/original/cxr/external_validation/BRMH-GIL-KONYANG/{}/respiratory'.format(hospital_name)

            with open(json_file, "r") as f:
                data = json.load(f)

#             handler = dicom_handler.get_handler(os.path.join(heatmap_root_path, (file_name+'.dcm')), modality='CXR')
#             pixel_array = handler.pixels
#             pixel_array = (pixel_array * 255).astype(np.uint8)
            height, width = data['height'], data['width']
            pixel_array = np.zeros((height,width))

#             mca_list = ['Nodule / Mass', 'Consolidation', 'Pneumothorax']
#             mca_list = ['Nodule / Mass']
            mca_list = ['Consolidation']
#             mca_list = ['Pneumothorax']
            gt_masks.append(get_gt_final_mask(data))

            human_root_path = 'D:/lunit/data/cxr_opt_respiratory'
            human_json_name = str(mapping_df['seq'].tolist()[index]) + '.json'
            human_json_full = os.path.join(human_root_path, interest_dir, 'with_AI', human_json_name)
            with open(human_json_full, "r") as f:
                human_data = json.load(f)

            human_masks.append(get_human_output(pixel_array, human_data))

        new_shape = (512, 512)
        human_outputs = [cv2.resize(np.asarray(human_mask), new_shape, interpolation=cv2.INTER_NEAREST) for human_mask in human_masks]

        gt_masks = [gt_mask.astype(bool) for gt_mask in gt_masks]


        resized_human_outputs = []
        for index, human_output in enumerate(human_outputs):
            resized_human_outputs.append(np.resize(human_output, gt_masks[index].shape))

        jafroc_value = metric.jafroc(resized_human_outputs, gt_masks)
        print("jafroc:\t{:.3f}".format(jafroc_value), file=csvfile)
        print("jafroc:\t{:.3f}".format(jafroc_value))

        jaf_ci = metric.bootstrap_jafroc_ci(resized_human_outputs, gt_masks, n_bootstraps=100, alpha=0.05, rng_seed=123)
        print(jaf_ci, file=csvfile)
        print(jaf_ci)
              
csvfile.close()

['u2_u14', 'u3_u15', 'u4_u16', 'u5_u17', 'u6_u18', 'u7_u19', 'u8_u20', 'u9_u21', 'u10_u22', 'u11_u23', 'u12_u24', 'u13_u25']
u2_u14
jafroc:	0.905
(0.873, 0.937)
u3_u15
jafroc:	0.909
(0.874, 0.949)
u4_u16
jafroc:	0.856
(0.818, 0.91)
u5_u17
jafroc:	0.878
(0.831, 0.922)
u6_u18
jafroc:	0.886
(0.845, 0.921)
u7_u19
jafroc:	0.875
(0.828, 0.921)
u8_u20
jafroc:	0.914
(0.875, 0.951)
u9_u21
jafroc:	0.857
(0.822, 0.894)
u10_u22
jafroc:	0.866
(0.833, 0.905)
u11_u23
jafroc:	0.879
(0.836, 0.922)
u12_u24
jafroc:	0.872
(0.825, 0.919)
u13_u25
jafroc:	0.884
(0.849, 0.924)


In [10]:
mapping_df = pd.read_csv('./data/brmh_2_mapping_table_healthcheck.csv')
mapping_cases = mapping_df['case_no'].tolist()

interest_list = []
for i in range(9):
    interest_list.append('u{}_u{}'.format(str(i+2),str(i+11)))

#interest_list = ['u2/t2']
print(interest_list)

with open('jafroc_respiratory(opt_health).txt', 'w') as csvfile:
    for interest_dir in interest_list:
        print(interest_dir)
        gt_masks = []
        human_masks = []
        for index, file_name in enumerate(mapping_cases):
            if file_name.split('-')[0] == 'B':
                hospital_name = 'brmh'
            elif file_name.split('-')[0] == 'K':
                hospital_name = 'kyuh'
            elif file_name.split('-')[0] == 'G':
                hospital_name = 'gugh'
            else:
                raise ValueError('invalid hospital name')

            json_root_path = 'D:/lunit/data/review_result_20200705/{}-A2/{}/healthcheck'.format(hospital_name.upper(), hospital_name)
            json_file = os.path.join(json_root_path, (file_name+'.dcm.json'))

#             heatmap_root_path = '/storage2/ctr/original/cxr/external_validation/BRMH-GIL-KONYANG/{}/respiratory'.format(hospital_name)

            with open(json_file, "r") as f:
                data = json.load(f)

#             handler = dicom_handler.get_handler(os.path.join(heatmap_root_path, (file_name+'.dcm')), modality='CXR')
#             pixel_array = handler.pixels
#             pixel_array = (pixel_array * 255).astype(np.uint8)
            height, width = data['height'], data['width']
            pixel_array = np.zeros((height,width))

#             mca_list = ['Nodule / Mass', 'Consolidation', 'Pneumothorax']
#             mca_list = ['Nodule / Mass']
            mca_list = ['Consolidation']
#             mca_list = ['Pneumothorax']
            gt_masks.append(get_gt_final_mask(data))

            human_root_path = 'D:/lunit/data/cxr_opt_healthcheck'
            human_json_name = str(mapping_df['seq'].tolist()[index]) + '.json'
            human_json_full = os.path.join(human_root_path, interest_dir, 'without_AI', human_json_name)
            with open(human_json_full, "r") as f:
                human_data = json.load(f)

            human_masks.append(get_human_output(pixel_array, human_data))

        new_shape = (512, 512)
        human_outputs = [cv2.resize(np.asarray(human_mask), new_shape, interpolation=cv2.INTER_NEAREST) for human_mask in human_masks]

        gt_masks = [gt_mask.astype(bool) for gt_mask in gt_masks]


        resized_human_outputs = []
        for index, human_output in enumerate(human_outputs):
            resized_human_outputs.append(np.resize(human_output, gt_masks[index].shape))

        jafroc_value = metric.jafroc(resized_human_outputs, gt_masks)
        print("jafroc:\t{:.3f}".format(jafroc_value), file=csvfile)
        print("jafroc:\t{:.3f}".format(jafroc_value))

        jaf_ci = metric.bootstrap_jafroc_ci(resized_human_outputs, gt_masks, n_bootstraps=100, alpha=0.05, rng_seed=123)
        print(jaf_ci, file=csvfile)
        print(jaf_ci)
              
csvfile.close()

['u2_u11', 'u3_u12', 'u4_u13', 'u5_u14', 'u6_u15', 'u7_u16', 'u8_u17', 'u9_u18', 'u10_u19']
u2_u11
jafroc:	0.646
(0.603, 0.737)
u3_u12
jafroc:	0.718
(0.642, 0.793)
u4_u13
jafroc:	0.701
(0.628, 0.8)
u5_u14
jafroc:	0.704
(0.641, 0.778)
u6_u15
jafroc:	0.664
(0.59, 0.735)
u7_u16
jafroc:	0.696
(0.627, 0.758)
u8_u17
jafroc:	0.718
(0.635, 0.797)
u9_u18
jafroc:	0.689
(0.601, 0.752)
u10_u19
jafroc:	0.635
(0.575, 0.694)


In [11]:
mapping_df = pd.read_csv('./data/brmh_2_mapping_table_healthcheck.csv')
mapping_cases = mapping_df['case_no'].tolist()

interest_list = []
for i in range(9):
    interest_list.append('u{}_u{}'.format(str(i+2),str(i+11)))

#interest_list = ['u2/t2']
print(interest_list)

with open('jafroc_respiratory(opt_health).txt', 'w') as csvfile:
    for interest_dir in interest_list:
        print(interest_dir)
        gt_masks = []
        human_masks = []
        for index, file_name in enumerate(mapping_cases):
            if file_name.split('-')[0] == 'B':
                hospital_name = 'brmh'
            elif file_name.split('-')[0] == 'K':
                hospital_name = 'kyuh'
            elif file_name.split('-')[0] == 'G':
                hospital_name = 'gugh'
            else:
                raise ValueError('invalid hospital name')

            json_root_path = 'D:/lunit/data/review_result_20200705/{}-A2/{}/healthcheck'.format(hospital_name.upper(), hospital_name)
            json_file = os.path.join(json_root_path, (file_name+'.dcm.json'))

#             heatmap_root_path = '/storage2/ctr/original/cxr/external_validation/BRMH-GIL-KONYANG/{}/respiratory'.format(hospital_name)

            with open(json_file, "r") as f:
                data = json.load(f)

#             handler = dicom_handler.get_handler(os.path.join(heatmap_root_path, (file_name+'.dcm')), modality='CXR')
#             pixel_array = handler.pixels
#             pixel_array = (pixel_array * 255).astype(np.uint8)
            height, width = data['height'], data['width']
            pixel_array = np.zeros((height,width))

#             mca_list = ['Nodule / Mass', 'Consolidation', 'Pneumothorax']
#             mca_list = ['Nodule / Mass']
#             mca_list = ['Consolidation']
#             mca_list = ['Pneumothorax']
            gt_masks.append(get_gt_final_mask(data))

            human_root_path = 'D:/lunit/data/cxr_opt_healthcheck'
            human_json_name = str(mapping_df['seq'].tolist()[index]) + '.json'
            human_json_full = os.path.join(human_root_path, interest_dir, 'with_AI', human_json_name)
            with open(human_json_full, "r") as f:
                human_data = json.load(f)

            human_masks.append(get_human_output(pixel_array, human_data))

        new_shape = (512, 512)
        human_outputs = [cv2.resize(np.asarray(human_mask), new_shape, interpolation=cv2.INTER_NEAREST) for human_mask in human_masks]

        gt_masks = [gt_mask.astype(bool) for gt_mask in gt_masks]


        resized_human_outputs = []
        for index, human_output in enumerate(human_outputs):
            resized_human_outputs.append(np.resize(human_output, gt_masks[index].shape))

        jafroc_value = metric.jafroc(resized_human_outputs, gt_masks)
        print("jafroc:\t{:.3f}".format(jafroc_value), file=csvfile)
        print("jafroc:\t{:.3f}".format(jafroc_value))

        jaf_ci = metric.bootstrap_jafroc_ci(resized_human_outputs, gt_masks, n_bootstraps=100, alpha=0.05, rng_seed=123)
        print(jaf_ci, file=csvfile)
        print(jaf_ci)
              
csvfile.close()

['u2_u11', 'u3_u12', 'u4_u13', 'u5_u14', 'u6_u15', 'u7_u16', 'u8_u17', 'u9_u18', 'u10_u19']
u2_u11
jafroc:	0.659
(0.595, 0.718)
u3_u12
jafroc:	0.700
(0.602, 0.766)
u4_u13
jafroc:	0.679
(0.587, 0.762)
u5_u14
jafroc:	0.684
(0.609, 0.756)
u6_u15
jafroc:	0.698
(0.614, 0.774)
u7_u16
jafroc:	0.632
(0.571, 0.689)
u8_u17
jafroc:	0.702
(0.626, 0.754)
u9_u18
jafroc:	0.653
(0.565, 0.734)
u10_u19
jafroc:	0.691
(0.593, 0.772)
