# Chest X-Rays and Bounding Boxes

This notebook shows you how to display a DICOM image and the corresponding bounding boxes using matplotlib.  
It is based on:
- https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
- https://www.kaggle.com/trungthanhnguyen0502/eda-vinbigdata-chest-x-ray-abnormalities

In [None]:
from pathlib import Path
from random import randint

import pandas as pd
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import matplotlib.pyplot as plt
import cv2
from glob import glob

%matplotlib inline

In [None]:
# Root of the Kaggle competition data and the folder holding the training DICOMs.
dataset_dir = Path('../input/vinbigdata-chest-xray-abnormalities-detection')
path_train_set = dataset_dir / "train"
# glob() returns files in arbitrary order; sorting keeps slices such as
# dicom_paths[:4] stable between runs. The pattern is built from
# path_train_set so the train folder is defined in exactly one place.
dicom_paths = sorted(glob(f'{path_train_set}/*.dicom'))

In [None]:
def patient_id_to_path(patient_id, root=Path("./")):
    """Build the path of the DICOM file that belongs to `patient_id`.

    The file name is the id followed by the ``.dicom`` extension, joined
    onto `root` (current directory by default).
    """
    filename = f"{patient_id}.dicom"
    return root / filename


def dicom_to_array(path, voi_lut=True, fix_monochrome=True):
    """Convert a DICOM chest X-ray to an 8-bit numpy array.

    According to the source linked below, raw DICOM pixel data cannot be
    converted to PNG/JPG with a plain linear rescale, because most DICOM
    files store pixel values on an exponential scale. This function applies
    the necessary transformations and then rescales the result to 0..255.

    Parameters
    ----------
    path : str or path-like
        Location of the DICOM file to read.
    voi_lut : bool, default True
        If True, pass the pixel data through ``apply_voi_lut``; otherwise
        use the raw ``pixel_array``.
    fix_monochrome : bool, default True
        If True and the file's PhotometricInterpretation is "MONOCHROME1",
        invert the intensities.

    Returns
    -------
    np.ndarray
        ``np.uint8`` array min-max scaled to the range 0..255. A
        constant-valued image yields an all-zero array.

    Source
    ------
    Kaggle user: raddar
    https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
    """
    # dcmread is the supported reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    # VOI LUT (if provided by the DICOM device) transforms the raw data into
    # a "human-friendly" view.
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # Depending on this value the X-ray may look inverted - fix that.
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max normalise to [0, 1]. After subtracting the minimum, a constant
    # image is all zeros; dividing by its maximum (0) would produce NaNs, so
    # the division is skipped in that case.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak

    return (data * 255).astype(np.uint8)


def plot_img(img, size=(7, 7), is_rgb=True, title="", cmap='gray'):
    """Show a single image without axes.

    `size` is the matplotlib figure size, `title` is placed as the figure's
    suptitle and `cmap` is forwarded to ``imshow``. `is_rgb` is accepted but
    not used by this function.
    """
    fig, ax = plt.subplots(figsize=size)
    ax.axis('off')
    ax.imshow(img, cmap=cmap)
    fig.suptitle(title)
    plt.show()


def plot_imgs(imgs, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(500,500)):
    """Show several images in a grid with `cols` columns.

    Parameters
    ----------
    imgs : sequence of arrays
        Images to display, filled row by row.
    cols : int
        Number of grid columns.
    size : number
        Edge length given to each grid cell when sizing the figure.
    is_rgb : bool
        Not used; kept so existing calls keep working.
    title : str
        Figure-level title (suptitle).
    cmap : str
        Colormap forwarded to ``imshow``.
    img_size : tuple or None
        If not None, every image is resized to this size with ``cv2.resize``
        before being shown.
    """
    # Ceiling division, so a completely filled last row does not add an
    # empty extra row to the figure; always keep at least one row.
    rows = max(1, -(-len(imgs) // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    for i, img in enumerate(imgs):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        ax = fig.add_subplot(rows, cols, i+1)
        ax.imshow(img, cmap=cmap)
        ax.axis('off')
    fig.suptitle(title)
    plt.show()
    

In [None]:
# Decode the first four training DICOMs and preview them side by side.
imgs = list(map(dicom_to_array, dicom_paths[:4]))
plot_imgs(imgs)

In [None]:
# Load the training annotations, then keep only the rows whose class is an
# actual finding (everything except the 'No finding' label).
df_train = pd.read_csv(f'{dataset_dir}/train.csv')
has_finding = df_train['class_name'] != 'No finding'
df_finding = df_train[has_finding]

In [None]:
# One RGB colour per class id. A seeded generator and sorted ids keep the
# class -> colour mapping identical across kernel restarts, so figures from
# different runs can be compared.
color_rng = np.random.default_rng(42)
label2color = {
    class_id: color_rng.integers(0, 256, size=3).tolist()
    for class_id in sorted(df_train.class_id.unique())
}

In [None]:
def draw_bboxes(img, boxes, thickness=10, colors=None, img_size=(500,500)):
    """Return a copy of `img` with rectangles drawn for `boxes`.

    Parameters
    ----------
    img : np.ndarray
        Image to draw on; it is copied, never modified. A 2-D (grayscale)
        image is stacked into three identical channels first.
    boxes : sequence
        Each box is indexable as (x_min, y_min, x_max, y_max). Boxes with a
        NaN coordinate are skipped.
    thickness : int
        Line thickness passed to ``cv2.rectangle``.
    colors : sequence or None
        One colour per box. Defaults to (255, 0, 0) for every box.
    img_size : tuple or None
        If not None, the result is resized to this size with ``cv2.resize``
        after drawing.

    Raises
    ------
    AssertionError
        If `colors` is given and its length differs from `boxes`.
    """
    img_copy = img.copy()

    if len(img_copy.shape) == 2:
        img_copy = np.stack([img_copy, img_copy, img_copy], axis=-1)

    if colors is None:
        colors = [(255, 0, 0)] * len(boxes)
    else:
        assert len(colors) == len(boxes)

    for color, box in zip(colors, boxes):
        # A NaN coordinate cannot be converted with int(); skip such a box
        # instead of raising.
        if np.isnan(np.asarray(box[:4], dtype=float)).any():
            continue
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        img_copy = cv2.rectangle(img_copy, top_left, bottom_right, color, thickness)

    if img_size is not None:
        img_copy = cv2.resize(img_copy, img_size)

    return img_copy

In [None]:
# Pick one random annotated row and use its image id.
patient_id = df_finding.sample(1).image_id.iloc[0]

# Collect every annotation of that image: first column is the class id,
# the remaining four are the box corners.
box_columns = ['class_id', 'x_min', 'y_min', 'x_max', 'y_max']
boxes = df_finding.loc[df_finding.image_id == patient_id, box_columns].values

# Decode the image, draw each box in the colour of its class and show it.
dicom_path = patient_id_to_path(patient_id, root=path_train_set)
img = dicom_to_array(dicom_path)
box_colors = [label2color[label] for label in boxes[:, 0]]
img_bboxes_1 = draw_bboxes(img, boxes[:, 1:], colors=box_colors)
plot_img(img_bboxes_1)