In [None]:
from getpass import getpass
import os

import pydicom, numpy as np
import pandas as pd

In [None]:
# List every file found under the Kaggle input mount, one full path per line.
input_root = '/kaggle/input'
for folder, _, names in os.walk(input_root):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

In [None]:
# Load the stage-2 training labels into a DataFrame.
df = pd.read_csv('../input/rsna-pneumonia-detection-challenge/stage_2_train_labels.csv')

# Leave the preview as the cell's last expression so the notebook renders it
# as a table instead of the plain-text dump that print() produces.
df.head()

In [None]:
# Histogram of every numeric column in the labels table. The trailing
# semicolon keeps the returned array of Axes objects out of the cell output.
df.hist();

In [None]:
train_dir = '../input/rsna-pneumonia-detection-challenge/stage_2_train_images/'

# --- Take the first row's patient by position, so the lookup does not depend
# --- on the frame's index labels
patientId = df['patientId'].iloc[0]
dcm_file = train_dir + '%s.dcm' % patientId

# --- pydicom.read_file was deprecated and later removed from pydicom;
# --- dcmread is the supported reader and returns the same dataset
dcm_data = pydicom.dcmread(dcm_file)

print(dcm_data)

In [None]:
# Pull the pixel data out of the DICOM dataset and report its container type,
# element dtype and shape, one per line.
im = dcm_data.pixel_array
print(type(im), im.dtype, im.shape, sep='\n')

In [None]:
import matplotlib.pyplot as plt

# Show the pixel array in grayscale. The trailing semicolon keeps the returned
# AxesImage repr out of the cell output.
plt.imshow(im, cmap=plt.cm.gist_gray);

In [None]:
def parse_data(df, parent_dir):
    """Group the label rows by patient.

    Args:
        df: DataFrame with the columns 'patientId', 'x', 'y', 'width',
            'height' and 'Target', one row per box (or one row for a
            patient without boxes).
        parent_dir: Directory holding the '<patientId>.dcm' files. A trailing
            path separator is optional.

    Returns:
        A dict keyed by patient ID. Each value is a dict with:
            'dicom': path to the patient's DICOM file,
            'label': the 'Target' value of the patient's first row,
            'boxes': list of [y, x, height, width] lists, filled only when
                     the label equals 1.
    """
    parsed = {}
    for _, row in df.iterrows():
        # --- Initialize patient entry into parsed
        pid = row['patientId']
        if pid not in parsed:
            parsed[pid] = {
                # os.path.join inserts the separator only when parent_dir
                # does not already end with one
                'dicom': os.path.join(parent_dir, '%s.dcm' % pid),
                'label': row['Target'],
                'boxes': []
            }

        # --- Add box if opacity is present; coords are [y, x, height, width]
        entry = parsed[pid]
        if entry['label'] == 1:
            entry['boxes'].append(
                [row['y'], row['x'], row['height'], row['width']]
            )

    return parsed

In [None]:
# Build the per-patient lookup (file path, label, boxes) from the label table.
parsed = parse_data(df=df, parent_dir=train_dir)

In [None]:
# Preview the parsed records with one row per patient. Passing the dict
# straight to pd.DataFrame would turn every patient ID into its own column,
# and the whole frame would be dumped instead of a short preview.
pd.DataFrame.from_dict(parsed, orient='index').head()

In [None]:
# Inspect the parsed record of a single patient.
sample_pid = '00436515-870c-4b36-a041-de91049b9ab4'
print(parsed[sample_pid])

In [None]:
def draw(data):
    """Display one patient's image with its bounding boxes drawn on top.

    Args:
        data: A patient record as built by ``parse_data``: a dict with a
            'dicom' file path and a 'boxes' list of [y, x, height, width]
            entries.
    """
    # --- Open DICOM file (dcmread replaces the removed pydicom.read_file)
    d = pydicom.dcmread(data['dicom'])
    im = d.pixel_array

    # --- Convert from single-channel grayscale to 3-channel RGB
    # NOTE(review): imshow is given integer RGB data below, which assumes
    # 8-bit pixel values; confirm against the dataset.
    im = np.stack([im] * 3, axis=2)

    # --- Add boxes with random color if present
    for box in data['boxes']:
        rgb = np.floor(np.random.rand(3) * 256).astype('int')
        im = overlay_box(im=im, box=box, rgb=rgb, stroke=6)

    # --- No cmap here: matplotlib ignores the colormap for RGB input
    plt.imshow(im)

In [None]:
def overlay_box(im, box, rgb, stroke=1):
    """Draw a rectangular outline onto an image array.

    Args:
        im: Image array indexed as [row, column, ...]; it is modified in
            place.
        box: Sequence [y, x, height, width]; each value is converted with
            int().
        rgb: Value assigned to every outline pixel.
        stroke: Outline thickness in pixels.

    Returns:
        The same ``im`` object, with the outline painted.
    """
    # --- Convert coordinates to integers
    y1, x1, height, width = (int(b) for b in box)

    # --- Far corner of the box
    y2 = y1 + height
    x2 = x1 + width

    # --- Top and left edges start at (y1, x1); bottom and right edges start
    # --- at y2 / x2 and extend `stroke` pixels beyond them
    im[y1:y1 + stroke, x1:x2] = rgb
    # Bottom edge runs to x2 + stroke so it meets the right edge; stopping at
    # x2 would leave the bottom-right corner unpainted.
    im[y2:y2 + stroke, x1:x2 + stroke] = rgb
    im[y1:y2, x1:x1 + stroke] = rgb
    im[y1:y2, x2:x2 + stroke] = rgb

    return im

In [None]:
# Label rows belonging to this patient.
df.loc[df['patientId'].eq('00436515-870c-4b36-a041-de91049b9ab4')]

In [None]:
# Render this patient's image with its boxes.
draw(data=parsed['00436515-870c-4b36-a041-de91049b9ab4'])

In [None]:
# Label rows belonging to this patient.
df.loc[df['patientId'].eq('0004cfab-14fd-4e49-80ba-63a80b6bddd6')]

In [None]:
# Render this patient's image with its boxes.
draw(data=parsed['0004cfab-14fd-4e49-80ba-63a80b6bddd6'])

In [None]:
# Label rows belonging to this patient.
df.loc[df['patientId'].eq('00704310-78a8-4b38-8475-49f4573b2dbb')]

In [None]:
# Render this patient's image with its boxes.
draw(data=parsed['00704310-78a8-4b38-8475-49f4573b2dbb'])