In [None]:
# Install the `gdcm` package from the conda-forge channel; `-y` answers the
# confirmation prompt automatically so the cell runs unattended.
# NOTE(review): presumably needed so pydicom can decode compressed DICOM pixel
# data — confirm, and consider pinning a version for reproducibility.
! conda install -c conda-forge gdcm -y

In [None]:
import os
import pydicom
import glob
import cv2
import plotly
import ast

import numpy as np 
import pandas as pd 
import matplotlib.patches as patches
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from pydicom.pixel_data_handlers.util import apply_voi_lut
from skimage import exposure
from matplotlib.colors import ListedColormap
from collections import Counter
from pathlib import Path
from fastai.vision.all import *
from fastai.medical.imaging import *

In [None]:
# Root folder of the dataset; the CSV and DICOM paths used in this notebook
# are built relative to it.
dataset_path = Path('../input/siim-covid19-detection')
# Study-level annotation table.
train_study_df = pd.read_csv(dataset_path/'train_study_level.csv')
# Last expression of the cell: preview the first rows.
train_study_df.head()

In [None]:
# Font settings passed to `fig.update_layout(font=...)` for the Plotly charts.
FIG_FONT = dict(family="Helvetica, Arial", size=14, color="#7f7f7f")
# Short display names for the study classes. They are paired positionally with
# `study_classes` when plotting, so both lists must keep the same order.
LABEL_LIST = ['negative', 'typical', 'indeterminate', 'atypical']
# Four colours taken from seaborn's "Spectral" palette, converted to colour
# strings that Plotly accepts in `color_discrete_sequence`.
LABEL_COLORS = [px.colors.label_rgb(px.colors.convert_to_RGB_255(x)) for x in sns.color_palette("Spectral", 4)]
# Names of the class columns selected from `train_study_df`.
study_classes = ['Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance']

In [None]:
# Bar chart of the study-level class distribution: one bar per entry of
# LABEL_LIST, height = column-wise sum of the matching class column.
# NOTE(review): assumes the class columns are 0/1 indicators, so that the sum
# is a count of studies — confirm against the CSV.
fig = px.bar(x=LABEL_LIST,
             y=train_study_df[study_classes].values.sum(axis=0),
             color=LABEL_LIST, 
             opacity=0.7,
             color_discrete_sequence=LABEL_COLORS,
             labels={"y":"Counts", "x": ""})
# Drop the legend title, apply the shared font and set the axis titles.
fig.update_layout(legend_title=None, font=FIG_FONT, xaxis_title="",
                  yaxis_title="<b>Counts</b>")
fig.show()

In [None]:
# Image-level annotation table; later cells read its `id`, `boxes`, `label`
# and `StudyInstanceUID` columns.
train_image_df = pd.read_csv(dataset_path/'train_image_level.csv')
# Last expression of the cell: preview the first rows.
train_image_df.head()

In [None]:
# Number of bounding boxes per image. `boxes` holds a Python-literal string
# (parsed with ast.literal_eval) or NaN when the image has no box.
bbox = train_image_df['boxes'].apply(lambda x: len(ast.literal_eval(x)) if pd.notna(x) else 0)
c = Counter(bbox.values)
# Cover every box count that actually occurs (while always showing at least
# 0-4) so images with more than four boxes are not silently left out.
num_box = [str(i) for i in range(max(4, int(max(c, default=0))) + 1)]
counts = [c[int(n)] for n in num_box]
# LABEL_COLORS has only four entries; Plotly cycles through the sequence when
# there are more categories than colours.
fig = px.bar(x=num_box, y=counts, color=num_box, opacity=0.7,
             color_discrete_sequence=LABEL_COLORS)
fig.update_layout(legend_title=None, font=FIG_FONT, xaxis_title="<b>Number of boxes</b>",
                  yaxis_title="<b>Counts</b>")
fig.show()

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixel data as a ``uint8`` array in 0-255.

    Args:
        path: Path of the DICOM file to read.
        voi_lut: If True, pass the pixel data through ``apply_voi_lut`` to get
            the "human-friendly" view; otherwise use the raw pixel array.
        fix_monochrome: If True, invert the image when the file's
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8``, min-max rescaled to 0-255.
        An image with a single constant value comes back as all zeros.
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias.
    dicom = pydicom.dcmread(path)
    # VOI LUT (if available by DICOM device) is used to
    # transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    # Min-max normalisation. Skip the division for a constant image, whose
    # shifted maximum is 0 and would otherwise turn every pixel into NaN.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return data
        
    
def plot_img(img, size=(7, 7), is_rgb=True, title="", cmap='gray'):
    """Show a single image in a new matplotlib figure.

    Args:
        img: Image array accepted by ``plt.imshow``.
        size: Figure size passed to ``plt.figure(figsize=...)``.
        is_rgb: Unused; kept so existing calls keep working.
        title: Figure-level title.
        cmap: Colormap passed to ``plt.imshow``.
    """
    figure = plt.figure(figsize=size)
    figure.suptitle(title)
    plt.imshow(img, cmap=cmap)
    plt.show()


def plot_imgs(imgs, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(500,500)):
    """Show a list of images on a grid with ``cols`` columns.

    Args:
        imgs: Sequence of image arrays.
        cols: Number of grid columns.
        size: Side length (figure units) given to each grid cell.
        is_rgb: Unused; kept so existing calls keep working.
        title: Figure-level title.
        cmap: Colormap passed to ``imshow``.
        img_size: Size passed to ``cv2.resize`` for every image, or ``None``
            to display the images at their own size.
    """
    # Ceiling division, so a completely filled last row does not add an empty
    # extra row to the figure; at least one row keeps the figure size valid
    # when `imgs` is empty.
    rows = max(1, -(-len(imgs) // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    for i, img in enumerate(imgs):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        ax = fig.add_subplot(rows, cols, i+1)
        ax.imshow(img, cmap=cmap)
    fig.suptitle(title)
    plt.show()

In [None]:
# Collect the DICOM files found under the train folder; the next cell previews
# the first few of them.
dicom_paths = get_dicom_files(dataset_path/'train')

In [None]:
# Decode the first four DICOM files to uint8 arrays and show them on a grid
# (plot_imgs resizes each image to its default img_size).
imgs = [dicom2array(path) for path in dicom_paths[:4]]
plot_imgs(imgs)

In [None]:
def image_path(row):
    """Find the DICOM file that belongs to one image-level row.

    Searches the row's study folder under ``dataset_path/'train'`` and returns
    the first file whose stem equals the part of ``row.id`` before the first
    underscore. Returns ``None`` when no file matches.
    """
    candidates = get_dicom_files(dataset_path/'train'/row.StudyInstanceUID)
    return next((f for f in candidates if f.stem == row.id.split('_')[0]), None)

train_image_df['image_path'] = train_image_df.apply(image_path, axis=1)

In [None]:
imgs = []
image_paths = train_image_df['image_path'].values
# `label` is a flat whitespace-separated string; cut it into groups of six
# tokens. Below, token 0 is compared with 'opacity' and tokens 2-5 are used as
# the two opposite corners of the rectangle.
train_image_df['split_label'] = train_image_df.label.apply(lambda x: [x.split()[offs:offs+6] 
                                                                      for offs in range(0, len(x.split()), 6)])

thickness = 5  # rectangle line width in pixels, drawn on the downscaled image
scale = 5      # downscale factor applied to both the image and the box coordinates
random.seed(42)  # fixed seed so the same samples are drawn on every run

for i in range(8):
    # Deliberately not named `image_path`: that name is the helper function
    # defined in an earlier cell, and overwriting it breaks re-running that cell.
    sample_path = random.choice(image_paths)
    img = dicom2array(path=sample_path)
    img = cv2.resize(img, None, fx=1/scale, fy=1/scale)
    # Replicate the grey channel three times so the boxes can be drawn in colour.
    img = np.stack([img, img, img], axis=-1)
    for box in train_image_df.loc[train_image_df['image_path'] == sample_path].split_label.values[0]:
        if box[0] == 'opacity':
            img = cv2.rectangle(img,
                                (int(float(box[2])/scale), int(float(box[3])/scale)),
                                (int(float(box[4])/scale), int(float(box[5])/scale)),
                                [255,0,0], thickness)
    
    img = cv2.resize(img, (500,500))
    imgs.append(img)
    
# cmap=None because the images are now 3-channel.
plot_imgs(imgs, cmap=None)