In [None]:
import pandas as pd
import pydicom as dicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from ast import literal_eval
import os
import glob
import seaborn as sns
sns.set_style("whitegrid")

In [None]:
# Module-level switches read by dicom_dataset_to_dict.
voi_lut = True         # pass pixel data through apply_voi_lut before scaling
fix_monochrome = True  # invert images whose PhotometricInterpretation is MONOCHROME1

def dicom_dataset_to_dict(filename):
    """Read a DICOM file and return its header as a dict plus an 8-bit image.

    Credit: https://github.com/pydicom/pydicom/issues/319
            https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Parameters
    ----------
    filename
        Passed unchanged to ``dicom.dcmread``.

    Returns
    -------
    tuple
        ``(dicom_dict, modified_image_data)`` where ``dicom_dict`` maps element
        names to converted values (pixel data and 'Pixel Representation' are
        left out) and ``modified_image_data`` is the pixel array min-max scaled
        to 0..255 and cast to ``np.uint8``.

    Notes
    -----
    Reads the module-level flags ``voi_lut`` and ``fix_monochrome``.
    """
    dicom_header = dicom.dcmread(filename)

    #====== DICOM FILE DATA ======
    dicom_dict = _dicom_header_to_dict(dicom_header)
    # The tag is not guaranteed to be present; pop() avoids the KeyError that
    # `del` raised for files without it.
    dicom_dict.pop('Pixel Representation', None)

    #====== DICOM IMAGE DATA ======
    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom_header.pixel_array, dicom_header)
    else:
        data = dicom_header.pixel_array
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom_header.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1]. A constant image has a zero range; skip the
    # division then so the result is all zeros instead of NaN from 0/0.
    data = data - np.min(data)
    value_range = np.max(data)
    if value_range > 0:
        data = data / value_range
    modified_image_data = (data * 255).astype(np.uint8)

    return dicom_dict, modified_image_data


def _dicom_header_to_dict(dataset):
    """Convert an already-loaded pydicom dataset into a plain dict (pixel data skipped)."""
    # NOTE(review): kept from the credited snippet. Rendering the dataset
    # presumably makes pydicom decode lazily-read elements so that `.name`
    # is available in the loop below - confirm before removing.
    repr(dataset)

    header = {}
    for element in dataset.values():
        if element.tag == (0x7fe0, 0x0010):
            #discard pixel data
            continue
        if isinstance(element.value, dicom.dataset.Dataset):
            # Recurse on the dataset object itself. The previous code re-entered
            # dicom_dataset_to_dict, which expects a file name and returns a tuple.
            header[element.name] = _dicom_header_to_dict(element.value)
        else:
            header[element.name] = _convert_value(element.value)
    return header

def _sanitise_unicode(s):
    return s.replace(u"\u0000", "").strip()

def _convert_value(v):
    t = type(v)
    if t in (list, int, float):
        cv = v
    elif t == str:
        cv = _sanitise_unicode(v)
    elif t == bytes:
        s = v.decode('ascii', 'replace')
        cv = _sanitise_unicode(s)
    elif t == dicom.valuerep.DSfloat:
        cv = float(v)
    elif t == dicom.valuerep.IS:
        cv = int(v)
    else:
        cv = repr(v)
    return cv

# SIIM-FISABIO-RSNA COVID-19 Detection
### Identify and localize COVID-19 abnormalities on chest radiographs

**In this competition, we are identifying and localizing COVID-19 abnormalities on chest radiographs. This is an object detection and classification problem.**

# Load data

In [None]:
# Image-level and study-level training annotations shipped with the competition.
train_image_level = pd.read_csv(
    filepath_or_buffer='../input/siim-covid19-detection/train_image_level.csv'
)
train_study_level = pd.read_csv(
    filepath_or_buffer='../input/siim-covid19-detection/train_study_level.csv'
)

In [None]:
# Preview the first rows of the image-level table.
train_image_level.head(n=5)

In [None]:
# Preview the first rows of the study-level table.
train_study_level.head(n=5)

## Checking for NaN

In [None]:
# Missing values per column in the image-level table.
train_image_level.isna().sum(axis=0)

In [None]:
# Missing values per column in the study-level table.
train_study_level.isna().sum(axis=0)

## Merge 

In [None]:
# Derive the merge key from the part of `id` before the first underscore,
# then drop the original `id` column.
train_study_level = (
    train_study_level
    .assign(
        StudyInstanceUID=train_study_level['id'].map(
            lambda study_id: study_id.split('_')[0]
        )
    )
    .drop(columns='id')
)

In [None]:
# Attach the study-level columns to every image row of the same study (inner join).
result = train_image_level.merge(
    train_study_level,
    how="inner",
    on="StudyInstanceUID",
)

In [None]:
# Preview the merged table.
result.head(n=5)

## Creating a Categorical Column

In [None]:
# The four indicator columns that are collapsed into a single `category`.
label_columns = ['Negative for Pneumonia', 'Typical Appearance',
                 'Indeterminate Appearance', 'Atypical Appearance']
labels = result[label_columns]

# For each row, use the name of the first indicator column whose value is 1.
result['category'] = labels.apply(
    lambda row: row[row == 1].index.values[0],
    axis=1,
)

# The indicator columns and `label` are no longer needed once `category` exists.
result.drop(columns=label_columns + ['label'], inplace=True)

## Fill NaN

In [None]:
# Use 0 as the sentinel for rows that have no box annotation.
result['boxes'] = result['boxes'].fillna(value=0)

In [None]:
# Rows whose `boxes` value is the 0 sentinel, i.e. no annotation.
has_no_boxes = result['boxes'].eq(0)
nans = result.loc[has_no_boxes]

In [None]:
def _parse_boxes(raw_boxes):
    """Convert one `boxes` cell into a list of box dicts.

    - str: parsed with ``literal_eval`` (the serialized annotation).
    - list: returned as is, so re-running this cell on already-parsed data
      no longer fails inside ``literal_eval``.
    - anything else (the 0 sentinel, or a NaN that was never filled): a single
      zero-sized placeholder box.
    """
    if isinstance(raw_boxes, str):
        return literal_eval(raw_boxes)
    if isinstance(raw_boxes, list):
        return raw_boxes
    # A fresh list per row so rows never share one mutable placeholder.
    return [{'x':0,'y':0,'width':0,'height':0}]


result['boxes'] = result['boxes'].apply(_parse_boxes)

In [None]:
# Bar chart: number of un-annotated rows (`nans`) in each category.
fig, ax = plt.subplots(figsize=(10, 4))
sns.countplot(data=nans, x='category', ax=ax)
ax.set(title='Total Count of NaN per Category')
plt.show()

## Get Image Directory

In [None]:
train_directory = '../input/siim-covid19-detection/train'


def _find_dicom_path(study_uid, image_id):
    """Return the path of the DICOM file that belongs to one image row.

    The previous lookup took the first file found anywhere under the study
    directory, so every image of a multi-image study got the same path and
    boxes could be drawn on the wrong radiograph.

    Raises
    ------
    FileNotFoundError
        If no file exists under the study directory.
    """
    study_dir = os.path.join(train_directory, study_uid)

    # Assumes image ids look like '<name>_image' and files are stored as
    # '<study>/<series>/<name>.dcm' - TODO confirm against the dataset layout.
    image_name = image_id.split('_')[0] + '.dcm'
    matches = glob.glob(os.path.join(study_dir, '*', image_name))

    if not matches:
        # Naming assumption did not hold: fall back to the old behaviour of
        # taking a file from the study (sorted so the choice is deterministic).
        matches = sorted(glob.glob(os.path.join(study_dir, '*', '*')))
    if not matches:
        raise FileNotFoundError(f'No DICOM file found under {study_dir}')
    return matches[0]


training_paths = [
    _find_dicom_path(study_uid, image_id)
    for study_uid, image_id in zip(result['StudyInstanceUID'], result['id'])
]

result['path'] = training_paths

In [None]:
# Preview the table now that the `path` column has been added.
result.head(n=5)

# EDA

## Categories

In [None]:
# Bar chart: number of rows in each category across the merged table.
fig, ax = plt.subplots(figsize=(10, 4))
sns.countplot(data=result, x='category', ax=ax)
ax.set(title='Total Count of Categories')
plt.show()

The categories are unbalanced, so this imbalance will need to be dealt with before modelling.

## Visualizations

In [None]:
def visualize_image(sample):
    """Display one image with each of its boxes outlined.

    ``sample`` must provide ``path`` (read via ``sample['path']``) and the
    attributes ``boxes`` (iterable of dicts with 'x', 'y', 'width', 'height'),
    ``id`` and ``category``; the last two are shown in the title.
    """
    # Only the image array is needed here; the header dict is discarded.
    _, pixels = dicom_dataset_to_dict(sample['path'])

    fig, ax = plt.subplots(figsize=(8,8))
    ax.imshow(pixels, cmap='jet')
    ax.set_xticks([])
    ax.set_yticks([])

    for box in sample.boxes:
        outline = patches.Rectangle(
            xy=(box['x'], box['y']),
            width=box['width'],
            height=box['height'],
            linewidth=1,
            edgecolor='k',
            facecolor='none',
        )
        ax.add_patch(outline)

    ax.set_title(f'ID: {sample.id} Label: {sample.category}')
    plt.show()
    

## Negative for Pneumonia

In [None]:
# Hand-picked row positions shown under this heading. One loop replaces three
# copy-pasted cells.
# NOTE(review): the positions are hard-coded and presumably match the
# "Negative for Pneumonia" category - verify against `result['category']`.
for row_position in (1, 6, 27):
    sample = result.iloc[row_position]
    visualize_image(sample)

## Typical Appearance

In [None]:
# Hand-picked row positions shown under this heading. One loop replaces three
# copy-pasted cells.
# NOTE(review): the positions are hard-coded and presumably match the
# "Typical Appearance" category - verify against `result['category']`.
for row_position in (0, 2, 4):
    sample = result.iloc[row_position]
    visualize_image(sample)

## Indeterminate Appearance

In [None]:
# Hand-picked row positions shown under this heading. One loop replaces three
# copy-pasted cells.
# NOTE(review): the positions are hard-coded and presumably match the
# "Indeterminate Appearance" category - verify against `result['category']`.
for row_position in (5, 18, 40):
    sample = result.iloc[row_position]
    visualize_image(sample)

## Atypical Appearance

In [None]:
# Hand-picked row positions shown under this heading. One loop replaces three
# copy-pasted cells.
# NOTE(review): the positions are hard-coded and presumably match the
# "Atypical Appearance" category - verify against `result['category']`.
for row_position in (3, 46, 53):
    sample = result.iloc[row_position]
    visualize_image(sample)

**Work in Progress**