In [None]:
!pip install python-gdcm

# Update

In [None]:
import pydicom
import numpy as np
import json
import pandas as pd

from fastai.data.all import get_files
from functools import partial
from pathlib import Path
from random import choice
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
from IPython.core.pylabtools import print_figure
from collections import Counter
from glob import glob
from pydicom.pixel_data_handlers.util import apply_voi_lut
from IPython.core.debugger import set_trace


In [None]:
# Root directory of the dataset; every CSV and the `train/` image tree are read relative to it.
# NOTE(review): the active path looks like a Kaggle input mount -- confirm before running elsewhere.
data_url = Path("../input/siim-covid19-detection")
# Alternative root (e.g. a local checkout); uncomment to use it instead.
# data_url = Path("../data")

In [None]:
# Image-level annotations, indexed by the part of each id before the first underscore
# (presumably stripping a suffix such as "_image" so ids match DICOM file stems -- confirm).
dfi = (
    pd.read_csv(data_url / "train_image_level.csv")
    .assign(id=lambda frame: frame["id"].apply(lambda raw_id: raw_id.split("_")[0]))
    .set_index("id")
)

In [None]:
# `get_files` pre-configured to return only files with a `.dcm` extension.
get_dicoms = partial(get_files, extensions=['.dcm'])

In [None]:
# Study-level labels, indexed by the part of each id before the first underscore
# (presumably stripping a suffix such as "_study" so ids match study directory names -- confirm).
dfs = (
    pd.read_csv(data_url / 'train_study_level.csv')
    .assign(id=lambda frame: frame["id"].apply(lambda raw_id: raw_id.split("_")[0]))
    .set_index('id')
)

In [None]:
class Study:
    """A study directory: every DICOM image found under it plus its study-level label.

    Parameters
    ----------
    path : Path or str
        A ``Path`` is used as-is as the study directory. Any other value is treated as a
        glob pattern (typically the study id) resolved directly under ``data_url/"train"``;
        the first match is used.

    Raises
    ------
    IndexError
        If a non-``Path`` argument matches nothing under ``data_url/"train"``.

    Displaying an instance in a notebook renders a grid of all its images via
    ``_repr_png_``.
    """
    # `result` indexes this list with the argmax of the study's row in `dfs`, so the order
    # is assumed to match the column order of train_study_level.csv -- TODO confirm.
    _cats = [ "Negative for Pneumonia", "Typical Appearance", "Indeterminate Appearance", "Atypical Appearance"]

    def __init__(self, path):
        if isinstance(path, Path):
            self.path = path
        else:
            first_match = next((data_url/"train").glob(path), None)
            if first_match is None:
                # Same exception type the original `[...][0]` lookup raised, with a message.
                raise IndexError(f"no study matching {path!r} under {data_url/'train'}")
            self.path = first_match
        self.dcms = [DCM(dcm_path) for dcm_path in get_dicoms(self.path)]

    def __len__(self):
        """Number of DICOM images in the study."""
        return len(self.dcms)

    @property
    def result(self):
        """Category name at the argmax position of this study's row in ``dfs``."""
        return self._cats[np.argmax(dfs.loc[self.path.stem])]

    @property
    def _figure(self):
        """Build (and close) a figure with one panel per image, four panels per row.

        The figure is closed before being returned, so it is only rendered when it is
        explicitly passed to ``print_figure``.
        """
        cols = 4
        count = len(self)
        # Ceiling division so every image gets an axis; always keep at least one row
        # so an empty study still yields a valid (blank) figure.
        rows = max(1, -(-count // cols))
        fig, axs = plt.subplots(rows, cols, figsize=(14, rows*3.5+1),
                                gridspec_kw={'hspace':0.5, 'wspace':0.0},
                                squeeze=False)
        axs = axs.ravel()
        for ax, dcm in zip(axs, self.dcms):
            dcm.plot(ax)
        # Hide the unused trailing panels of the last row.
        for ax in axs[count:]:
            ax.axis('off')

        fig.suptitle(f"{self.__class__.__name__}: {self.path.stem} {self.result}")
        plt.close(fig)
        return fig

    def _repr_png_(self):
        """PNG bytes of the study grid, used by IPython's rich display."""
        return print_figure(self._figure)

In [None]:
class DCM:
    """One DICOM image file with lazily decoded pixels and its image-level annotations.

    Parameters
    ----------
    path : Path or str
        A ``Path`` is used as-is. Any other value is treated as an image id and resolved
        to the first ``<id>.dcm`` found anywhere under ``data_url/"train"``.

    Raises
    ------
    IndexError
        If a non-``Path`` argument matches no file under ``data_url/"train"``.

    The file is read only on first access to ``dcm``, ``patient_sex`` or
    ``body_part_examined``; the decoded result is cached on the instance.
    """
    def __init__(self, path):
        if isinstance(path, Path):
            self.path = path
        else:
            # `next` stops the recursive search at the first hit instead of walking the whole tree.
            first_match = next((data_url/"train").glob(f"**/{path}.dcm"), None)
            if first_match is None:
                # Same exception type the original `[...][0]` lookup raised, with a message.
                raise IndexError(f"no DICOM file {path!r} under {data_url/'train'}")
            self.path = first_match
        self._dcm = None
        self._patient_sex = None
        self._body_part_examined = None

    @property
    def label(self):
        """The ``label`` field of this image's row in ``dfi``."""
        return dfi.loc[self.name].label

    @staticmethod
    def _to_uint8(data):
        """Min-max scale an array to 0..255 and return it as ``uint8``.

        Arithmetic is done in float64 so subtracting the minimum cannot overflow a narrow
        integer dtype. A constant image maps to all zeros instead of dividing by zero.
        """
        data = np.asarray(data, dtype=np.float64)
        data = data - np.min(data)
        peak = np.max(data)
        if peak > 0:
            data = data / peak
        return (data * 255).astype(np.uint8)

    def _load(self):
        """Read the file once and cache the display image and the two metadata fields."""
        # `dcmread` replaces the deprecated `read_file` alias.
        dicom = pydicom.dcmread(self.path)
        try:
            data = apply_voi_lut(dicom.pixel_array, dicom)
        except RuntimeError:
            # Fall back to the raw stored values when the VOI LUT step fails.
            data = dicom.pixel_array
        if dicom.PhotometricInterpretation == "MONOCHROME1":
            # Invert the intensities for MONOCHROME1 images.
            data = np.amax(data) - data

        self._dcm = self._to_uint8(data)
        # Missing header attributes raise AttributeError; report them as "NA".
        self._patient_sex = getattr(dicom, "PatientSex", "NA")
        self._body_part_examined = getattr(dicom, "BodyPartExamined", "NA")

    @property
    def dcm(self):
        """The image as a ``uint8`` array scaled to 0..255 (decoded on first access)."""
        if self._dcm is None:
            self._load()
        return self._dcm

    @property
    def patient_sex(self):
        """``PatientSex`` from the DICOM header, or ``"NA"`` when absent."""
        if self._dcm is None:
            self._load()
        return self._patient_sex

    @property
    def body_part_examined(self):
        """``BodyPartExamined`` from the DICOM header, or ``"NA"`` when absent."""
        if self._dcm is None:
            self._load()
        return self._body_part_examined

    @property
    def boxes(self):
        """Parsed ``boxes`` entry of this image's row in ``dfi``; ``[]`` when it is not a string."""
        raw = dfi.loc[self.name].boxes
        if not isinstance(raw, str):
            # Non-string entries (e.g. NaN for a missing value) mean "no boxes".
            return []
        # The field uses single quotes; swap them so it parses as JSON.
        return json.loads(raw.replace("'", "\""))

    def plot(self, ax):
        """Draw the image, a metadata title and any bounding boxes on ``ax``."""
        ax.imshow(self.dcm, cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(f'{self.name}\nBody Part:{self.body_part_examined}\nPatient Sex:{self.patient_sex}')

        for box in self.boxes:
            # Relies on each box dict listing its values in x, y, width, height order
            # -- TODO confirm against the CSV.
            x, y, w, h = box.values()
            ax.add_patch(Rectangle((x, y), w, h, edgecolor="red", fill=False))

    @property
    def name(self):
        """File name without the extension; used as the key into ``dfi``."""
        return self.path.stem

    def _repr_png_(self):
        """PNG bytes of a single-image plot, used by IPython's rich display."""
        fig, ax = plt.subplots()
        try:
            self.plot(ax)
            return print_figure(fig)
        finally:
            # Always close, so the figure is neither leaked nor shown a second time.
            plt.close(fig)

    def __len__(self):
        """A DCM always represents exactly one image."""
        return 1

In [None]:
## Data structure
# Files are laid out as train/<study>/<series>/<image>.dcm

In [None]:
# Every DICOM file under train/. `get_dicoms` is already defined in an earlier cell,
# so it is not redefined here.
dcms = get_dicoms(path=data_url/"train")

In [None]:
DCM("1018aa051dd9")

DICOM header attributes read by `DCM` and shown in each plot title: `PatientSex` and `BodyPartExamined`.

In [None]:
# Number of DICOM files per study directory (two levels above each image file).
study_counter = Counter(dcm_path.parent.parent for dcm_path in dcms)
# One Study per *distinct* directory. Iterating the Counter yields each key once, whereas
# `Counter.elements()` repeats a key once per counted image and would create duplicates.
studys = [Study(study_dir) for study_dir in study_counter]

In [None]:
choice(studys)

In [None]:
DCM(choice(dcms))

In [None]:
Study("7e91e041c7d4")

In [None]:
DCM("d3885c0f58bb")

In [None]:
Study("a7335b2f9815")

In [None]:
Study("8943d1d85097")

In [None]:
Study("970c96c9fa5d")

In [None]:
Study("7e91e041c7d4")

In [None]:
Study("4c45ac349e3a")

In [None]:
Study("26648048b098")

In [None]:
Study("1e96d5eb4c91")

In [None]:
Study("1de3b9724942")

In [None]:
Study("72044bb44d41")

In [None]:
Study("7e91e041c7d4")