In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

import pydicom
import scipy.ndimage
# import gdcm

from skimage import measure 
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from skimage.morphology import disk, opening, closing
from tqdm import tqdm

from IPython.display import HTML
from PIL import Image

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

from os import listdir, mkdir

In [None]:
# List the entries of the input folder to see which datasets are available.
listdir("../input/")

In [None]:
# Folder the rest of the notebook reads from: later cells build both the CSV
# path and the per-image ".dicom" paths by prefixing this string.
basepath = "../input/vinbigdata-chest-xray-abnormalities-detection/train/"
# Show what that folder contains.
listdir(basepath)

In [None]:
# Load the training table; a later cell reads its `image_id` column to pick a DICOM file.
# NOTE(review): basepath ends in "train/", the same folder the .dicom files are
# read from -- confirm train.csv really lives there and not one level up.
train = pd.read_csv(basepath + "train.csv")


In [None]:
# (rows, columns) of the training table.
train.shape

In [None]:
# Preview the first rows of the training table.
train.head()


# Working with DICOM files

In [None]:
def load_scans(dcm_path):
    """Read a single DICOM file identified by ``dcm_path``.

    The file location is the global ``basepath``, a ``/`` separator, the
    string form of ``dcm_path`` and the ``.dicom`` extension, concatenated
    in that order.

    Parameters
    ----------
    dcm_path
        Identifier of the file without extension; it is converted with
        ``str`` before being placed in the path.

    Returns
    -------
    Whatever ``pydicom.dcmread`` returns for that file.
    """
    dicom_file = f"{basepath}/{dcm_path!s}.dicom"
    return pydicom.dcmread(dicom_file)

In [None]:
# Use the first image id listed in the training table as the running example
# and read the corresponding DICOM file.
example = train.image_id.values[0]
scans = load_scans(example)

In [None]:
# Display the object returned by load_scans for the example image.
scans

In [None]:
# Compare the stored pixel values of the example image with the values obtained
# after the DICOM rescale transform (value * slope + intercept).
fig, ax = plt.subplots(1,2,figsize=(20,5))
image = scans.pixel_array.flatten()
# The rescale tags are optional in DICOM files; fall back to the identity
# transform instead of raising AttributeError when a file does not carry them.
rescaled_image = (image * float(getattr(scans, "RescaleSlope", 1))
                  + float(getattr(scans, "RescaleIntercept", 0)))
# Both arrays are already one-dimensional, so no further flattening is needed.
sns.distplot(image, ax=ax[0]);
sns.distplot(rescaled_image, ax=ax[1])
# Only the single example image is plotted here, and basepath points at a chest
# X-ray dataset, so the titles avoid claiming "10 examples" or Hounsfield units.
ax[0].set_title("Raw pixel value distribution (1 example)")
ax[1].set_title("Rescaled pixel value distribution (1 example)");

In [None]:
def set_outside_scanner_to_air(raw_pixelarrays, threshold=-1000, fill_value=0):
    """Overwrite very low raw values with a fill value, in place.

    Every element that is less than or equal to ``threshold`` is replaced by
    ``fill_value``.  The input array itself is modified and also returned.

    The defaults were chosen for CT data from the OSIC dataset, where regions
    outside the scanner carry a raw value of -2000 and air is 0; -1000 sits
    between the two.
    NOTE(review): confirm these defaults are meaningful for the images loaded
    in this notebook.

    Parameters
    ----------
    raw_pixelarrays : numpy.ndarray
        Raw pixel values; modified in place.
    threshold : scalar, optional
        Values at or below this are treated as padding (default -1000).
    fill_value : scalar, optional
        Value written over the padding (default 0).

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    raw_pixelarrays[raw_pixelarrays <= threshold] = fill_value
    return raw_pixelarrays

In [None]:
def transform_to_hu(slices):
    """Apply the DICOM rescale transform to one image and return it as int16.

    The stored pixel values are cleaned with ``set_outside_scanner_to_air``,
    multiplied by ``RescaleSlope`` (truncating toward zero when the slope is
    not 1) and shifted by the integer part of ``RescaleIntercept``.
    NOTE(review): the result is in Hounsfield units only when the file's
    rescale tags map to HU (typically CT); confirm for the data used here.

    Parameters
    ----------
    slices
        Object exposing ``pixel_array`` and, optionally, ``RescaleSlope`` and
        ``RescaleIntercept`` (for example the value returned by
        ``load_scans``).  Missing rescale attributes are treated as slope 1
        and intercept 0.

    Returns
    -------
    numpy.ndarray
        ``int16`` array with the same shape as ``slices.pixel_array``.  Values
        that do not fit into ``int16`` are saturated at the ``int16`` limits
        instead of wrapping around.
    """
    # Work in int32: unsigned 16-bit pixel data above 32767 would wrap to
    # negative numbers (and then be mistaken for padding) if cast to int16.
    # astype also copies, so the caller's pixel data is never modified.
    images = slices.pixel_array.astype(np.int32)

    images = set_outside_scanner_to_air(images)

    # The rescale tags are optional; without them the transform is the identity.
    slope = getattr(slices, "RescaleSlope", 1)
    intercept = getattr(slices, "RescaleIntercept", 0)

    if slope != 1:
        # np.trunc drops the fractional part toward zero, matching what an
        # integer cast of the scaled values does.
        images = np.trunc(float(slope) * images)

    # Only the integer part of the intercept is applied.
    images = images + int(intercept)

    # Saturate rather than wrap when narrowing to the int16 return type.
    limits = np.iinfo(np.int16)
    return np.clip(images, limits.min, limits.max).astype(np.int16)

In [None]:
# Run the rescale conversion on the example image loaded earlier.
hu_scans = transform_to_hu(scans)


In [None]:
# One row, four panels: stored image, its value histogram, converted image,
# and the converted image's value histogram.
# NOTE(review): the titles mention CT and HU, but basepath points at a chest
# X-ray dataset -- confirm the labels are the intended ones.
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(20, 3))

# Image panels (columns 0 and 2).
ax[0].imshow(scans.pixel_array, cmap="bone")
ax[0].set_title("Original CT-scan")
ax[2].imshow(hu_scans, cmap="bone")
ax[2].set_title("CT-scan in HU")

# Histogram panels (columns 1 and 3).
sns.distplot(scans.pixel_array.ravel(), ax=ax[1])
ax[1].set_title("Pixelarray distribution")
sns.distplot(hu_scans.ravel(), ax=ax[3])
ax[3].set_title("HU values distribution")

# Grid lines are switched off on the two image panels only.
for m in (0, 2):
    ax[m].grid(False)