In [1]:
import numpy as np
import os
import pathlib
from PIL import Image
import pydicom as dicom
from tqdm import tqdm

In [2]:
# Source directory holding one sub-directory per patient, and the
# directory the processed frames are written to.
input = 'input/train'
output = 'output/processed'
# Patient IDs that are excluded from processing.
blacklist = {'ID00011637202177653955184', 'ID00052637202186188008618'}

# Every entry found under the input directory, minus the excluded IDs.
patients = set(os.listdir(input)).difference(blacklist)
print(f'{len(patients)} patients')

174 patients


In [3]:
def load_scan(path):
    """Read every file in ``path`` as a DICOM dataset, in a stable order.

    ``os.listdir`` returns entries in arbitrary order, and callers number
    their outputs by position in the returned list, so the file names are
    sorted before reading: purely numeric stems are ordered by value
    ("2.dcm" before "10.dcm"), any other names follow alphabetically.

    Args:
        path: directory containing the DICOM files of one scan.

    Returns:
        list of datasets as returned by ``pydicom.dcmread``, one per
        directory entry.
    """
    def _order(name):
        stem = os.path.splitext(name)[0]
        if stem.isdecimal():
            return (0, int(stem), name)
        return (1, 0, name)

    # NOTE(review): ordering by ImagePositionPatient[2] was disabled in the
    # original code; file-name order is used here as a deterministic
    # substitute -- confirm it matches the anatomical slice order.
    # dcmread is the current pydicom reader; read_file is its deprecated alias.
    return [dicom.dcmread(f'{path}/{name}')
            for name in sorted(os.listdir(path), key=_order)]


def to_hu(slices, padding=-2000):
    """Convert to Hounsfield units (HU).

    For every slice the padding pixels are zeroed, the raw values are
    multiplied by ``RescaleSlope`` (when it differs from 1) and
    ``RescaleIntercept`` is added.

    Args:
        slices: sequence of DICOM datasets exposing ``pixel_array``,
            ``RescaleSlope`` and ``RescaleIntercept``. All pixel arrays must
            have the same shape so they can be stacked.
        padding: threshold used for slices that carry no
            ``PixelPaddingValue`` tag; pixels ``<=`` the threshold are
            treated as outside-of-scan and set to 0 before rescaling.

    Returns:
        A new ``np.int16`` array with one leading axis over the slices.

    Raises:
        ValueError: from ``np.stack`` when ``slices`` is empty or the pixel
            arrays differ in shape.
    """
    # Convert to int16 (the data is sometimes stored as uint16); this should
    # be possible as values should always be low enough (<32k).
    frames = np.stack([s.pixel_array for s in slices]).astype(np.int16)
    for i, s in enumerate(slices):
        # Resolve the threshold per slice so that one slice's tag does not
        # silently replace the caller's default for the slices after it.
        if "PixelPaddingValue" in s:
            pad = np.int16(s.PixelPaddingValue)
        else:
            pad = padding
        slope = np.float64(s.RescaleSlope)
        intercept = np.int16(s.RescaleIntercept)

        hu = frames[i]
        # Set outside-of-scan pixels to 0
        hu[hu <= pad] = 0
        if slope != 1:
            # This creates a new array, so it has to be stored back below.
            hu = (slope * hu.astype(np.float64)).astype(np.int16)
        hu += intercept
        frames[i] = hu
    return frames


def window(frames, hu_min=-1000, hu_max=600):
    """Map an intensity window onto 8-bit grey values replicated to 3 channels.

    Values at or below ``hu_min`` become 0, values at or above ``hu_max``
    become 255, and values in between are scaled linearly (fraction
    truncated). The arithmetic is done in float64: subtracting ``hu_min``
    directly from an int16 array wraps around for very bright pixels and
    would turn them black.

    Args:
        frames: array-like of shape ``(n, height, width)``.
        hu_min: lower bound of the window.
        hu_max: upper bound of the window; must be greater than ``hu_min``.

    Returns:
        ``np.uint8`` array of shape ``(n, height, width, 3)`` whose three
        channels are identical.

    Raises:
        ValueError: if ``hu_max`` is not greater than ``hu_min``.
    """
    if hu_max <= hu_min:
        raise ValueError(f'hu_max ({hu_max}) must be greater than hu_min ({hu_min})')
    scaled = (np.asarray(frames, dtype=np.float64) - hu_min) / (hu_max - hu_min)
    gray = (np.clip(scaled, 0.0, 1.0) * 255).astype(np.uint8)
    # Replicate the single grey channel along a new trailing axis.
    return np.repeat(gray[..., np.newaxis], 3, axis=-1)


def resize(frames, target_size=(600, 600), slices=None):
    """Resize every RGB frame to ``target_size`` with nearest-neighbour sampling.

    Args:
        frames: indexable collection of arrays that PIL can read in 'RGB' mode.
        target_size: final ``(width, height)`` passed to ``Image.resize``.
        slices: optional sequence parallel to ``frames``; when given, each
            frame is first resized to ``shape[1] * PixelSpacing[1]`` by
            ``shape[0] * PixelSpacing[0]`` pixels (truncated to int) before
            the final resize.

    Returns:
        ``np.uint8`` array holding the resized frames.
    """
    def _one(idx, frame):
        picture = Image.fromarray(frame, mode='RGB')
        if slices is not None:
            meta = slices[idx]
            row_spacing = float(meta.PixelSpacing[0])
            col_spacing = float(meta.PixelSpacing[1])
            scaled_h = int(frame.shape[0] * row_spacing)
            scaled_w = int(frame.shape[1] * col_spacing)
            # PIL expects the size as (width, height).
            picture = picture.resize((scaled_w, scaled_h), Image.NEAREST)
        return np.asarray(picture.resize(target_size, Image.NEAREST))

    resized = [_one(idx, frames[idx]) for idx in range(len(frames))]
    return np.array(resized, dtype=np.uint8)


def preprocess(dir):
    """Run the full pipeline for one scan directory.

    Loads the slices from ``dir``, converts them with ``to_hu``, applies
    ``window`` with its default bounds and finally ``resize`` with its
    default target size. ``slices=None`` is passed to ``resize``, so no
    pixel-spacing based rescaling takes place.

    Returns:
        The array produced by ``resize``.
    """
    scan = load_scan(dir)
    windowed = window(to_hu(scan))
    return resize(windowed, slices=None)

In [4]:
# Preprocess each patient and write the frames as numbered PNG files
# (1.png, 2.png, ...) into a per-patient output directory.
for patient in tqdm(patients):
    dir = f"{output}/{patient}"
    pathlib.Path(dir).mkdir(parents=True, exist_ok=True)
    try:
        frames = preprocess(f"{input}/{patient}")
    except Exception as ex:
        # Report the failing patient and carry on with the next one.
        print(f'patient={patient}, Error={ex}')
    else:
        for i, frame in enumerate(frames):
            Image.fromarray(frame, mode='RGB').save(f"{dir}/{i + 1}.png")

100%|██████████| 174/174 [44:31<00:00, 15.35s/it] 
