In [None]:
import glob
import pydicom
import numpy as np
import imageio
import os
import skimage.draw
import matplotlib.pyplot as plt

In [None]:
def filter_dicom_files(dicom_files):
    """
    Split loaded DICOM datasets into four lists according to which attribute
    each dataset exposes. The checks run in the order listed below and the
    first match wins:

    dicom_series: datasets with an ``ImageType`` attribute
    (CT DICOM files).

    dicom_structures: datasets with a ``StructureSetName`` attribute
    (RS DICOM file).

    dicom_plan: datasets with a ``BeamSequence`` attribute
    (RP DICOM file).

    dicom_dose: every remaining dataset, assumed to be the dose grid
    (RD DICOM file).
    """
    dicom_series, dicom_structures, dicom_plan, dicom_dose = [], [], [], []

    # Ordered (marker attribute, destination list) pairs.
    destinations = (
        ('ImageType', dicom_series),
        ('StructureSetName', dicom_structures),
        ('BeamSequence', dicom_plan),
    )

    for dataset in dicom_files:
        # TODO Add condition - will it always be DICOM dose file?
        bucket = next(
            (target for marker, target in destinations
             if hasattr(dataset, marker)),
            dicom_dose,
        )
        bucket.append(dataset)

    return dicom_series, dicom_structures, dicom_plan, dicom_dose

In [None]:
def add_transfer_syntax(dicom_series):
    """
    Make sure every dataset of the series has a TransferSyntaxUID in its
    file meta header, defaulting to Implicit VR Little Endian when it is
    absent. Required before the pixel_array attribute is accessed.

    The datasets are updated in place and the same list is returned.
    """
    for dataset in dicom_series:
        meta = dataset.file_meta
        if not hasattr(meta, 'TransferSyntaxUID'):
            meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
    return dicom_series

In [None]:
def read_structures(dicom_structures):
    """Extract the name and contour points of every ROI in a structure set.

    Parameters
    ----------
    dicom_structures : RS DICOM file
        Dataset exposing ``StructureSetROISequence`` and
        ``ROIContourSequence``.

    Returns
    -------
    structures : list
        One dict per item of ``ROIContourSequence``, in that order.
        Each dict has keys:
        'name' : ``ROIName`` of the ROI the contours belong to
        'contour_points' : list with the ``ContourData`` of each contour
            of the ROI; empty when the ROI has no contours

    Note
    -------
    contour_points need to be transformed to pixel space for model
    """
    roi_definitions = dicom_structures.StructureSetROISequence

    # Contour items point at their ROI definition through
    # ReferencedROINumber -> ROINumber; the two sequences are not guaranteed
    # to be stored in the same order, so pairing by position can mislabel.
    names_by_number = {
        roi.ROINumber: roi.ROIName
        for roi in roi_definitions
        if hasattr(roi, 'ROINumber')
    }

    structures = []
    for structure_index, roi_contour in enumerate(
            dicom_structures.ROIContourSequence):
        roi_number = getattr(roi_contour, 'ReferencedROINumber', None)
        if roi_number in names_by_number:
            name = names_by_number[roi_number]
        else:
            # No usable reference: fall back to the positional pairing.
            name = roi_definitions[structure_index].ROIName

        # An ROI that was defined but never drawn has no ContourSequence.
        contours = getattr(roi_contour, 'ContourSequence', None) or []

        structures.append({
            'name': name,
            'contour_points': [z_slice.ContourData for z_slice in contours],
        })
    return structures

In [None]:
def clean_structures(structures, structure_names):
    """Keep only the structures whose 'name' appears in structure_names.

    The relative order of ``structures`` is preserved; the order of
    ``structure_names`` has no effect on the result.
    """
    return [
        candidate for candidate in structures
        if candidate['name'] in structure_names
    ]

In [None]:
def transform_to_array(x, y, z, dicom_series):
    """
    Transform points from patient space to pixel space.

    Parameters
    ----------
    x, y, z : array-like
        Patient-space coordinates of the points.
    dicom_series : list
        Image slices ordered so that the first one has the smallest
        ImagePositionPatient[2]; that slice defines the origin.

    Returns
    -------
    d, r, c : numpy.ndarray
        Fractional slice (depth), row and column indices of the points.

    Raises
    ------
    AssertionError
        If the first slice of dicom_series is not the lowest one.
    """
    first_slice = dicom_series[0]
    Cx, Cy, Cz = (float(value) for value in first_slice.ImagePositionPatient)

    z_positions = np.array(
        [float(dicom.ImagePositionPatient[2]) for dicom in dicom_series])
    assert Cz == np.min(z_positions), (
        "dicom_series must be sorted by ascending ImagePositionPatient[2]")

    # DICOM PixelSpacing is ordered (row spacing, column spacing): the first
    # value is the distance between rows (y step), the second the distance
    # between columns (x step).
    dy, dx = (float(value) for value in first_slice.PixelSpacing)

    # The distance between slices comes from the slice positions;
    # SliceThickness is only the nominal thickness and can differ from the
    # spacing (overlapping or gapped reconstructions). It is kept as the
    # fallback when the positions cannot provide a spacing.
    dz = None
    if len(z_positions) > 1:
        spacing = float(np.median(np.diff(np.sort(z_positions))))
        if spacing > 0:
            dz = spacing
    if dz is None:
        dz = float(first_slice.SliceThickness)

    orientation = first_slice.ImageOrientationPatient

    x = np.array(x)
    y = np.array(y)
    z = np.array(z)

    # NOTE Only handles +-1 cosines
    # A more robust method that handles intermediate angles
    # was attempted however the affine matrix was singular
    # See: http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.2.html
    # See: https://dicomiseasy.blogspot.com/2013/06/getting-oriented-using-image-plane.html
    d = (z - Cz) / dz
    r = (y - Cy) / dy * orientation[4]
    c = (x - Cx) / dx * orientation[0]

    return d, r, c

In [None]:
def make_output_folders(output_path, patient_id):
    """
    Create the output folder tree for one patient.

    Creates ``<output_path>/<patient_id>`` with ``images`` and ``labels``
    sub-folders, including any missing parent directories. A folder that is
    already present is reported and reused.

    Parameters
    ----------
    output_path : str
        Root folder of the export.
    patient_id : str
        Name of the per-patient folder.

    Returns
    -------
    images_folder, labels_folder : str
        Paths of the two sub-folders, each ending with "/".

    Raises
    ------
    OSError
        If a folder cannot be created (for example missing permissions or a
        regular file occupying the path).
    """
    patient_folder = output_path + "/" + patient_id
    images_folder = patient_folder + "/images/"
    labels_folder = patient_folder + "/labels/"

    for folder in (patient_folder, images_folder, labels_folder):
        if os.path.isdir(folder):
            print (f"Directory already exists: {folder}")
        else:
            # makedirs also creates missing parents; genuine failures
            # propagate instead of being reported as "already exists".
            os.makedirs(folder, exist_ok=True)

    return images_folder, labels_folder

In [None]:
def get_masks(structs, dcm_series):
    """
    Rasterise structure contours into binary masks on the image grid.

    Parameters
    ----------
    structs : list
        Dicts with a 'contour_points' entry: a list of flat
        [x1, y1, z1, x2, y2, z2, ...] coordinate lists, one per contour.
    dcm_series : list
        Image slices, in the order expected by transform_to_array.

    Returns
    -------
    masks : numpy.ndarray
        int16 array of shape (slices, structures, rows, columns) holding 1
        inside a contour and 0 elsewhere.
    """
    import warnings

    struct_chann = len(structs)
    slice_chann = len(dcm_series)

    # Take the image size from the series itself; 512 x 512 is kept only as
    # the fallback when the size is not available.
    if slice_chann:
        rows = int(getattr(dcm_series[0], 'Rows', 512))
        cols = int(getattr(dcm_series[0], 'Columns', 512))
    else:
        rows = cols = 512

    masks = np.zeros(
        shape=(slice_chann, struct_chann, rows, cols), dtype=np.int16)

    for struct_index, struct in enumerate(structs):
        for z_slice_contour_data in struct['contour_points']:

            # arrange into list([x1, y1, z1], [x2, y2, z2]...)
            xyz_points = np.array(
                z_slice_contour_data, dtype=float).reshape((-1, 3))
            if xyz_points.size == 0:
                continue

            # x and y points for a struct for a z; the z values should all
            # be the same within one contour
            x_points_on_z_slice = xyz_points[:, 0]
            y_points_on_z_slice = xyz_points[:, 1]
            z_points = xyz_points[:, 2]

            d, r, c = transform_to_array(
                x_points_on_z_slice, y_points_on_z_slice, z_points, dcm_series)

            # Round to the nearest slice: truncation turns 4.9999 into 4 and
            # maps anything in (-1, 0) onto slice 0.
            slice_index = int(np.round(d[0]))

            # A negative index would silently wrap around to the last slices
            # and a too large one would raise, so skip such contours.
            if not 0 <= slice_index < slice_chann:
                warnings.warn(
                    f"Contour of structure {struct_index} lies outside the "
                    f"image volume (slice index {slice_index}); skipped")
                continue

            # Clip the polygon to the image so that out-of-bounds vertices
            # cannot index outside (or wrap around) the mask.
            rr, cc = skimage.draw.polygon(r, c, shape=(rows, cols))

            masks[slice_index, struct_index, rr, cc] = 1

    return masks

In [None]:
def go_baby_go(input_path, output_path, structure_names):
    """
    Convert every patient folder under input_path into image and label TIFFs.

    For each entry matching ``input_path + "*"`` the ``*.dcm`` files are
    read, the image slices are sorted by ImagePositionPatient[2], the
    requested structures are rasterised, and one TIFF per slice (images)
    and per slice and structure (labels) is written below output_path.

    Entries that contain no ``.dcm`` files, no image slices or no structure
    set are reported and skipped.

    Parameters
    ----------
    input_path : str
        Prefix of the patient folders; "*" is appended to it directly, so it
        normally ends with a path separator.
    output_path : str
        Root folder receiving one sub-folder per patient.
    structure_names : list of str
        ROI names to export.
    """
    # Sorted so that the processing order is reproducible between runs.
    patient_paths = sorted(glob.glob(input_path + "*"))

    for patient_index, path in enumerate(patient_paths):

        print(f"----------------")
        print(f"LOADING: {patient_index+1}/{len(patient_paths)}")
        print(f"{path}")

        dcm_paths = glob.glob(path + "/*.dcm")
        if not dcm_paths:
            # Stray files or empty folders must not abort the whole batch.
            print(f"SKIPPING: no .dcm files found")
            continue

        dcm_files = [pydicom.dcmread(dcm_path, force=True) for dcm_path in dcm_paths]

        dcm_series, dcm_structs, *_ = filter_dicom_files(dcm_files)
        if not dcm_series or not dcm_structs:
            print(f"SKIPPING: image series or structure set missing")
            continue

        dcm_series = add_transfer_syntax(dcm_series)
        dcm_series.sort(key=lambda x: float(x.ImagePositionPatient[2]))

        # This is in pixel space
        images = np.array([dcm_file.pixel_array for dcm_file in dcm_series])

        # Only the first structure set of the folder is used.
        structs = read_structures(dcm_structs[0])
        structs = clean_structures(structs, structure_names)

        # NOTE(review): label numbers follow the order of the structures in
        # the RS file after filtering, not the order of structure_names, so
        # the same number may denote different structures for different
        # patients - confirm this is acceptable for downstream use.
        masks = get_masks(structs, dcm_series)

        # NOTE(review): the output folder is named after the SOPInstanceUID
        # of the first slice rather than a patient identifier.
        patient_id = str(dcm_series[0].SOPInstanceUID)
        images_folder, labels_folder = make_output_folders(output_path, patient_id)

        # Zero-padding widths so that file names sort naturally.
        fill_images = len(str(len(images)))
        fill_masks = len(str(len(structure_names)))

        for index, image in enumerate(images):
            slice_tag = str(patient_id) + "-" + str(index).zfill(fill_images)
            image_path = images_folder + slice_tag + ".tif"
            imageio.imwrite(image_path, image)

            for number, label in enumerate(masks[index]):
                labels_path = labels_folder + slice_tag + "-" + str(number).zfill(fill_masks) + ".tif"
                imageio.imwrite(labels_path, label)

    print(f"================")
    print(f"COMPLETE")

In [None]:
# Run configuration.
# NOTE: both paths are absolute and specific to one machine; adjust them
# before running elsewhere. INPUT_PATH ends with "/" because go_baby_go
# appends "*" to it directly.
INPUT_PATH = "/home/matthew/priv/PROSTATE_TEST/"
OUTPUT_PATH = "/home/matthew/tmp/OUTPUT_FOLDER"

# ROI names to export.
STRUCTURE_NAMES = [
    "patient",
    "RT HOF",
    "LT HOF",
    "BLADDER",
    "RECTUM",
    "Couch Foam Half Couch",
    "Couch Outer Half Couch",
    "Couch Edge",
]

go_baby_go(
    input_path=INPUT_PATH,
    output_path=OUTPUT_PATH,
    structure_names=STRUCTURE_NAMES,
)