In [None]:
import functools
import pathlib
import json
import random

import numpy as np
import matplotlib.pyplot as plt

import shapely.geometry
import skimage.draw
import skimage.filters

import tensorflow as tf

import pydicom

import pymedphys
import pymedphys._dicom.structure as dcm_struct

from names import names_map

In [None]:
# All DICOM data (CT slices and structure sets) lives somewhere beneath this
# folder in the user's home directory.
data_path_root = pathlib.Path.home() / '.data/dicom-ct-and-structures'

# Arrays generated by this notebook are written to a sub-folder of the data
# root; create it (and any missing parents) up front.
npz_directory = data_path_root / 'npz_cache'
npz_directory.mkdir(parents=True, exist_ok=True)

In [None]:
# Every file under the data root that matches the recursive
# 'dicom/**/*.dcm' glob pattern.
dcm_paths = [found for found in data_path_root.rglob('dicom/**/*.dcm')]

# JSON file holding the UID lookup tables.
uid_cache_path = data_path_root / "uid-cache.json"

In [None]:
def soft_surface_dice(reference, evaluation):
    """Score how closely the edges of two masks agree.

    Both inputs are run through a Scharr edge filter. The score is one minus
    the summed absolute difference of the two edge maps divided by the summed
    total of both edge maps.

    Parameters
    ----------
    reference, evaluation
        Arrays accepted by ``skimage.filters.scharr``. They must share a
        shape so that their edge maps can be subtracted element-wise.

    Returns
    -------
    float
        ``1.0`` when the two edge maps are identical, decreasing as they
        diverge. When neither input contains any edge (for example two
        constant masks) the result is defined as ``1.0``.
    """
    edge_reference = skimage.filters.scharr(reference)
    edge_evaluation = skimage.filters.scharr(evaluation)

    total_edge = np.sum(edge_evaluation + edge_reference)
    if total_edge == 0:
        # No edge in either input: the ratio below would be 0 / 0 (NaN).
        # Two edge-free masks agree perfectly, so report a perfect score.
        return 1.0

    score = np.sum(np.abs(edge_evaluation - edge_reference)) / total_edge

    return 1 - score

In [None]:
def get_uid_cache(relative_paths):
    """Load, or rebuild, the tables linking CT slices to structure sets.

    The tables are persisted as JSON at ``uid_cache_path``. The stored copy
    is returned untouched when it was built from the same set of paths as
    ``relative_paths``; otherwise every file is re-read and the cache file
    is rewritten.

    Parameters
    ----------
    relative_paths
        Iterable of DICOM file paths relative to ``data_path_root``.

    Returns
    -------
    dict
        ``"ct_image_paths"``: CT SOPInstanceUID -> relative path.
        ``"structure_set_paths"``: structure set SOPInstanceUID -> relative
        path.
        ``"ct_uid_to_structure_uid"``: CT SOPInstanceUID -> SOPInstanceUID of
        the structure set sharing its StudyInstanceUID. CT slices whose
        study has no structure set are left out of this table. When several
        structure sets share a study, the one read last wins.
        ``"paths_when_run"``: the paths the tables were built from.
    """
    relative_paths = [str(path) for path in relative_paths]

    try:
        with open(uid_cache_path) as f:
            uid_cache = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        uid_cache = {
            "ct_image_paths": {},
            "structure_set_paths": {},
            "ct_uid_to_structure_uid": {},
            "paths_when_run": []
        }

    if set(uid_cache["paths_when_run"]) == set(relative_paths):
        return uid_cache

    # Read exactly the files that were passed in (resolved against the data
    # root), so that each header is guaranteed to pair with its own path.
    dcm_headers = [
        pydicom.read_file(
            data_path_root.joinpath(path), force=True,
            specific_tags=['SOPInstanceUID', 'SOPClassUID', 'StudyInstanceUID'])
        for path in relative_paths
    ]

    ct_image_paths = {}
    structure_set_paths = {}
    ct_uid_to_study_instance_uid = {}
    study_instance_uid_to_structure_uid = {}

    for header, path in zip(dcm_headers, relative_paths):
        sop_class_name = header.SOPClassUID.name

        if sop_class_name == "CT Image Storage":
            ct_uid = str(header.SOPInstanceUID)
            ct_image_paths[ct_uid] = path
            ct_uid_to_study_instance_uid[ct_uid] = str(header.StudyInstanceUID)
        elif sop_class_name == "RT Structure Set Storage":
            structure_uid = str(header.SOPInstanceUID)
            structure_set_paths[structure_uid] = path
            study_instance_uid_to_structure_uid[
                str(header.StudyInstanceUID)] = structure_uid

    # A CT study without any structure set must not abort the whole cache
    # build; such slices simply get no entry in the mapping.
    ct_uid_to_structure_uid = {
        ct_uid: study_instance_uid_to_structure_uid[study_uid]
        for ct_uid, study_uid in ct_uid_to_study_instance_uid.items()
        if study_uid in study_instance_uid_to_structure_uid
    }

    uid_cache["ct_image_paths"] = ct_image_paths
    uid_cache["structure_set_paths"] = structure_set_paths
    uid_cache["ct_uid_to_structure_uid"] = ct_uid_to_structure_uid
    uid_cache["paths_when_run"] = relative_paths

    with open(uid_cache_path, "w") as f:
        json.dump(uid_cache, f)

    return uid_cache

In [None]:
# The cache is keyed on paths relative to the data root.
relative_paths = [dcm_path.relative_to(data_path_root) for dcm_path in dcm_paths]

uid_cache = get_uid_cache(relative_paths)

# Pull the three lookup tables out of the cache into notebook-level names.
ct_image_paths, structure_set_paths, ct_uid_to_structure_uid = (
    uid_cache[table_name]
    for table_name in (
        "ct_image_paths",
        "structure_set_paths",
        "ct_uid_to_structure_uid",
    )
)

In [None]:
# Uncomment to inspect the CT UID -> structure set UID mapping:
# ct_uid_to_structure_uid

In [None]:
# Distinct, non-None values of names_map, sorted so that every structure
# keeps the same position from run to run.
structures_to_learn = sorted(
    {mapped_name for mapped_name in names_map.values() if mapped_name is not None}
)
structures_to_learn

In [None]:
# Draw one CT slice UID at random from the known CT images ...
ct_uid = random.choice(list(ct_image_paths))
# ... then immediately override the draw with this fixed slice.
ct_uid = '1.2.840.113704.1.111.3156.1551674448.121455'

In [None]:
# Look up the structure set associated with the selected CT slice.
# Raises KeyError when the slice has no entry in the mapping.
structure_uid = ct_uid_to_structure_uid[ct_uid]
structure_uid

In [None]:
# Cached paths are relative to the data root, so resolve against it.
structure_set_path = data_path_root / structure_set_paths[structure_uid]

# Only the ROI list and the ROI contours are requested from the file.
structure_set = pydicom.read_file(
    structure_set_path,
    specific_tags=['ROIContourSequence', 'StructureSetROISequence'],
    force=True,
)

In [None]:
# ROI number -> mapped structure name, for every ROI in the structure set
# whose name maps to something other than None. An ROI name that is missing
# from names_map raises KeyError.
number_to_name_map = {
    roi.ROINumber: mapped_name
    for roi, mapped_name in (
        (roi, names_map[roi.ROIName])
        for roi in structure_set.StructureSetROISequence
    )
    if mapped_name is not None
}

number_to_name_map

In [None]:
# Group the raw contour point lists first by the CT slice they reference and
# then by structure name:
#     {CT slice UID: {structure name: [ContourData, ...]}}
contours_by_ct_uid = {}

for roi_contour_sequence_item in structure_set.ROIContourSequence:
    # ROIs that are absent from number_to_name_map are not of interest.
    structure_name = number_to_name_map.get(
        roi_contour_sequence_item.ReferencedROINumber)
    if structure_name is None:
        continue

    # An ROI may be defined without any contours; treat it as empty rather
    # than failing on the missing attribute.
    contour_sequence = getattr(roi_contour_sequence_item, 'ContourSequence', [])

    for contour_sequence_item in contour_sequence:
        # Deliberately NOT named `ct_uid`: reusing that name here would
        # overwrite the notebook-level slice selected earlier, and every
        # later cell would silently process a different slice.
        contour_ct_uid = (
            contour_sequence_item.ContourImageSequence[0].ReferencedSOPInstanceUID)

        contours_by_ct_uid.setdefault(contour_ct_uid, {}).setdefault(
            structure_name, []).append(contour_sequence_item.ContourData)

In [None]:
# Load the complete CT slice for the selected UID.
ct_path = data_path_root / ct_image_paths[ct_uid]
dcm_ct = pydicom.read_file(ct_path, force=True)

# The transfer syntax is set unconditionally, replacing whatever the file
# declared (presumably so that pixel_array can be decoded -- TODO confirm).
dcm_ct.file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian

ct_size = dcm_ct.pixel_array.shape

In [None]:
def get_image_transformation_parameters(dcm_ct):
    """Extract the in-plane geometry of a CT slice.

    Parameters
    ----------
    dcm_ct
        Object exposing the DICOM attributes ``PixelSpacing``,
        ``ImagePositionPatient`` and ``ImageOrientationPatient``.

    Returns
    -------
    tuple
        ``(dx, dy, Cx, Cy, Ox, Oy)`` where ``dx``/``dy`` are the pixel
        spacings along x (between columns) and y (between rows),
        ``Cx``/``Cy`` are the first two components of the image position,
        and ``Ox``/``Oy`` are elements 0 and 4 of the image orientation.
    """
    # From Matthew Coopers work in ../old/data_generator.py

    # DICOM PixelSpacing is ordered (row spacing, column spacing): the first
    # value separates adjacent rows (the y direction), the second separates
    # adjacent columns (the x direction). SliceThickness is not needed for
    # the in-plane geometry, so it is not read.
    dy, dx, *_ = dcm_ct.PixelSpacing
    Cx, Cy, *_ = dcm_ct.ImagePositionPatient

    orientation = dcm_ct.ImageOrientationPatient
    Ox, Oy = orientation[0], orientation[4]

    return dx, dy, Cx, Cy, Ox, Oy

In [None]:
def reduce_expanded_mask(expanded_mask, img_size, expansion):
    """Shrink a super-sampled mask by averaging each expansion x expansion tile.

    Parameters
    ----------
    expanded_mask
        Array-like of shape ``(rows * expansion, cols * expansion)``.
    img_size : int or (int, int)
        Size of the reduced mask. A single integer describes a square
        ``img_size x img_size`` result; a pair is read as ``(rows, cols)``.
    expansion : int
        Number of expanded samples per reduced pixel along each axis.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(rows, cols)`` holding the mean of every tile (for
        a boolean input, the fraction of the tile that is set).
    """
    try:
        rows, cols = img_size
    except TypeError:
        # A plain integer: the reduced image is square.
        rows = cols = img_size

    # Axes 1 and 3 index the position inside each tile. After the first mean
    # removes axis 1, the remaining in-tile axis sits at position 2.
    tiles = np.reshape(expanded_mask, (rows, expansion, cols, expansion))

    return tiles.mean(axis=1).mean(axis=2)

In [None]:
def calculate_aliased_mask(contours, dcm_ct, expansion=5):
    """Rasterise contours onto the CT pixel grid with soft edges.

    Each contour is drawn as a filled polygon on a grid ``expansion`` times
    finer than the CT slice. The union of all polygons is averaged back down
    to the CT resolution and rescaled from ``[0, 1]`` to ``[-1, 1]``.

    Parameters
    ----------
    contours
        Iterable of flat ``[x0, y0, z0, x1, y1, z1, ...]`` point lists. All
        points of one contour must share a single z value.
    dcm_ct
        CT dataset providing ``pixel_array`` and the attributes read by
        ``get_image_transformation_parameters``.
    expansion : int, optional
        Super-sampling factor along each axis.

    Returns
    -------
    x_grid, y_grid : numpy.ndarray
        Coordinates of the pixel columns and rows.
    mask : numpy.ndarray
        ``2 * coverage - 1`` per pixel: ``-1`` where no polygon touches the
        pixel, ``1`` where it is fully covered.

    Notes
    -----
    The reduction step is given ``ct_size[0]`` only, so this presumably
    expects square slices -- TODO confirm.
    """
    dx, dy, Cx, Cy, Ox, Oy = get_image_transformation_parameters(dcm_ct)

    ct_size = np.shape(dcm_ct.pixel_array)

    # Build the grids from integer indices. np.arange with a float step and
    # a computed stop can return one element too many through rounding.
    # pixel_array is (rows, cols): x runs along columns, y along rows.
    x_grid = float(Cx) + np.arange(ct_size[1]) * float(dx * Ox)
    y_grid = float(Cy) + np.arange(ct_size[0]) * float(dy * Oy)

    new_ct_size = np.array(ct_size) * expansion

    expanded_mask = np.zeros(new_ct_size, dtype=bool)

    # Presumably centres each block of sub-pixels on its CT pixel.
    sub_pixel_offset = (expansion - 1) * 0.5

    for xyz in contours:
        x = np.array(xyz[0::3])
        y = np.array(xyz[1::3])
        z = xyz[2::3]

        assert len(set(z)) == 1

        # Patient coordinates -> (row, column) on the expanded grid.
        r = (((y - Cy) / dy * Oy)) * expansion + sub_pixel_offset
        c = (((x - Cx) / dx * Ox)) * expansion + sub_pixel_offset

        polygon_mask = skimage.draw.polygon2mask(
            new_ct_size, np.column_stack((r, c)))
        expanded_mask = np.logical_or(expanded_mask, polygon_mask)

    mask = reduce_expanded_mask(expanded_mask, ct_size[0], expansion)
    mask = 2 * mask - 1

    return x_grid, y_grid, mask

In [None]:
def get_contours_from_mask(x_grid, y_grid, mask):
    """Trace the zero level of a mask as lists of vertices.

    Parameters
    ----------
    x_grid, y_grid
        Coordinates passed to Matplotlib's ``contour`` as X and Y.
    mask
        2-D array whose zero crossing marks the structure boundary.

    Returns
    -------
    list of numpy.ndarray
        One vertex array per path found at level ``0``.
    """
    # Draw on a private figure. Plotting through the pyplot state machine
    # would draw onto, and then close, whatever figure is current.
    fig, ax = plt.subplots()

    try:
        cs = ax.contour(x_grid, y_grid, mask, [0])

        # NOTE(review): ContourSet.collections is deprecated in newer
        # Matplotlib releases (3.8+) -- confirm against the pinned version.
        contours = [
            path.vertices for path in cs.collections[0].get_paths()
        ]
    finally:
        # Always release the temporary figure, even if contouring fails.
        plt.close(fig)

    return contours

In [None]:
# Names of the structures contoured on the selected slice. A slice with no
# contours at all is handled as "every structure absent" instead of raising
# KeyError.
contours_for_this_slice = contours_by_ct_uid.get(ct_uid, {})
contours_on_this_slice = contours_for_this_slice.keys()

# One channel per structure, pre-filled with NaN so that any channel left
# unassigned can be detected afterwards.
masks = np.full((*ct_size, len(structures_to_learn)), np.nan)

for i, structure in enumerate(structures_to_learn):
    if structure not in contours_on_this_slice:
        # Absent structure: the whole channel is background (-1).
        masks[:, :, i] = -1

        continue

    original_contours = contours_for_this_slice[structure]
    x_grid, y_grid, masks[:, :, i] = calculate_aliased_mask(original_contours, dcm_ct)

In [None]:
# Sanity check: no element of the mask array may still be NaN.
masks.shape
assert not np.isnan(masks).any()

In [None]:
# Display the pixel values of the selected CT slice.
dcm_ct.pixel_array

In [None]:
# One .npz archive per CT slice, named after the slice UID, holding the CT
# pixels as `input_array` and the structure masks as `output_array`.
npz_path = npz_directory / f'{ct_uid}.npz'
np.savez(
    npz_path,
    input_array=dcm_ct.pixel_array,
    output_array=masks,
)