In [1]:
import ipyparallel as ipp
rc = ipp.Client()
view = rc[:]

In [2]:
%%px --local
from glob import glob
import SimpleITK as sitk
import numpy as np
import pandas as pd
import math
from collections import defaultdict

In [3]:
%%px --local
LUNA_DATASET_PATH = "/mnt/DATASETS/LUNA/"
EXPORT_MASK_IMAGE_FORMAT = ".nii.gz"

In [4]:
%%px --local
def get_id_luna_image(path):
    return path.split("/")[-1].split(".mhd")[0]

def get_id_luna_nodule_mask(path):
    return path.split("/")[-1].split(EXPORT_MASK_IMAGE_FORMAT)[0]

def load_luna_image(path):
    return sitk.ReadImage(path)

In [5]:
luna_images_paths = glob(LUNA_DATASET_PATH + "subset*/*.mhd")
luna_nodule_mask_paths = glob(LUNA_DATASET_PATH + "seg-nodules-LUNA16/*" + EXPORT_MASK_IMAGE_FORMAT)

luna_image_ids = { get_id_luna_image(path): path for path in luna_images_paths }
luna_nodule_mask_ids = { get_id_luna_nodule_mask(path): path for path in luna_nodule_mask_paths }

# Discard processed images
luna_image_ids = {k: v for k,v in luna_image_ids.items() if k not in luna_nodule_mask_ids}

print("LUNA dataset has %d images" % (len(luna_images_paths)))
df = pd.read_csv(LUNA_DATASET_PATH + "/CSVFILES/annotations.csv")
df.sort_values(by=['seriesuid'], inplace=True)
df["image_path"] = df.apply(lambda x: luna_image_ids.get(x.seriesuid), axis=1)
df.head()

LUNA dataset has 0 images


FileNotFoundError: File b'/mnt/DATASETS/LUNA//CSVFILES/annotations.csv' does not exist

In [6]:
annotations = defaultdict(list)
for row in df.itertuples():
    key = row.image_path
    entry = (
        row.coordX,
        row.coordY,
        row.coordZ,
        row.diameter_mm,
    )
    annotations[key].append(entry)

In [7]:
%%px --local
def create_slice_nodule_mask(pos_nodule, rad_nodule, slice_shape, img):
    mask_arr_slice = np.zeros(slice_shape)
    X, Y = slice_shape
    idxs = np.zeros((X*Y, 2))
    for y in range(Y):
        for x in range(X):
            idxs[y*Y + x, :] = np.array(img.TransformIndexToPhysicalPoint((x, y, 0))[:2])
    mask_1d = np.sqrt(np.sum((idxs - pos_nodule)**2, axis=1)) <= rad_nodule
    return mask_1d.reshape((X,Y))

def calc_radius_projection(z_center, radius, z_slice):
    return math.sqrt(radius**2 - abs(z_center - z_slice)**2)

def create_nodule_mask(img_path, nodules):
    img = sitk.ReadImage(img_path)
    img_arr = sitk.GetArrayViewFromImage(img)
    mask_arr = np.zeros(img_arr.shape, dtype=np.bool)
    for nodule in nodules:
        x, y, z, nodule_diameter = nodule
        nodule_radius = nodule_diameter / 2
        nodule_point = np.array([x, y])

        min_z = z - nodule_radius
        max_z = z + nodule_radius
        min_slice_idx = math.ceil(img.TransformPhysicalPointToContinuousIndex((x, y, min_z))[2])
        max_slice_idx = math.floor(img.TransformPhysicalPointToContinuousIndex((x, y, max_z))[2])

        slice_idxs = list(range(min_slice_idx, max_slice_idx + 1))
        slice_z_coords = [
            img.TransformIndexToPhysicalPoint((0, 0, idx))[2]
            for idx in slice_idxs
        ]
        slice_radius = [
            calc_radius_projection(z, nodule_radius, z_slice)
            for z_slice in slice_z_coords
        ]
        for idx, z_coords, radius in zip(slice_idxs, slice_z_coords, slice_radius):
            mask = create_slice_nodule_mask(nodule_point, radius, img_arr.shape[1:], img)
            mask_arr[idx, :, :] |= mask
    # Save mask to file
    mask_img = sitk.GetImageFromArray(mask_arr.astype(np.uint8))
    mask_img.SetSpacing(img.GetSpacing())
    mask_img.SetOrigin(img.GetOrigin())
    mask_img_path = LUNA_DATASET_PATH + "seg-nodules-LUNA16/" + get_id_luna_image(img_path) + EXPORT_MASK_IMAGE_FORMAT
    sitk.WriteImage(mask_img, mask_img_path)


In [8]:
view.map(
    create_nodule_mask,
    annotations.keys(),
    annotations.values(),
).wait()

True