In [1]:
from glob import glob
import SimpleITK as sitk

In [2]:
# Root folder of the Kaggle data. Paths are built by plain string concatenation,
# so the trailing "/" is required. The notebook reads scans from "stage1/" and
# reads/writes masks in "seg-lungs-KAGGLE-all/" below this folder.
KAGGLE_DATASET_PATH = "/mnt/DATASETS/KAGGLE/"
# Root folder of the LUNA data (trailing "/" required for the same reason).
# "subset*/" holds the scans and "seg-lungs-LUNA16/" the lung masks.
LUNA_DATASET_PATH = "/mnt/DATASETS/LUNA/"
# File extension of the exported Kaggle masks; also used to parse the ids back
# out of an exported mask's file name.
EXPORT_MASK_IMAGE_FORMAT = ".nii.gz"

In [3]:
def get_id_luna_image(path):
    """Return the id of a LUNA image: its file name up to the first ".mhd"."""
    # Keep only what follows the last "/" (the whole string when there is none)
    file_name = path.rpartition("/")[2]
    # Everything before the first ".mhd" is the id
    luna_id, _, _ = file_name.partition(".mhd")
    return luna_id

def load_luna_image(path):
    """Read the image file at `path` with SimpleITK and return it."""
    luna_image = sitk.ReadImage(path)
    return luna_image

def get_id_kaggle_image(path):
    """Return the id of a Kaggle image: the last "/"-separated component of `path`."""
    _, _, kaggle_id = path.rpartition("/")
    return kaggle_id

def load_kaggle_image(path):
    """Read the DICOM series found at `path` and return it as one image."""
    series_reader = sitk.ImageSeriesReader()
    # GDCM lists the files that make up the series located at `path`
    series_reader.SetFileNames(series_reader.GetGDCMSeriesFileNames(path))
    return series_reader.Execute()

def get_id_kaggle_mask(path):
    """Return the id of an exported Kaggle mask as a tuple.

    The file name (without the export extension) is split on "_"; masks written
    by this notebook are named "<kaggle id>_<luna id>", giving a 2-tuple.
    """
    file_name = path.rpartition("/")[2]
    stem = file_name.partition(EXPORT_MASK_IMAGE_FORMAT)[0]
    return tuple(stem.split("_"))


In [4]:
def calculate_kaggle_segmentation_mask(img_kaggle, img_luna, mask_luna):
    """Transfer a LUNA lung mask onto a Kaggle image through image registration.

    The masked LUNA image (moving) is registered to `img_kaggle` (fixed) in three
    stages that use the default "translation", "affine" and "bspline" parameter
    maps. The resulting transformation is then applied to `mask_luna`, and the
    transformed mask is returned as a new image.
    """
    # Restrict the LUNA image to the masked region: only the lungs should drive
    # the registration, not the whole ribcage.
    lungs_only_luna = sitk.MaskImageFilter().Execute(img_luna, mask_luna)

    # One default parameter map per registration stage, in order of application
    stage_maps = sitk.VectorOfParameterMap()
    for stage_name in ("translation", "affine", "bspline"):
        stage_maps.append(sitk.GetDefaultParameterMap(stage_name))

    # Register the masked LUNA image (moving) onto the Kaggle image (fixed)
    registration = sitk.ElastixImageFilter()
    registration.SetFixedImage(img_kaggle)
    registration.SetMovingImage(lungs_only_luna)
    registration.SetParameterMap(stage_maps)
    registration.Execute()

    # Warp LUNA's mask with the transformation found above to obtain Kaggle's.
    # NOTE(review): the mask is presumably resampled with the interpolator stored
    # in the default parameter maps (likely B-spline of order > 0), which would
    # blend label values at mask borders -- confirm, and consider setting
    # "FinalBSplineInterpolationOrder" to "0" for label images.
    return sitk.Transformix(mask_luna, registration.GetTransformParameterMap())

In [None]:
# glob() returns paths in arbitrary, filesystem-dependent order. Sorting keeps the
# processing order (and therefore the progress log) identical across runs, which
# matters for a job that is stopped and resumed many times.
luna_images_paths = sorted(glob(LUNA_DATASET_PATH + "subset*/*.mhd"))
kaggle_images_paths = sorted(glob(KAGGLE_DATASET_PATH + "stage1/*"))

# LUNA lung masks, indexed by the id of the scan they belong to
luna_segmentation_paths = glob(LUNA_DATASET_PATH + "seg-lungs-LUNA16/*.mhd")
luna_segmentation_paths_by_id = {get_id_luna_image(path): path for path in luna_segmentation_paths}

# Kaggle masks already exported to disk, indexed by their (kaggle id, luna id) tuple;
# the registration loop uses this index to skip pairs that are already done.
kaggle_segmentation_paths = glob(KAGGLE_DATASET_PATH + "seg-lungs-KAGGLE-all/*" + EXPORT_MASK_IMAGE_FORMAT)
kaggle_segmentation_paths_by_id = {get_id_kaggle_mask(path): path for path in kaggle_segmentation_paths}

print("LUNA dataset has %d images" % (len(luna_images_paths)))
print("KAGGLE dataset has %d images" % (len(kaggle_images_paths)))


LUNA dataset has 742 images
KAGGLE dataset has 1595 images


In [None]:
iteration = 0
max_iterations = len(kaggle_images_paths) * len(kaggle_images_paths)
for kaggle_image_path in kaggle_images_paths:
    id_kaggle = get_id_kaggle_image(kaggle_image_path)
    for luna_image_path in luna_images_paths:
        iteration += 1
        print("Segmenting %d out of %d total images" % (iteration, max_iterations))
        
        id_luna = get_id_luna_image(luna_image_path)
        
        # First check if image has already been segmented and skip if that is the case
        if kaggle_segmentation_paths_by_id.get((id_kaggle, id_luna)):
            print("Skipping %s using %s as base: already segmented" % (id_kaggle, id_luna))
            continue

        # Load the images
        img_kaggle = load_kaggle_image(kaggle_image_path)
        img_luna = load_luna_image(luna_image_path)
        mask_luna = load_luna_image(luna_segmentation_paths_by_id[id_luna])
        
        # Register and calculate kaggle segmentation mask
        %time mask_kaggle = calculate_kaggle_segmentation_mask(img_kaggle, img_luna, mask_luna)
        
        # Reduce byte depth
        mask_kaggle = sitk.Cast(mask_kaggle, sitk.sitkInt8)
        
        # Store mask to file
        id_kaggle_luna = id_kaggle + "_" + id_luna
        mask_kaggle_path = KAGGLE_DATASET_PATH + "seg-lungs-KAGGLE-all/" + id_kaggle_luna + EXPORT_MASK_IMAGE_FORMAT
        sitk.WriteImage(mask_kaggle, mask_kaggle_path)
        
        print("Saved segmentation for %s using %s as base" % (id_kaggle, id_luna))

Segmenting 1 out of 2544025 total images
Skipping 0015ceb851d7251b8f399e39779d1e7d using 1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260 as base: already segmented
Segmenting 2 out of 2544025 total images
Skipping 0015ceb851d7251b8f399e39779d1e7d using 1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896446896160048741492 as base: already segmented
Segmenting 3 out of 2544025 total images
Skipping 0015ceb851d7251b8f399e39779d1e7d using 1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059 as base: already segmented
Segmenting 4 out of 2544025 total images
CPU times: user 17min 20s, sys: 1min 9s, total: 18min 29s
Wall time: 11min 21s
Saved segmentation for 0015ceb851d7251b8f399e39779d1e7d using 1.3.6.1.4.1.14519.5.2.1.6279.6001.111172165674661221381920536987 as base
Segmenting 5 out of 2544025 total images
Skipping 0015ceb851d7251b8f399e39779d1e7d using 1.3.6.1.4.1.14519.5.2.1.6279.6001.122763913896761494371822656720 as base: already segmented
Segmenting 6 out of 2