In [21]:
import SimpleITK as sitk
import numpy as np
import os
from glob import glob
import pandas as pd

def tqdm(x):
    """No-op stand-in for a progress-bar wrapper: hands ``x`` back untouched."""
    return x

In [22]:
def make_mask(mask, v_center, v_diam, spacing):
    """Paint an axis-aligned box around one nodule into ``mask`` (in place).

    Parameters
    ----------
    mask : 3-D array
        Volume to draw into, indexed ``[z, y, x]``. Modified in place.
    v_center : sequence of 3 numbers
        Nodule centre in voxel indices, ordered ``(z, y, x)``.
    v_diam : float
        Nodule diameter in the same physical unit as ``spacing``.
    spacing : sequence of 3 numbers
        Voxel size per axis, ordered ``(x, y, z)``.

    Returns
    -------
    None
        The box is written into ``mask`` as the value ``1.0``.
    """
    # `spacing` is (x, y, z) but the mask axes are (z, y, x), so pair mask
    # axis 0/1/2 with spacing index 2/1/0.
    bounds = []
    for axis, axis_spacing in enumerate((spacing[2], spacing[1], spacing[0])):
        # Box edge length in voxels, then half of it (np.rint rounds halves
        # to the nearest even value).
        extent = int(v_diam / axis_spacing + 1)
        half = np.rint(extent / 2)
        lo = int(v_center[axis] - half)
        hi = int(v_center[axis] + half + 1)
        # Clamp to the volume: a negative bound would otherwise be read by
        # slicing as "count from the end" and paint the wrong region.
        size = mask.shape[axis]
        lo = min(max(lo, 0), size)
        hi = min(max(hi, 0), size)
        bounds.append(slice(lo, hi))
    mask[bounds[0], bounds[1], bounds[2]] = 1.0



# Helper: look up the path in a list that belongs to a given case identifier
def get_filename(file_list, case):
    """Return the first entry of ``file_list`` that contains ``case``.

    The match is a plain substring test (``case in entry``). When no entry
    matches, ``None`` is returned.
    """
    return next((candidate for candidate in file_list if case in candidate), None)

In [28]:
# Build a nodule mask volume for every annotated scan and save it as .mhd
luna_path = "data"
output_path = "data/mask"
luna_csv_path = "data"

# The annotation table is the same for every subset, so read it only once.
annotations = pd.read_csv(luna_csv_path + "/CSVFILES/" + "annotations.csv")

for subsetindex in range(1):  ####10#### (presumably range(10) for the full dataset -- confirm)
    luna_subset_path = luna_path + "/subset" + str(subsetindex) + "/"
    # NOTE(review): there is no separator between output_path and "subset",
    # so masks are written to "data/masksubset<N>/". Left unchanged because
    # other code may already read from that location -- confirm the intent.
    luna_subset_mask_path = output_path + "subset" + str(subsetindex) + "/"
    os.makedirs(luna_subset_mask_path, exist_ok=True)
    file_list = glob(luna_subset_path + "*.mhd")

    # Same paths with the 4-character ".mhd" extension removed
    file_list_path = [mhd_path[0:-4] for mhd_path in file_list]

    # Attach to each annotation the path of its scan in this subset; rows
    # with any missing value (including "no scan found") are dropped.
    df_node = annotations.assign(
        file=annotations["seriesuid"].map(
            lambda file_name: get_filename(file_list_path, file_name)
        )
    ).dropna()

    # Looping over the image files
    for img_base in tqdm(file_list_path):
        # all nodules associated with this scan
        mini_df = df_node[df_node["file"] == img_base]
        # No mask file is written for scans without annotations, so skip
        # them before paying for the image load and the mask allocation.
        if mini_df.shape[0] == 0:
            continue

        img_file = img_base + ".mhd"
        itk_img = sitk.ReadImage(img_file)
        # array indexes are z,y,x (notice the ordering)
        img_array = sitk.GetArrayFromImage(itk_img)
        # x,y,z origin in world coordinates (mm)
        origin = np.array(itk_img.GetOrigin())
        # x,y,z spacing of voxels in world coordinates (mm)
        spacing = np.array(itk_img.GetSpacing())

        # np.float64 replaces the removed `np.float` alias (same dtype)
        mask_itk = np.zeros(shape=img_array.shape, dtype=np.float64)
        for node_idx, cur_row in mini_df.iterrows():
            diam = cur_row["diameter_mm"]
            center = np.array([cur_row["coordX"], cur_row["coordY"], cur_row["coordZ"]])
            # world (mm) -> voxel index, then reverse x,y,z into z,y,x order
            v_center = np.rint((center - origin) / spacing)[::-1]
            make_mask(mask_itk, v_center, diam, spacing)

        # mask holds only 0.0 / 1.0, so scaling gives exactly 0 / 255
        mask_itk = (mask_itk * 255.).astype(np.uint8)
        sitk_maskimg = sitk.GetImageFromArray(mask_itk)
        sitk_maskimg.SetSpacing(spacing)
        sitk_maskimg.SetOrigin(origin)
        # file name of the scan without directory or extension
        sub_img_file = os.path.basename(img_base)
        sitk.WriteImage(sitk_maskimg, luna_subset_mask_path + sub_img_file + "_segmentation.mhd")

(25, 21, 21)
(17, 13, 13)
(11, 9, 9)
(5, 15, 15)
(5, 13, 13)
(7, 25, 25)
(5, 13, 13)
(7, 21, 21)
(5, 13, 13)
(9, 11, 11)
(5, 7, 7)
(7, 9, 9)
(11, 13, 13)
(7, 15, 15)
(5, 11, 11)
(5, 9, 9)
(5, 9, 9)
(5, 9, 9)
(21, 17, 17)
(13, 17, 17)
(19, 25, 25)
(13, 13, 13)
(9, 9, 9)
(13, 17, 17)
(7, 9, 9)
(7, 9, 9)
(9, 9, 9)
(9, 29, 29)
(5, 11, 11)
(9, 15, 15)
(5, 11, 11)
(5, 9, 9)
(5, 9, 9)
(5, 9, 9)
(17, 17, 17)
(13, 17, 17)
(19, 17, 17)
(7, 13, 13)
(5, 9, 9)
(9, 11, 11)
(9, 13, 13)
(13, 21, 21)
(9, 13, 13)
(9, 13, 13)
(7, 21, 21)
(9, 27, 27)
(9, 13, 13)
(5, 9, 9)
(7, 9, 9)
(5, 9, 9)
(5, 9, 9)
(7, 9, 9)
(23, 21, 21)
(15, 13, 13)
(5, 9, 9)
(5, 9, 9)
(9, 9, 9)
(11, 13, 13)
(7, 9, 9)
(5, 13, 13)
(5, 9, 9)
(7, 9, 9)
(17, 21, 21)
(5, 9, 9)
(9, 29, 29)
(17, 27, 27)
(9, 9, 9)
(13, 13, 13)
(13, 25, 25)
(5, 9, 9)
(5, 9, 9)
(9, 23, 23)
(9, 13, 13)
(11, 15, 15)
(5, 9, 9)
(5, 9, 9)
(13, 21, 21)
(5, 9, 9)
(5, 13, 13)
(9, 13, 13)
(13, 37, 37)
(5, 15, 15)
(5, 17, 17)
(9, 9, 9)
(11, 13, 13)
(19, 19, 19)
(5, 7, 7)

In [30]:
# Load the candidate table and attach the matching scan path to every row.
# Relies on `file_list_path` and `get_filename` already being defined in the
# notebook namespace by earlier cells.
luna_csv_path = "data"
candidates_csv = luna_csv_path + "/CSVFILES/" + "candidates_v2.csv"
df_node = pd.read_csv(candidates_csv)
# dropna() removes rows with any missing value, which includes rows for
# which no path in file_list_path matched the seriesuid.
df_node = df_node.assign(
    file=df_node["seriesuid"].map(
        lambda file_name: get_filename(file_list_path, file_name)
    )
).dropna()

In [37]:
# Display the rows whose "class" column equals 1
df_node.loc[df_node["class"].eq(1)]

Unnamed: 0,seriesuid,coordX,coordY,coordZ,class,file
20384,1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896...,-100.709660,68.191806,-230.920000,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
22033,1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524...,36.577828,77.166931,-123.632500,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
22208,1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524...,45.517008,48.789231,-109.205277,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
25892,1.3.6.1.4.1.14519.5.2.1.6279.6001.111172165674...,136.297029,117.290290,-182.063909,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
56764,1.3.6.1.4.1.14519.5.2.1.6279.6001.124154461048...,146.239444,-161.190112,-310.777295,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
...,...,...,...,...,...,...
726438,1.3.6.1.4.1.14519.5.2.1.6279.6001.898642529028...,-42.265000,92.300000,-566.640000,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
728007,1.3.6.1.4.1.14519.5.2.1.6279.6001.905371958588...,103.710144,42.348020,-119.266765,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
728104,1.3.6.1.4.1.14519.5.2.1.6279.6001.905371958588...,110.830000,57.690000,-122.430000,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
728857,1.3.6.1.4.1.14519.5.2.1.6279.6001.905371958588...,108.537180,48.248923,-120.765895,1,data/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001...
