In [None]:
# Creates binary masks for all CT scans based on labeled GTVs (clinician-reviewed lung tumors)

In [None]:
# Import Statements
import datetime
import math
import os
import random
import re

import matplotlib.pyplot as plt
import numpy as np
import SimpleITK as sitk
import trimesh
from numpy import savez_compressed

# Paths (fill these in before running the notebook)
# CSV file of labeled GTVs (generated by clinician review using the web-app);
# rows containing "lung-tumor" are the ones used by eligible_CTs_GTVs
labeled_GTVs_csv = ""
# Folder with CT and stl files, laid out as <pt>/<ct>/...; include a trailing path separator to be safe
path_to_plans = ""
# Text file containing information (i.e. centroid, name) for all GTVs for which cross-sectional images
# were created and reviewed by a clinician (generated with the save_GTV_images script)
savedGTVs_file = ""
# Save location for the numpy array masks of lung tumors; it is concatenated directly with "<pt>_<ct>",
# so include a trailing path separator if this is a directory
save_location = ""

# Seed Python's `random` module (used by unique_Masks when choosing which overlapping mask to keep)
random.seed(30)

In [None]:
def readCT(image_file):
    """Load a CT image with SimpleITK and collect its geometry.

    Parameters
    ----------
    image_file : str
        Path of the image file handed to ``sitk.ReadImage``.

    Returns
    -------
    tuple
        ``(dimensions, offsets, spacings, aspects, voxels)`` where

        * ``dimensions`` is the value of ``GetSize()``,
        * ``offsets`` is ``[x, y, z]`` taken from ``GetOrigin()``,
        * ``spacings`` is ``[x, y, z]`` taken from ``GetSpacing()``,
        * ``aspects`` is ``[x/z, y/z, x/y]`` spacing ratios (sagittal/coronal/axial display aspects),
        * ``voxels`` is the array returned by ``sitk.GetArrayFromImage``.
    """
    # Pixels are read as 32-bit floats
    image = sitk.ReadImage(image_file, sitk.sitkFloat32)
    voxels = sitk.GetArrayFromImage(image)

    # Geometry reported by the image header
    size = image.GetSize()
    spacing = image.GetSpacing()
    origin = image.GetOrigin()

    offsets = [origin[axis] for axis in range(3)]
    spacings = [spacing[axis] for axis in range(3)]

    # Spacing ratios, ordered as [x_aspect, y_aspect, z_aspect]
    dx, dy, dz = spacings
    aspects = [dx/dz, dy/dz, dx/dy]

    return size, offsets, spacings, aspects, voxels

In [None]:
# CT class - one CT scan together with the lung-tumor GTVs attached to it
class CT:
    """Container for a single CT scan.

    Attributes (each defaults to None):
        pt: patient id
        ct: CT id
        ct_info: CT spacing information, i.e. ct_dimensions, [x_offset, y_offset, z_offset],
            [x_spacing, y_spacing, z_spacing], [x_aspect, y_aspect, z_aspect]
        lung_tumor_GTVs: list of eligible GTV objects carrying the lung-tumor label
        mask: binary numpy tumor mask
    """

    def __init__(self, pt = None, ct = None, ct_info = None, GTVs = None, mask = None):
        # Identification of the scan
        self.pt, self.ct = pt, ct
        # Geometry, labeled GTVs (note the attribute name differs from the parameter), and mask
        self.ct_info, self.lung_tumor_GTVs, self.mask = ct_info, GTVs, mask

# GTV class - one labeled GTV segment
class GTV:
    """A single GTV segment.

    Attributes (each defaults to None):
        name: name of the GTV (including volume number)
        saved_centroid: centroid saved by the GTV_image script
    """

    def __init__(self, name = None, saved_centroid = None):
        self.name, self.saved_centroid = name, saved_centroid

        
# Get saved centroid information
# (this is necessary to check that GTV meshes are being loaded and segmented in the same way as when
# the screening GTV images were created)
def saved_Centroids(path = None):
    """Parse the saved-GTV text file into a ``{GTV name: centroid}`` dictionary.

    Every non-blank line must contain an ``array([...])`` literal holding the centroid, and
    carries the quoted GTV name as its last comma-separated field (closed by ``']``).

    Parameters
    ----------
    path : str, optional
        File to read. When omitted, the module-level ``savedGTVs_file`` is used.

    Returns
    -------
    dict
        Maps each GTV name to its centroid, stored as a list of coordinate strings
        (spaces removed, not converted to float).

    Raises
    ------
    ValueError
        If a non-blank line contains no ``array([...])`` literal.
    """
    if path is None:
        path = savedGTVs_file

    # Raw string: "\(" and "\[" are regex escapes, not Python string escapes
    centroid_pattern = re.compile(r'array\(\[(.*)\]\)')

    saved_centroids = {}

    with open(path, 'r') as filehandle:
        for line_number, line in enumerate(filehandle, start=1):
            # Strip only the line terminator; slicing off the last character would eat the
            # closing "]" of a final line that has no trailing newline and corrupt its name
            line = line.rstrip("\r\n")
            if not line.strip():
                continue

            match = centroid_pattern.search(line)
            if match is None:
                raise ValueError("No centroid array found on line " + str(line_number) + " of " + str(path))

            centroid = match.group(1).replace(" ", "").split(",")

            # The name is the last comma-separated field, wrapped as " '<name>']"
            name = line.split(",")[-1].replace(" '", "").replace("']", "")
            saved_centroids[name] = centroid

    return saved_centroids


In [None]:
# Get the CT scans and corresponding GTV plans that are eligible for creating a tumor mask
# (i.e. the plan must have at least one GTV segment that has been labeled lung-tumor)
# For each CT that has at least one eligible GTV segment, this creates a CT object holding all eligible GTVs (lung_tumor_GTVs)
def eligible_CTs_GTVs():
    """Build the list of CT objects that have at least one "lung-tumor" labeled GTV.

    Reads ``labeled_GTVs_csv`` line by line. A line is used when it contains the text
    "lung-tumor"; its patient id and CT id are the first two "__"-separated pieces, and the
    GTV name is everything before the first comma.

    Consecutive eligible lines that share the same patient/CT pair are collected into one CT
    object. Lines for the same pair that are NOT adjacent produce separate CT objects, so the
    CSV is expected to be grouped by CT.

    Returns
    -------
    list
        CT objects (with ``pt``, ``ct`` and ``lung_tumor_GTVs`` set) in file order; empty when
        the file has no "lung-tumor" rows.
    """
    eligible_plans = []
    curr_CT = None
    CT_obj = None

    with open(labeled_GTVs_csv) as f:

        for labeled_image in f:

            if "lung-tumor" not in labeled_image:
                continue

            pieces = labeled_image.split("__")
            pt, ct = pieces[0], pieces[1]
            pt_ct = pt + "__" + ct

            if pt_ct != curr_CT:
                # A new CT starts: flush the previous one before opening a fresh object
                if CT_obj is not None:
                    eligible_plans.append(CT_obj)
                CT_obj = CT(pt = pt, ct = ct, GTVs = [])
                curr_CT = pt_ct

            GTV_name = labeled_image.split(",")[0]
            CT_obj.lung_tumor_GTVs.append(GTV(name = GTV_name))

    # Flush the last CT object; skipped when no eligible row was found so that the result
    # never contains a placeholder instead of a CT object
    if CT_obj is not None:
        eligible_plans.append(CT_obj)

    return eligible_plans #list of CT objects that have eligible GTV plans

In [None]:
# Create tumor_mask for CT
def create_Mask(CT_obj):
    """Build one tumor mask for a CT from all of its lung-tumor GTVs.

    For every GTV in ``CT_obj.lung_tumor_GTVs`` the matching mesh body is loaded with
    ``GTV_body``, checked against the saved centroid, rescaled, voxelized and written into a
    zero array shaped like ``CT_obj.ct_info[0]``. Overlapping per-GTV masks are reduced with
    ``unique_Masks`` and the remaining ones are merged with an element-wise maximum.

    Parameters
    ----------
    CT_obj : CT
        Must have ``pt``, ``ct``, ``ct_info`` (dimensions, offsets, spacings, ...) and
        ``lung_tumor_GTVs`` set, and every GTV must have ``saved_centroid`` set.

    Returns
    -------
    numpy.ndarray
        Array of shape ``CT_obj.ct_info[0]``; all zeros when there are no GTVs.

    Raises
    ------
    Exception
        If a body's centroid (rounded to 3 decimals) differs from the saved centroid.
    """

    GTVs = CT_obj.lung_tumor_GTVs

    # Create empty numpy binary array
    dim = CT_obj.ct_info[0]
    mask = np.zeros(dim)

    # Create list of masks for all bodies (will be used for determining overlapping bodies)
    masks = []

    # CT information
    # ct_info[2] holds [x_spacing, y_spacing, z_spacing]; the scales express the in-plane
    # spacings relative to z_spacing, which is later used as the voxel pitch
    z_spacing = CT_obj.ct_info[2][2]
    x_scale = z_spacing/CT_obj.ct_info[2][0]
    y_scale = z_spacing/CT_obj.ct_info[2][1]

    # ct_info[1] holds [x_offset, y_offset, z_offset]
    # NOTE(review): scaled_x_offset is built from the y offset (index 1) times x_scale, and
    # scaled_y_offset from the x offset (index 0) times y_scale. This pairs with the axis swap
    # applied to body_matrix below, but the scale factors only agree when x and y spacing are
    # equal - confirm this holds for all CTs
    scaled_x_offset = CT_obj.ct_info[1][1]*x_scale
    scaled_y_offset = CT_obj.ct_info[1][0]*y_scale
    z_offset = CT_obj.ct_info[1][2]


    for gtv in GTVs:

        # Volume number in the file name is 1-based; convert to a 0-based index
        vol_num = int(re.search("__vol(.*).png",gtv.name).group(1))-1

        # Name pieces between the second "__" and the final "__vol..." piece form the stl file name
        gtv_file = '__'.join(gtv.name.split("__")[2:-1])

        body = GTV_body(CT_obj.pt, CT_obj.ct, gtv_file, vol_num)

        # Compare centroids rounded to 3 decimals to make sure this is the body that was reviewed
        cent = [str(i) for i in body.centroid]
        cent = [round(float(i),3) for i in cent]
        saved_centroid = [round(float(i),3) for i in gtv.saved_centroid]

        if cent != saved_centroid:
            raise Exception("GTV body centroid doesn't match with saved centroid")

        # Transform body with CT specific spacing and create mask
        # NOTE(review): apply_scale presumably modifies `body` in place and returns it - confirm
        scaled_body = body.apply_scale((x_scale,y_scale,1))
        voxelized_body = scaled_body.voxelized(pitch=z_spacing)
        voxelized_body = voxelized_body.fill()
        body_matrix = voxelized_body.matrix
        # Swap the first two axes, then rotate 180 degrees in the plane of those two axes
        body_matrix = np.rot90(np.swapaxes(body_matrix,0,1),2)
        body_matrix = body_matrix.astype(int)

        # Index of the first voxel of the body along each mask axis, from the scaled mesh bounds.
        # The first two use the negated upper bound (bounds[1]) of the mesh y and x axes,
        # the third uses the lower bound (bounds[0]) of the mesh z axis
        x_lower_bound = int(np.round((-scaled_body.bounds[1][1]-scaled_x_offset)/z_spacing))
        y_lower_bound = int(np.round((-scaled_body.bounds[1][0]-scaled_y_offset)/z_spacing))
        z_lower_bound = int(np.round((scaled_body.bounds[0][2]-z_offset)/z_spacing))

        # Paste the voxelized body into its own full-size mask
        # NOTE(review): no clipping is done here; presumably every body lies fully inside the CT - confirm
        current_mask = np.zeros(dim)
        current_mask[x_lower_bound:x_lower_bound+body_matrix.shape[0],y_lower_bound:y_lower_bound+body_matrix.shape[1],z_lower_bound:z_lower_bound+body_matrix.shape[2]] = body_matrix

        masks.append(current_mask)

    # Check for and remove overlapping masks (e.g. when two different RT plans exist for same tumor)
    masks = unique_Masks(masks)
    # Merge the remaining masks with an element-wise maximum
    for curr_mask in masks:
        mask = np.maximum(curr_mask,mask)

    return mask
     
# Get all segments/bodies in GTV plan
# process the exact same way as Save_GTV_images_stanford script - so that volume numbers in labeled GTVs correspond correctly
def GTV_body(pt, ct, GTV_name, vol_num):
    """Load a GTV stl file and return one of its discrete bodies.

    The mesh at ``<path_to_plans>/<pt>/<ct>/<GTV_name>.stl`` is split into bodies, bodies with
    a volume below 65 are dropped, and the body at index ``vol_num`` of the remaining ones is
    returned.

    Parameters
    ----------
    pt : str
        Patient id (folder name under ``path_to_plans``).
    ct : str
        CT id (folder name under the patient folder).
    GTV_name : str
        stl file name without the ".stl" extension.
    vol_num : int
        0-based index into the bodies that survive the volume filter.

    Returns
    -------
    The selected mesh body, as produced by the mesh's ``split()``.

    Raises
    ------
    Exception
        If more than 9 bodies remain after filtering.
    IndexError
        If ``vol_num`` does not address one of the remaining bodies.
    """
    # os.path.join works whether or not path_to_plans ends with a path separator
    GTV_path = os.path.join(path_to_plans, pt, ct, GTV_name + ".stl")

    gtv_mesh = trimesh.load(GTV_path)

    # Split GTV into discrete bodies and keep those with volume >= 65 mm^3
    # (screens out contours around lesions with diameter of less than 5mm).
    # Filtering by volume directly keeps the original order and does not depend on whether
    # split() hands back a list or an array.
    gtv_bodies = [body for body in gtv_mesh.split() if body.volume >= 65]

    # Check if no. of bodies > 9 - throw error b/c these should not be there
    if len(gtv_bodies) > 9:
        raise Exception("More than 9 bodies")

    # Reject negative indices too, so that a "vol0" label cannot silently select the last body
    if not 0 <= vol_num < len(gtv_bodies):
        raise IndexError("Volume index " + str(vol_num) + " is out of range for "
                         + str(len(gtv_bodies)) + " bodies in " + GTV_path)

    return gtv_bodies[vol_num]

# Check for overlapping masks
# For all groups of overlapping masks (DICE > 0.5), pick 1 mask at random
def unique_Masks(masks):
    """Remove duplicate masks, keeping one randomly chosen mask per overlap group.

    Masks are visited in order. Each mask that is not yet part of a group becomes the anchor
    of a new group, which also receives every later, not-yet-grouped mask whose Dice
    coefficient with the anchor exceeds 0.5. One member of each group is kept (chosen with
    ``random.choice``) and the others are deleted.

    Parameters
    ----------
    masks : list of numpy.ndarray
        Equally shaped masks. The list is modified in place.

    Returns
    -------
    list of numpy.ndarray
        The same list object, with the discarded masks removed.
    """
    screened = set()   # indices already assigned to a group
    to_remove = set()  # overlapping GTVs that will be removed

    for i in range(len(masks)):
        if i in screened:
            continue

        anchor_size = masks[i].sum()
        overlaps = [i]

        for j in range(i + 1, len(masks)):
            # A mask belongs to one group only. Re-adding an already grouped mask could put
            # the same index into the removal list twice and delete an unrelated mask.
            if j in screened:
                continue

            total_size = anchor_size + masks[j].sum()
            if total_size == 0:
                # Two empty masks: Dice is undefined (0/0), treat as not overlapping
                continue

            intersection = np.logical_and(masks[i], masks[j])
            dice = 2. * intersection.sum() / total_size

            if dice > 0.5:
                print("dice exceeded 0.5!: " + str(dice))
                overlaps.append(j)

        # Choose 1 GTV (from overlapping GTVs) to keep, discard others. This is called for
        # single-member groups as well so the seeded random sequence is consumed as before.
        keep = random.choice(overlaps)
        to_remove.update(n for n in overlaps if n != keep)
        screened.update(overlaps)

    # Delete from the back so that earlier indices stay valid
    for index in sorted(to_remove, reverse=True):
        del masks[index]

    return masks

In [None]:
# Create Masks and save as compressed numpy array files
saved_centroids = saved_Centroids()
CTs = eligible_CTs_GTVs()

# Index of the first eligible CT to process. 1262 keeps the behaviour of the previous
# (resumed) run; set to 0 to create masks for every eligible CT.
START_INDEX = 1262

for CT_obj in CTs[START_INDEX:]:

    # Get CT info
    # Keep only the geometry (dimensions, offsets, spacings, aspects). readCT also returns the
    # full voxel array; storing it on CT_obj would keep every processed CT volume in memory
    # for as long as `CTs` exists.
    ct_info = list(readCT(path_to_plans + "/" + CT_obj.pt + "/" + CT_obj.ct + "/" + CT_obj.ct + ".mhd")[:4])
    CT_obj.ct_info = ct_info

    # Add saved centroid information
    for gtv in CT_obj.lung_tumor_GTVs:
        gtv.saved_centroid = saved_centroids[gtv.name]

    # Create Mask
    tumor_mask = create_Mask(CT_obj)

    # Save Mask (written to "<save_location><pt>_<ct>.npz")
    savez_compressed(str(save_location + CT_obj.pt + "_" + CT_obj.ct),tumor_mask)