In [None]:
#setup libraries
import os, re, glob, sys
import subprocess

import numpy as np
import pandas as pd
import tifffile
from cellpose import models
from PIL import Image
from scipy import spatial, ndimage
from skimage import io, morphology, measure
from skimage.registration import phase_cross_correlation

In [12]:
# Pipeline identifier used in output directory names:
# cellpose trained cell segmentation, KIT tracking
pipeline_name = 'CtcK'
# Short channel names (NR = nuclear reporter, CC = cell cycle reporter)
ch1_name = "NR"
ch2_name = "CC"
# Root directory that holds one sub-directory per plate
data_path = "<set to path containing plate-level directories>"
def in_ipython():
    """Report whether the code is running inside an IPython/Jupyter session.

    Returns:
        The value bound to the name ``__IPYTHON__`` when that name is defined,
        otherwise ``False`` (the lookup raises ``NameError``).
    """
    try:
        ipython_marker = __IPYTHON__
    except NameError:
        ipython_marker = False
    return ipython_marker
# Debugging is switched off for both interactive and command-line runs;
# set it to True by hand to limit processing to one field / ten frames.
debugging_flag = False
if not in_ipython():
    # Command-line run: <script> plateID well_index cellline
    cellline = sys.argv[3]
    plateID = sys.argv[1]
    well_index = int(sys.argv[2])
else:
    # Interactive run: fixed example plate and well
    cellline = "HCC1143"
    plateID = 'HC00801'
    well_index = 1

# Plate-level directory. The components are joined with os.path.join (as the
# well lookup further down already does) instead of concatenating
# data_path+plateID, so the paths are correct whether or not data_path ends
# with a path separator.
plate_path = os.path.join(data_path, plateID)

# Directory layout below the plate:
#   Analysis/<pipeline_name>/intermediate_files/           -> output_path
#   Analysis/<pipeline_name>/intermediate_files/tracking/  -> tracking_path
#   Analysis/registered_stacks/                            -> registered_stacks_path
#   Analysis/registered_stacks/transformations/            -> transformation_path
# output_path and tracking_path keep their trailing "/" because other cells
# append to them with plain string concatenation.
output_path = os.path.join(plate_path,"Analysis",pipeline_name,"intermediate_files/")
registered_stacks_path = os.path.join(plate_path,"Analysis","registered_stacks")
transformation_path = os.path.join(registered_stacks_path,"transformations")
tracking_path = os.path.join(output_path,'tracking/')

# exist_ok=True already makes this a no-op when the directory is present
os.makedirs(output_path, exist_ok=True)
            
#set up for 24 or 96  well plate, select well using the index
# One list of well names per plate row: rows A-H, columns 1-12
well_lists = tuple([row_letter + str(column_number) for column_number in range(1, 13)]
                   for row_letter in "ABCDEFGH")
# Flatten row by row, so the 1-based well_index maps 1 -> A1, 13 -> B1, 96 -> H12
well_list = [well_name for row_wells in well_lists for well_name in row_wells]
# NOTE(review): well_index 0 silently selects the last well (H12) through
# negative indexing - confirm callers always pass 1..96
well = well_list[well_index-1]
# Build the list of field directories for the selected well.
if os.path.exists(os.path.join(data_path,plateID,well)):
    # Raw image directories are present: use every field_1 .. field_9 found
    subdirectories = sorted(glob.glob(os.path.join(data_path,plateID,well,"field_[1-9]")))
else:
    # Raw images are absent: infer the fields from the registered phase stacks.
    # The "_" after the well name is part of the glob so that e.g. well A1
    # does not also pick up the stacks of wells A10, A11 and A12.
    stack_prefix = plateID+"_P_"+well+"_"
    P_registered_stacks = sorted(glob.glob(os.path.join(data_path,plateID,"Analysis","registered_stacks",stack_prefix+"*")))
    subdirectories = []
    for stack_name in P_registered_stacks:
        # Read the field number from the file name only, directly after the
        # plate/channel/well prefix, so that a "_<digit>_" fragment in a
        # directory name can never be mistaken for the field.
        field_match = re.match(re.escape(stack_prefix)+"([1-9])_", os.path.basename(stack_name))
        if field_match is None:
            # not a <plate>_P_<well>_<field>_... stack; ignore it
            continue
        field_num = field_match.group(1)
        subdirectories.append(os.path.join(data_path,plateID,well,"field_"+field_num))

if debugging_flag:
    # debugging runs only process the first field
    subdirectories = subdirectories[0:1]

#### Register the image stacks
If registered red, green and phase stacks already exist for a field, skip this step; otherwise:  
Load the red, green and phase images  
Delete images from any time slice that does not have a complete set of images  
Rescale the fluorescent images from 12 to 8 bits  
Calculate the frame-to-frame transformations from the phase stack, correcting the translation only  
Store the registration transformations  
Use the transformations to register all three stacks  
Save the registered stacks as 16 bit images  


In [13]:

# Register the red (R), green (G) and phase (P) image stacks of every field.
# For each field whose registered stacks are not all on disk:
#   1. find the time slices that have an image in all three channels
#   2. load the three stacks in chronological order
#   3. compute frame-to-frame translations on the phase stack and accumulate
#      them so every frame is aligned to the first one
#   4. shift all three stacks, crop to the area valid in every frame, save
for subdir in subdirectories:
    field = re.findall("field_[1-9]",subdir)[0]
    field_num = re.findall("[0-9]", field)[0]
    reg_filename = os.path.join(registered_stacks_path,plateID+"_R_"+well+"_"+field_num+"_reg_stack.tif")
    # Only process the field-level image files if a registered stack is missing
    registered_stack_filenames = (reg_filename,
                                  reg_filename.replace("_R_", "_P_"),
                                  reg_filename.replace("_R_", "_G_"))
    if all(os.path.exists(stack_filename) for stack_filename in registered_stack_filenames):
        continue

    sys.stdout.write("Gathering image stacks for "+subdir+"\n")
    # Collect the time stamps (pattern "..d..h..m" in the file name) available per channel
    time_slices_by_channel = {}
    for channel in ("R", "G", "P"):
        channel_paths = glob.glob(os.path.join(subdir,"*_"+channel+"_*m.tif"))
        time_slices_by_channel[channel] = {re.findall("..d..h..m", os.path.basename(channel_path))[0]
                                           for channel_path in channel_paths}

    # Keep only time slices present in all three channels. The set MUST be
    # sorted: set iteration order is arbitrary, and the frame order defines
    # both the time axis of the saved stacks and the cumulative shifts below.
    # The fixed-width time stamps sort chronologically as plain strings.
    complete_time_slices = sorted(time_slices_by_channel["R"]
                                  & time_slices_by_channel["G"]
                                  & time_slices_by_channel["P"])
    if not complete_time_slices:
        raise FileNotFoundError("No time slice in "+subdir+" has red, green and phase images")

    # Load the images from the same directory that was globbed above
    r_data_paths_c = [os.path.join(subdir,plateID+"_R_"+well+"_"+field_num+"_"+time_slice+".tif") for time_slice in complete_time_slices]
    g_data_paths_c = [os.path.join(subdir,plateID+"_G_"+well+"_"+field_num+"_"+time_slice+".tif") for time_slice in complete_time_slices]
    p_data_paths_c = [os.path.join(subdir,plateID+"_P_"+well+"_"+field_num+"_"+time_slice+".tif") for time_slice in complete_time_slices]
    # each stack is indexed as [frame, row, column]
    img_rs = np.stack(io.imread_collection(r_data_paths_c))
    img_gs = np.stack(io.imread_collection(g_data_paths_c))
    img_ps = np.stack(io.imread_collection(p_data_paths_c))

    #register using skimage skimage.registration phase_cross_correlation
    n_frames = img_ps.shape[0]
    shifts = np.zeros([n_frames, 2]) # (row, column) shift of every frame relative to frame 0
    sys.stdout.write("calculating shifts"+"\n")
    for i in range(n_frames-1): #loop through the first to the second to last image
        shift, error, diffphase = phase_cross_correlation(img_ps[i], img_ps[i+1], normalization=None, upsample_factor=4)
        shifts[i+1] = shifts[i]+shift #make shifts absolute, based on first image

    sys.stdout.write("shifting image stacks"+"\n")
    # The *_reg names refer to the same arrays as the loaded stacks: every
    # frame is overwritten by its shifted version. The shifts were all
    # computed above, before any frame is modified.
    img_rs_reg = img_rs
    img_gs_reg = img_gs
    img_ps_reg = img_ps
    for i in range(n_frames):
        img_rs_reg[i] = ndimage.shift(img_rs[i], shifts[i], order=3, mode='constant', cval=0, prefilter=True)
        img_gs_reg[i] = ndimage.shift(img_gs[i], shifts[i], order=3, mode='constant', cval=0, prefilter=True)
        img_ps_reg[i] = ndimage.shift(img_ps[i], shifts[i], order=3, mode='constant', cval=0, prefilter=True)

    # also creates registered_stacks_path, the parent of transformation_path
    os.makedirs(transformation_path, exist_ok=True)
    np.save(os.path.join(transformation_path,plateID+"_"+well+"_"+field+"_shifts.npy"), shifts)

    #crop each stack to the area that is common in all images after registration
    #use the min and max values in the transformation stack to define the common active area
    # shifts[0] is always 0, so the maxima are >= 0 and the minima are <= 0.
    # The shifts are sub-pixel (upsample_factor=4): round the positive extreme
    # up and the negative extreme down so partially padded edge rows/columns
    # are excluded as well (truncating toward zero would keep them).
    y_axis_length = img_rs_reg.shape[1]
    x_axis_length = img_rs_reg.shape[2]
    y_start = int(np.ceil(shifts[:,0].max()))
    y_stop = y_axis_length + int(np.floor(shifts[:,0].min()))
    x_start = int(np.ceil(shifts[:,1].max()))
    x_stop = x_axis_length + int(np.floor(shifts[:,1].min()))
    sys.stdout.write("Cropping to active areas for "+subdir+"\n")

    img_rs_reg_crop = img_rs_reg[:,y_start:y_stop,x_start:x_stop]
    img_gs_reg_crop = img_gs_reg[:,y_start:y_stop,x_start:x_stop]
    img_ps_reg_crop = img_ps_reg[:,y_start:y_stop,x_start:x_stop]
    sys.stdout.write("Saving stacks to disk for "+subdir+"\n")
    # NOTE(review): the stacks are saved as signed int16, so pixel values
    # above 32767 would wrap - confirm the acquired data stays below that
    io.imsave(reg_filename, img_rs_reg_crop.astype(np.int16), plugin='tifffile', check_contrast=False)
    io.imsave(reg_filename.replace("_R_","_G_"), img_gs_reg_crop.astype(np.int16), plugin='tifffile', check_contrast=False)
    io.imsave(reg_filename.replace("_R_","_P_"), img_ps_reg_crop.astype(np.int16), plugin='tifffile', check_contrast=False)

#### Segment the phase and nuclear fluorescent images using trained cellpose model  
If mask files already exist, skip this step, otherwise:    
Load the registered phase and red stack nuclear marker images  
Segment cell outlines in the phase image 


In [17]:
# Segment the cells of every frame with the cell-line specific cellpose model
# and save one label stack per field. Fields that already have a cell mask
# file are skipped; the model is only loaded if at least one field needs it.
model_loaded = False

for subdir in subdirectories:
    field = re.findall("field_[1-9]",subdir)[0]
    field_num = re.findall("[0-9]", field)[0]
    reg_filename = os.path.join(registered_stacks_path,plateID+"_R_"+well+"_"+field_num+"_reg_stack.tif")
    cell_mask_filename = os.path.join(output_path,plateID+"_"+well+"_"+field_num+"_cell_masks.tif")

    if os.path.exists(cell_mask_filename): #Only segment if no mask files
        continue

    if not model_loaded:
        flow_threshold = .4
        cellprob_threshold=0
        # NOTE(review): c_min_size, c_max_size, resample and n_channels are
        # not used in this cell - confirm whether later cells rely on them
        c_min_size=400
        c_max_size = 10000
        resample = True
        # define CHANNELS to run segementation on
        # grayscale=0, R=1, G=2, B=3
        # channels = [cytoplasm, nucleus]
        # if NUCLEUS channel does not exist, set the second channel to 0
        # will use channel R = 1 as nuclear channel only
        c_channels = [2,1]
        n_channels = [0,0]
        # DEFINE CELLPOSE MODELs
        # The model path is joined component-wise so it is correct whether or
        # not data_path ends with a path separator.
        cell_model_path = os.path.join(data_path,"cellpose_Ctc_"+cellline,"train","models","Ctc")
        cell_model = models.CellposeModel(gpu=True, pretrained_model = cell_model_path)
        model_loaded = True

    sys.stdout.write("Start segmenting "+plateID + " "+ well + " " + field+"\n")
    img_rs_reg = io.imread(reg_filename)
    img_ps_reg = io.imread(reg_filename.replace('_R_', '_P_'))

    if debugging_flag:
        # debugging runs only segment the first ten frames
        img_rs_reg = img_rs_reg[0:10]
        img_ps_reg = img_ps_reg[0:10]

    cell_mask_images = []

    for i in range(len(img_ps_reg)):
        # two-plane image: plane 1 = red (nuclear) frame, plane 2 = phase frame,
        # matching c_channels = [cytoplasm=2, nucleus=1]
        image = np.stack((img_rs_reg[i], img_ps_reg[i]))

        # create cyto masks with cellpose
        cell_masks, flows, styles = cell_model.eval(image,
                                                    net_avg = True,
                                                    flow_threshold=flow_threshold,
                                                    cellprob_threshold=cellprob_threshold,
                                                    channels=c_channels)

        #Make a list of the cell mask full size images
        cell_mask_images.append(cell_masks)
    io.imsave(cell_mask_filename, np.array(cell_mask_images, dtype = 'uint16'), plugin='tifffile', check_contrast=False)

sys.stdout.write("done with cell segmentation for "+plateID+" "+well+" \n")

done with cell segmentation for HC00801 A1 


44

In [18]:
#Copy individual image and mask files to the tracking directories
# For each field the registered phase stack and the cell mask stack are split
# into one file per frame (reg/tNNN.tif and cell_masks/maskNNN.tif).
for subdir in subdirectories:
    field = re.findall("field_[1-9]",subdir)[0]
    field_num = re.findall("[0-9]", field)[0]
    reg_filename = os.path.join(registered_stacks_path,plateID+"_R_"+well+"_"+field_num+"_reg_stack.tif")
    c_mask_filename = os.path.join(output_path,plateID+"_"+well+"_"+field_num+"_cell_masks.tif")
    tracking_path = os.path.join(output_path,'tracking/')
    field_tracking_dir = tracking_path+well+"/"+field

    # make sure the three per-field tracking directories exist
    for required_dir in (field_tracking_dir+"/cell_masks/",
                         field_tracking_dir+"/reg",
                         field_tracking_dir+"/results"):
        if not os.path.exists(required_dir):
            os.makedirs(required_dir, exist_ok=True)

    #Only write files if image stacks exist and the individual files are not present
    p_stack_filename = reg_filename.replace('_R_', '_P_')
    if os.path.exists(p_stack_filename) and not os.listdir(field_tracking_dir+"/reg"):
        sys.stdout.write("copying registered P images to tracking directory"+tracking_path+" "+well+"/"+field+"/reg\n")
        #Use skimage to save individual images in the registered P image stack
        im_s = io.imread(p_stack_filename)
        if debugging_flag:
            im_s = im_s[0:10]

        for i, frame in enumerate(im_s):
            io.imsave(field_tracking_dir+"/reg/"+"t%03.d.tif"%i, frame.astype(np.int16), plugin='tifffile', check_contrast=False)

    # Split the cell mask stack the same way
    if os.path.exists(c_mask_filename) and not os.listdir(field_tracking_dir+"/cell_masks"):
        sys.stdout.write("copying cell masks images to tracking directory"+tracking_path+well+"/"+field+"/cell_masks\n")
        #Use skimage to save individual images in the cell mask stack
        im_s = io.imread(c_mask_filename)
        if debugging_flag:
            im_s = im_s[0:10]

        for i, frame in enumerate(im_s):
            io.imsave(field_tracking_dir+"/cell_masks/"+"mask%03.d.tif"%i, frame.astype(np.int16), plugin='tifffile', check_contrast=False)

#### Track the cells  
Use the KIT-Loeffler tracking method to track the segmented cell masks across frames    
The tracking output includes masks with new label values and a res_track.txt file as described below  

In [19]:
# Run the external run_tracking module once per field that has no
# results/res_track.txt yet, and stop with a non-zero exit status as soon as
# one tracking job fails.
sys.stdout.write("start tracking jobs if needed\n")
for subdir in subdirectories: #track all fields in the well
    sys.stdout.write("start tracking jobs for "+subdir+"\n")
    field = re.findall("field_[1-9]",subdir)[0]
    field_tracking_dir = tracking_path+well+"/"+str(field)

    #Condition on the tracking output file res_track.txt existing
    if not os.path.exists(field_tracking_dir+"/results/res_track.txt"): #Only track if no res_track.txt
        # The command is passed as an argument list (no shell), so paths that
        # contain spaces or shell metacharacters are handed over unchanged.
        cmd = ["python", "-m", "run_tracking",
               "--image_path", field_tracking_dir+"/reg/",
               "--segmentation_path", field_tracking_dir+"/cell_masks/",
               "--results_path", field_tracking_dir+"/results",
               "--delta_t", "4",
               "--default_roi_size", "2"]
        # subprocess.run reports the child's real exit code. os.system returns
        # the wait status on Unix (exit code << 8), and sys.exit(256) would be
        # truncated to exit status 0, hiding the failure from the caller.
        returned_value = subprocess.run(cmd).returncode
        if returned_value != 0:
            # a negative returncode means the child was killed by a signal
            sys.exit(returned_value if returned_value > 0 else 1)


start tracking jobs if needed
start tracking jobs for /home/exacloud/gscratch/HeiserLab/images/HC00801/A1/field_1
start tracking jobs for /home/exacloud/gscratch/HeiserLab/images/HC00801/A1/field_2
start tracking jobs for /home/exacloud/gscratch/HeiserLab/images/HC00801/A1/field_3
start tracking jobs for /home/exacloud/gscratch/HeiserLab/images/HC00801/A1/field_4


#### Identify cells
Read in the tracking results  
res_track.txt - A text file representing an acyclic graph for the whole image sequence. Every line corresponds to a single track that is encoded by four numbers separated by a space:  
L B E P where  
L - a unique label of the track (label of markers, 16-bit positive value)  
B - a zero-based temporal index of the frame in which the track begins  
E - a zero-based temporal index of the frame in which the track ends  
P - label of the parent track (0 is used when no parent is defined)


Filter the track objects keeping the parents and those with a minimum track length and save the results to tracks.csv    

Create a new file tracks.csv with the following columns:  
label - a unique label of the track (label of markers, 16-bit positive value)  
begins - a zero-based temporal index of the frame in which the track begins  
ends - a zero-based temporal index of the frame in which the track ends  
parent - label of the parent track (0 is used when no parent is defined)  
length - The number of frames that the cell is identified in  
plateID - Character string of the plate's ID such as AU02001  
well - Character string of the well such as A1  
field - Integer of the image field within the well  

In [20]:
#set filter parameters
# NOTE: with min_track_length = 1 every track passes the length filter
min_track_length = 1
#loop through the results from each segmented field
for subdir in subdirectories:
    field = re.findall("field_[1-9]",subdir)[0]
    results_path = os.path.join(output_path,"tracking",well,field,"results")
    res_filename = os.path.join(results_path,"res_track.txt")
    res_flt_filename = os.path.join(results_path,"tracks.csv")

    #only filter if no tracks.csv file
    # The check comes first so that res_track.txt is neither required nor
    # re-parsed for fields that were already filtered.
    if os.path.exists(res_flt_filename):
        continue

    #read in the res_track.txt file for the current field (columns: L B E P)
    tracks = pd.read_csv(res_filename, sep=" ", header=None)
    tracks.columns = ["label", "begins", "ends", "parent"]
    # number of frames the track spans (both end frames included)
    tracks['length'] = tracks.ends - tracks.begins + 1
    # index of the last frame in which any track ends
    last_track = tracks.ends.max()
    #check if object is a parent
    tracks["is_parent"] = tracks['label'].isin(tracks['parent'])
    tracks['plateID'] = plateID
    tracks['well'] = well
    tracks['field'] = field.replace("field_","")

    #Filter using the filter parameters
    #remove short tracks that are not parents and are not in the last frame
    tracks_flt = tracks.query('length >= @min_track_length or is_parent or ends > (@last_track-@min_track_length)')
    #write out the tracks.csv file
    tracks_flt.to_csv(res_flt_filename,index=False)

#### Filter masks to only tracked cells  
Use the filtered tracks to remove masks for non-cell objects  
Save the filtered masks as individual image files in filtered_masks directory   

In [21]:
#use the tracks.csv file to filter out cell masks
# For every field, each relabeled mask image written by the tracker is copied
# to cell_filtered_masks/ with all labels that are not listed in tracks.csv
# set to 0. Fields whose cell_filtered_masks/ directory already holds files
# are skipped.
sys.stdout.write("check on need to filter cell masks on tracking results for "+plateID+" "+well+" \n")

for subdir in subdirectories:
    field = re.findall("field_[1-9]",subdir)[0]
    mask_track_path = os.path.join(output_path,"tracking",well,field,"results")
    tracked_mask_filenames = sorted(glob.glob(mask_track_path+"/mask*"))
    cell_filtered_path = tracking_path+well+"/"+field+"/cell_filtered_masks/"
    if debugging_flag:
        tracked_mask_filenames = tracked_mask_filenames[0:10]

    os.makedirs(cell_filtered_path, exist_ok=True)

    #condition on whether the filtered cell masks files exist
    if len(os.listdir(cell_filtered_path)) != 0:
        continue

    sys.stdout.write("filtering cell masks on tracking results for "+plateID+" "+well+" "+field+" \n")

    #read in the tracks file for this field
    tracks_filename = os.path.join(output_path,"tracking",well,field,"results","tracks.csv")
    tracks = pd.read_csv(tracks_filename)
    # the set of valid labels is the same for every frame of the field
    tracked_labels = tracks.label.to_numpy()

    #loop through the cell mask images in the field
    for fn in tracked_mask_filenames:
        #read in the cell mask image that tracking has relabeled to be consistent across images
        im_cell = io.imread(fn)

        #replace any label that's not a cell based on the tracks file with a 0 value
        # Lookup table indexed by label value: tracked labels map to
        # themselves, everything else to 0. int() keeps max()+1 from
        # overflowing in the image's own integer dtype.
        label_values = np.arange(int(im_cell.max())+1)
        cell_labels = np.where(np.isin(label_values, tracked_labels), label_values, 0)
        im_cell_filtered = cell_labels[im_cell]

        # Build the output name from the target directory and the file name;
        # replacing the text "results" in the full path would also rewrite any
        # other "results" that happens to be part of the data path.
        filtered_filename = os.path.join(cell_filtered_path, os.path.basename(fn))
        io.imsave(filtered_filename, im_cell_filtered.astype(np.int16), plugin='tifffile', check_contrast=False)
    

check on need to filter cell masks on tracking results for HC00801 A1 


In [22]:
# Nuclear diameter handed to the nuclear cellpose model for each known cell
# line; any other cell line falls back to 17.
# NOTE(review): presumably in pixels, as expected by cellpose - confirm
nuclear_diameter_by_cellline = {
    "AU565": 13,
    "HCC1143": 17,
    "21MT1": 28,
    "MDAMB157": 20,
}
n_diameter = nuclear_diameter_by_cellline.get(cellline, 17)

n_model_loaded = False

#Segment the nuclei using a second cellpose model
#shrink the nuclei masks to better match the biomarkers in the images
#filter the nuclei to those only within the cell masks
#associate the nuclei with the cells based on maximum area within a cell
##count the number of nuclei in each cell
#relabel the nuclei to the same as the cell label
#create a cyto mask by subtracting the nuclei from the cell
#
# Outputs per field (below tracking/<well>/<field>/):
#   nuc_filtered_masks/<mask file>   nuclei labeled with their cell's label
#   cyto_filtered_masks/<mask file>  cell mask minus the nuclear mask
#   Nbr_nuclei.csv                   number of nuclei per cell and frame
for subdir in subdirectories:
    field = re.findall("field_[1-9]",subdir)[0]
    field_num = re.findall("[0-9]", field)[0]
    track_path = os.path.join(output_path,"tracking",well,field)
    cell_filtered_masks_path = os.path.join(track_path,"cell_filtered_masks")
    nuc_filtered_masks_path = os.path.join(track_path,"nuc_filtered_masks")
    cyto_filtered_masks_path = os.path.join(track_path,"cyto_filtered_masks")
    csv_filename = track_path+"/Nbr_nuclei.csv"
    os.makedirs(nuc_filtered_masks_path, exist_ok=True)
    os.makedirs(cyto_filtered_masks_path, exist_ok=True)

    #condition on whether the nuclear and cyto masks exist
    outputs_complete = (len(os.listdir(nuc_filtered_masks_path)) > 0
                        and len(os.listdir(cyto_filtered_masks_path)) > 0
                        and os.path.isfile(csv_filename))
    if outputs_complete:
        continue

    if not n_model_loaded:
        n_flow_threshold = .4
        n_cellprob_threshold=0
        resample = True
        # define CHANNELS to run segementation on
        # grayscale=0, R=1, G=2, B=3
        # channels = [cytoplasm, nucleus]
        # if NUCLEUS channel does not exist, set the second channel to 0
        n_channels = [0,0]
        # DEFINE CELLPOSE MODELs
        nuc_model = models.Cellpose(gpu=True, model_type='cyto2')
        n_model_loaded = True

    r_reg_filename = os.path.join(registered_stacks_path,plateID+"_R_"+well+"_"+field_num+"_reg_stack.tif")

    sys.stdout.write("Start nuclear segmentation of "+plateID + " "+ well + " " + field+"\n")
    img_rs_reg = io.imread(r_reg_filename)
    #read in the cell masks that have been relabeled and filtered
    cell_filtered_tracked_masks_filenames = sorted(glob.glob(os.path.join(cell_filtered_masks_path,"mask*")))

    if debugging_flag:
        img_rs_reg = img_rs_reg[0:10]

    # Every frame needs its cell mask file; fail before the expensive
    # segmentation starts instead of with an IndexError part-way through.
    if len(cell_filtered_tracked_masks_filenames) < len(img_rs_reg):
        raise RuntimeError("Found "+str(len(cell_filtered_tracked_masks_filenames))
                           +" filtered cell masks for "+str(len(img_rs_reg))
                           +" frames in "+cell_filtered_masks_path)

    nuclei_count_tables = [] #one table per frame that has cells with nuclei
    for i, image in enumerate(img_rs_reg): #loop for each image

        #use the nuclear cellpose model to identify the nuclei within each cell
        n_masks_raw, flows, styles, diams = nuc_model.eval(image,
                                                           diameter=n_diameter,
                                                           net_avg = True,
                                                           flow_threshold=n_flow_threshold,
                                                           cellprob_threshold=n_cellprob_threshold,
                                                           channels=n_channels)

        #shrink the masks to better match the biology
        n_masks = morphology.erosion(n_masks_raw, morphology.disk(2))

        cell_mask_filename = cell_filtered_tracked_masks_filenames[i]
        cell_masks = io.imread(cell_mask_filename)

        #use regionprops with the cell masks to get the nuclear labels
        nuc_images = measure.regionprops_table(cell_masks, intensity_image=n_masks,
                                               properties=('label', 'image', 'image_intensity', 'bbox'))
        #nuc_images holds one entry per cell:
        #label is the cell label's value
        #image is a 2d logical array of the cell mask in its bounding box
        #image_intensity is a 2d integer array of the nuclear mask labels in the cell's bounding box

        #First pass: for each cell, list the nuclear labels inside the cell mask
        #and the number of pixels each of them covers there
        cell_df_list = []
        for nuc_i, nuc_mask_labels in enumerate(nuc_images['image_intensity']):
            #remove nuclear masks that are outside the cell mask (in place)
            nuc_mask_labels[~nuc_images['image'][nuc_i]] = 0

            nuc_masks, counts = np.unique(nuc_mask_labels[nuc_mask_labels != 0], return_counts=True)
            if nuc_masks.size != 0:
                cell_df_list.append(pd.DataFrame({'Cell': nuc_images['label'][nuc_i], 'Nuc_mask': nuc_masks, 'area': counts}))

        #build a new nuclei mask image with nuclei clipped to within one cell and using the cell's label
        final_nuc_mask_image = np.zeros(n_masks.shape, dtype=int)

        nbr_nuclei_list = []
        cell_label_list = []
        if len(cell_df_list) > 0: #update final nuc mask if there are nuclei detected in the image
            cell_df = pd.concat(cell_df_list, ignore_index = True)

            #a nucleus that overlaps several cells is assigned to the cell that contains most of its area
            cell_df = cell_df.sort_values(['area'],ascending=False).groupby('Nuc_mask').head(1).sort_index(ignore_index=True)
            cells_with_nuclei = cell_df['Cell'].to_numpy()

            #Second pass: relabel the nuclei of each cell and paste them into the full size image
            for cell_i, nuc_mask_image in enumerate(nuc_images['image_intensity']):
                cell_label = nuc_images['label'][cell_i]
                #skip cells that no nucleus was assigned to
                if cell_label not in cells_with_nuclei:
                    continue

                #nuc_mask_image was already clipped to the cell mask in the first pass
                #find the nuclei associated with this cell and zero out all others
                associated_nuclei = cell_df['Nuc_mask'][cell_df['Cell'] == cell_label].to_numpy()
                nuc_associated_mask = np.isin(nuc_mask_image, associated_nuclei)
                nuc_mask_image[~nuc_associated_mask] = 0

                #Keep track of the unique number of nuclei in each cell
                nbr_nuclei_list.append(len(associated_nuclei))
                cell_label_list.append(cell_label)

                #Relabel nuclei associated with this cell to the cell's label
                nuc_mask_image[nuc_associated_mask] = cell_label

                #Use the bounding box origin to add the nuclei to the final mask
                min_row = nuc_images['bbox-0'][cell_i]
                min_col = nuc_images['bbox-1'][cell_i]
                max_row = nuc_images['bbox-2'][cell_i]
                max_col = nuc_images['bbox-3'][cell_i]
                #add rather than assign so the zeros of this bounding box do not
                #erase nuclei already placed by a cell with an overlapping box
                final_nuc_mask_image[min_row:max_row,min_col:max_col] += nuc_mask_image

        # Name the outputs from the target directories and the mask file name;
        # replacing the text "cell_" in the full path would also rewrite any
        # other "cell_" that happens to be part of the data path.
        mask_basename = os.path.basename(cell_mask_filename)
        io.imsave(os.path.join(nuc_filtered_masks_path, mask_basename), final_nuc_mask_image, plugin='tifffile', check_contrast=False)
        io.imsave(os.path.join(cyto_filtered_masks_path, mask_basename), cell_masks-final_nuc_mask_image, plugin='tifffile', check_contrast=False)

        if len(cell_label_list) > 0:
            #remember the number of nuclei in each cell of this frame
            nuclei_count_tables.append(pd.DataFrame({'label': cell_label_list,
                                                     'nuclei': nbr_nuclei_list,
                                                     'slice': str(i)}))

    #save the number of nuclei in each cell to disk
    if nuclei_count_tables:
        df_all = pd.concat(nuclei_count_tables, ignore_index = True)
        df_all['well'] = well
        df_all['field'] = field_num
        df_all = df_all[['label','well','field', 'nuclei', 'slice']]
    else:
        df_all = pd.DataFrame(columns = ['label','well','field', 'nuclei'])
    df_all.to_csv(csv_filename, index = False)

                

#### Get excel metadata file  
If this file does not exist, create a level 0 file that is data only  

In [23]:
#If the metadata exists, load it
metadata_filename = os.path.join(data_path,plateID,"metadata",plateID+".xlsx")

if os.path.exists(metadata_filename):
    # drug concentrations are read as text
    md_all = pd.read_excel(metadata_filename, engine='openpyxl', dtype={'Drug1Concentration': str, 'Drug2Concentration': str})

    #remove unwanted columns read in from the excel files
    r = re.compile("Unnamed.*")
    columns_to_drop = [column_name for column_name in md_all.columns if r.match(column_name)]
    metadata = md_all.drop(columns = columns_to_drop)

    #match metadata and data well labels format
    # e.g. a 'Well' value of "A01" becomes row "A", column "1", well "A1"
    well_labels = list(metadata['Well'])
    # row: the label with all digits removed
    row_letters = [re.sub(r'[0-9]*', '', label) for label in well_labels]
    # column: the label with capital letters removed and one leading zero stripped
    column_numbers = [re.sub(r'\A0', '', re.sub(r'[A-Z]', '', label)) for label in well_labels]
    metadata['row'] = row_letters
    metadata['column'] = column_numbers
    metadata['well'] = metadata['row'] + metadata['column']
    

#### Pull data from images  
Apply the filtered masks to the registered red nuclear channel and record each cell's nuclear morphology, intensity and texture  
Create cytoplasm masks by expanding the nuclear masks by a fixed amount or until they collide with another nuclear expansion  
Use the nuclear and cytoplasmic masks to measure intensities in the green fluorescent images  
Calculate intensity ratios between the cytoplasm and nuclei  
If the metadata is available, merge it with the cell level data  
Store the cell level data (and metadata) in a csv file where each row is a cell  
Data feature values can be decoded as follows:  
\<compartment>\_\<pipeline name>\_\<channel name>\_\<regionprops name>  
compartment - Nuclei, Cyto or Cell  
pipeline name - PC for python cellpose or other if added  
channel name - NR for nuclear reporter, CC for cell cycle reporter or others if added  
regionprops name - label passed through from the skimage measure.regionprops function https://scikit-image.org/docs/dev/api/skimage.measure.html#skimage.measure.regionprops  



In [24]:
# Parameters for the per-cell measurements made in this cell.
# NOTE(review): presumably the acquisition interval between consecutive frames - confirm
minutes_between_images = 120
# NOTE(review): the neighborhood distance/radii below are presumably in pixels - confirm against their use
neighborhood_nuclei_distance = 5
neighborhood_radius_near = 20
neighborhood_radius_medium = 45
neighborhood_radius_far = 70
# NOTE(review): presumably the side length of the box used for the ratio images - confirm
ratio_box_side_length = 10

#loop through the fields in the well
for subdir in subdirectories:
    field = re.findall("field_[1-9]",subdir)[0]
    field_num = re.findall("[0-9]", field)[0]
    l0_filename = os.path.join(data_path+plateID,"Analysis",pipeline_name,plateID+"_"+well+"_"+field+"_level_0.csv")
    ratio_stack_filename = os.path.join(output_path,plateID+"_"+well+"_"+field_num+"_ratios.tif")

    #condition on whether the l1 and l2 files exist
    if not os.path.exists(l0_filename.replace('level_0','level_1')):
        sys.stdout.write("Pulling data from images "+l0_filename.replace('_level_0.csv','')+"\n")
        cyto_filtered_mask_path = os.path.join(output_path,"tracking",well,field,"cyto_filtered_masks")
        nuc_filtered_mask_path = os.path.join(output_path,"tracking",well,field,"nuc_filtered_masks")
        cyto_tracked_mask_filenames = sorted(glob.glob(cyto_filtered_mask_path+"/mask*"))
        img_gs_reg = io.imread(os.path.join(registered_stacks_path,plateID+"_G_"+well+"_"+field.replace("field_","")+"_reg_stack.tif"))
        # iterate over the mask files
        results = []
        ratio_image_list = []
        for i, fn in enumerate(cyto_tracked_mask_filenames):
            #read in the cyto mask image
            cyto_masks = io.imread(fn)
            #read in the nuclear mask image
            nuc_masks = io.imread(fn.replace("cyto_","nuc_"))
            #read in registered cell cycle images
            #reg_fn = fn.replace("filtered_masks","reg") #registered phase
            image = img_gs_reg[i]

            #measure cell cycle reporter intensity and nuclear morphology, texture
            cyto = measure.regionprops_table(cyto_masks, intensity_image=image,
                                               properties=('label',
                                                           'area','bbox_area','convex_area','centroid','eccentricity','equivalent_diameter','extent','feret_diameter_max','filled_area',
                                                            'major_axis_length','minor_axis_length','moments_hu','perimeter','perimeter_crofton','solidity',
                                                            'mean_intensity','max_intensity','min_intensity'))
            nuc = measure.regionprops_table(nuc_masks, intensity_image=image,
                                   properties=('label',
                                               'area','bbox_area','convex_area','centroid','eccentricity','equivalent_diameter','extent','feret_diameter_max','filled_area',
                                                'major_axis_length','minor_axis_length','moments_hu','perimeter','perimeter_crofton','solidity',
                                                'mean_intensity','max_intensity','min_intensity'))

            # turn results into a dataframe
            cyto_data = pd.DataFrame(cyto)
            cyto_data.rename(columns={col: 'Cyto_'+ch2_name+'_'+col  for col in cyto_data.columns if col not in ['label']}, inplace=True)

            nuc_data = pd.DataFrame(nuc)
            nuc_data.rename(columns={col: 'Nuclei_'+ch2_name+'_'+col  for col in nuc_data.columns if col not in ['label']}, inplace=True)

            # recover the well and field values and add them to the dataframe
            well = re.findall('/[A-Z][0-9]+/',fn)[0]
            well = re.sub('/','', well)
            cyto_data['well'] = well
            field = re.findall('field_[0-9]+',fn)[0]
            field_num = int(re.sub('field_','', field))
            cyto_data['field'] = field_num
            cyto_data['slice'] = i
            cyto_data['elapsed_minutes'] = i*minutes_between_images #assumes time slice numbering starts at 1
            elapsed_minutes = i*minutes_between_images #assumes time slice numbering starts at 1
            day = np.floor(elapsed_minutes/(24*60)).astype(int)
            hour = np.floor((elapsed_minutes-day*(24*60))/60).astype(int)
            minute = np.floor(elapsed_minutes-day*(24*60)-hour*60).astype(int)
            day = str(day).zfill(2)
            hour = str(hour).zfill(2)
            minute = str(minute).zfill(2)
            cyto_data['time_slice'] = day+"d"+hour+"h"+minute+"m"

            #calculate the neighborhood density on the nuclei centroids ###TODO replace with a measure of cytoplasm
            nuc_kd = spatial.KDTree(nuc_data[['Nuclei_CC_centroid-0','Nuclei_CC_centroid-1']])
            nuc_data['neighborhood_'+str(neighborhood_radius_near)] = nuc_kd.query_ball_point(nuc_data[['Nuclei_CC_centroid-0','Nuclei_CC_centroid-1']],
                                                                                                    r = neighborhood_radius_near, return_sorted = True, return_length=True)
            nuc_data['neighborhood_'+str(neighborhood_radius_medium)] = nuc_kd.query_ball_point(nuc_data[['Nuclei_CC_centroid-0','Nuclei_CC_centroid-1']],
                                                                                                    r = neighborhood_radius_medium, return_sorted = True, return_length=True)
            nuc_data['neighborhood_'+str(neighborhood_radius_far)] = nuc_kd.query_ball_point(nuc_data[['Nuclei_CC_centroid-0','Nuclei_CC_centroid-1']],
                                                                                                    r = neighborhood_radius_far, return_sorted = True, return_length=True)
                        
            #merge the dataframes from the different channels, retain all cells even if there is no nuclear mask
            df_all = pd.merge(cyto_data, nuc_data, how="left", on=["label"])
            
            #Calculate ratio of ch2 cyto to nuclei intensities
            df_all['Cell_'+ch2_name+'_mean_intensity_ratio'] = df_all['Cyto_'+ch2_name+'_mean_intensity']/df_all['Nuclei_'+ch2_name+'_mean_intensity']
            df_all['Cell_'+ch2_name+'_max_intensity_ratio'] = df_all['Cyto_'+ch2_name+'_max_intensity']/df_all['Nuclei_'+ch2_name+'_max_intensity']
            df_all['Cell_'+ch2_name+'_min_intensity_ratio'] = df_all['Cyto_'+ch2_name+'_min_intensity']/df_all['Nuclei_'+ch2_name+'_min_intensity']

            #create an image with a small rectangle of the mean intensity ratio centered on each nuclei
            ratio_image = np.zeros_like(image)
            for i , ratio in enumerate(df_all['Cell_'+ch2_name+'_mean_intensity_ratio']):
                if not np.isnan(ratio):
                    center_y = df_all['Nuclei_CC_centroid-0'][i]
                    center_x = df_all['Nuclei_CC_centroid-1'][i]
                    x_start = (center_x - ratio_box_side_length/2).astype(int)
                    x_start = np.clip(x_start, 0, image.shape[1])
                    x_end = (center_x + ratio_box_side_length/2).astype(int)
                    x_end = np.clip(x_end, 0, image.shape[1])
                    y_start = (center_y - ratio_box_side_length/2).astype(int)
                    y_start = np.clip(y_start, 0, image.shape[0])
                    y_end = (center_y + ratio_box_side_length/2).astype(int)
                    y_end = np.clip(y_end, 0, image.shape[0])
                    #clip positions to within image
                    ratio_image[y_start:y_end, x_start:x_end] = int((ratio*100))

            # append this ratio_images to the list
            ratio_image_list.append(ratio_image)
            # append this image's dataframe to the results list
            results.append(df_all)
            
        #save the ratio stack to disk
        io.imsave(ratio_stack_filename, np.array(ratio_image_list, dtype = 'uint16'), plugin='tifffile', check_contrast=False)

        #concatenate all of the results from all images in the field
        l0_image = pd.concat(results)

        #join with the tracking results to get lineage, parent, frame length values
        tracks_filename = os.path.join(output_path,"tracking",well,"field_"+str(field_num),"results/tracks.csv")
        tracks = pd.read_csv(tracks_filename)
        l0_tracks = pd.merge(l0_image, tracks, how="left", on=["label", "well", "field"])
        
        #join with the nuclei counts
        nuc_count_filename = os.path.join(output_path,"tracking",well,"field_"+str(field_num),"Nbr_nuclei.csv")
        nuc_counts = pd.read_csv(nuc_count_filename)
        l0 = pd.merge(l0_tracks, nuc_counts, how="left", on=["label", "well", "field", "slice"])

        if os.path.exists(metadata_filename):
            #merge data and metadata on well values
            l1= pd.merge(l0, metadata, how="left", on=["well"]).round(decimals=2)
            l1['treatment'] =  l1['Drug1']+'_'+l1['Drug1Concentration']+'_'+l1['Drug2']+'_'+l1['Drug2Concentration']
            sys.stdout.write("Writing "+l0_filename.replace('level_0','level_1') + " to disk"+"\n")
            l1.to_csv(l0_filename.replace('level_0','level_1'), index = False)
        else:
            sys.stdout.write("no metadata file for "+plateID+" so creating level 0 file"+"\n")
            l0 = l0.round(decimals=2)
            l0.to_csv(l0_filename, index = False)