### Format and register images for ilastik segmentation

#### Details
    Load a sequence of red channel tiffs
    Subtract the background using a rolling ball filter with a radius of 25 pixels
    Auto correct the brightnes and contrast
    
    Load the corresponding phase channel tiffs
    
    Merge the two channels into a single image sequence, either RGB or 2 channel composite
    
    Register the merged images
    Save the registered images in HDF5 format
    Save the transformation matrices
    
  Generalize to run on all image sequences in a plate

In [1]:
#setup libraries
import pandas as pd
import numpy as np
import os, re, glob, shutil
from skimage import io, util, filters, morphology, exposure, color, measure, segmentation
import numpy as np
from pystackreg import StackReg
import matplotlib.pyplot as plt
from matplotlib import colors


def random_cmap():
    np.random.seed(42)
    cmap = colors.ListedColormap (np.random.rand(256,4))
    # value 0 should just be transparent
    cmap.colors[:,3] = 0.5
    cmap.colors[0,:] = 1
    cmap.colors[0,3] = 0
    
    # if image is a mask, color (last value) should be red
    cmap.colors[-1,0] = 1
    cmap.colors[-1,1:3] = 0
    return cmap

# load random colormap
#from course_functions import random_cmap
cmap = random_cmap()

data_path = '/home/exacloud/gscratch/HeiserLab/images/'
plateID = 'AU00601_subset'
well = 'C4'
field = '3'

data_paths =glob.glob(os.path.join(data_path+plateID+'/'+well+'/','field_'+field+'/*_R_*m.tif'))
df = pd.DataFrame(data_paths, columns =['path']).sort_values(by='path', ignore_index = True)

use collection method to read in our image files
then combine red, green and phase into an RGB file
save file and test in ilastik to see if it matches Sean's format

for path_name in df['path']:
    path_name_p = str.replace(path_name, '_R_', '_P_')
    path_name_g = str.replace(path_name, '_R_', '_G_')
    
    # load the image of the red nuclear channel and run a smoothing filter
    image_r = io.imread(path_name)
    image_rf = filters.median(image_r, selem=morphology.disk(2))
    
    # load the phase image for display purposes
    image_p = io.imread(path_name_p)

    #Combine phase and nuclear images with nuceli in R 1 channel and phase in G 2 channel
    image = np.dstack([image_rf, image_p])


In [117]:
data_path = '/home/exacloud/gscratch/HeiserLab/images/'
plateID = 'AU00601'
well = 'C4'
field = 'field_3'
flourescent_scaler = 255/4095 #rescale from 12 to 8 bits

#load and prepare red, green and phase channels. Scale for 8 bits but these are uint16 data types
r_data_paths = glob.glob(os.path.join(data_path+plateID+'/'+well+'/',field+'/',"*_R_*m.tif"))
img_r_ic = io.imread_collection(r_data_paths) # 3 dimensions : frames x width x height
img_rs = np.stack(img_r_ic)*flourescent_scaler

g_data_paths = glob.glob(os.path.join(data_path+plateID+'/'+well+'/',field+'/',"*_G_*m.tif"))
img_g_ic = io.imread_collection(g_data_paths) # 3 dimensions : frames x width x height
img_gs = np.stack(img_g_ic)*flourescent_scaler

p_data_paths = glob.glob(os.path.join(data_path+plateID+'/'+well+'/',field+'/',"*_P_*m.tif"))
img_p_ic = io.imread_collection(p_data_paths) # 3 dimensions : frames x width x height
img_ps = np.stack(img_p_ic)

img_c = np.stack([img_rs.astype('B'), img_gs.astype('B'), img_ps.astype('B')], axis = -1) 

#Filter and change contrast on each red image in the stack
#img_rfs = np.empty_like(img_rs)
#for pln, image in enumerate(img_rs):
    # Iterate over the leading dimension
    # Contrast stretching
#    p2, p98 = np.percentile(image, (.2, 99.8))
#    image_rescale = exposure.rescale_intensity(image, in_range=(p2, p98))
#    img_rfs[pln] = filters.gaussian(image_rescale, sigma = .4, preserve_range=True)

#sr = StackReg(StackReg.TRANSLATION)

# register each frame using the red images
#reg_reference = 'previous'
#tmats = sr.register_stack(img_rs, reference=reg_reference,axis=0)
#img_rs_reg = sr.transform_stack(img_rs, tmats=tmats)
#img_gs_reg = sr.transform_stack(img_gs, tmats=tmats)
#img_ps_reg = sr.transform_stack(img_ps, tmats=tmats) 

#combine into a 3 channel inage and change types to unsigned 8 bit integer
#img_c_reg = np.stack([img_rs_reg.astype('B'), img_gs_reg.astype('B'), img_ps_reg.astype('B')], axis = -1) 

#red_scaler = 0.7
#red_multiplier = [red_scaler, 0, 0]

#img_p = color.gray2rgb(img_ps_reg.astype(int))
#img_r = color.gray2rgb(exposure.rescale_intensity(img_rfs_reg, out_range=np.uint8))
#img_c = (red_multiplier*img_r+img_p)/(1+red_scaler)
#img_c = img_r*[.67,0,0]+img_p*[.33, .33, .33]
#img_rp = color.gray2rgb(img_c.astype(int))

#r_filename = data_path+plateID+'/'+well+'/'+field+'/registered_stacks/'+well+'_'+field+'_R_'+reg_reference+'.tif'
#g_filename = data_path+plateID+'/'+well+'/'+field+'/registered_stacks/'+well+'_'+field+'_G_'+reg_reference+'.tif'
#p_filename = data_path+plateID+'/'+well+'/'+field+'/registered_stacks/'+well+'_'+field+'_P_'+reg_reference+'.tif'
#rp_filename = data_path+plateID+'/'+well+'/'+field+'/registered_stacks/'+well+'_'+field+'_RP_'+reg_reference+'.tif'
c_filename = data_path+plateID+'/'+well+'/'+field+'/output_stacks/'+well+'_'+field+'_RGP_stack.tif'
#c_reg_filename = data_path+plateID+'/'+well+'/'+field+'/registered_stacks/'+well+'_'+field+'_RGP_'+reg_reference+'.tif'

if not os.path.exists(data_path+plateID+'/'+well+'/'+field+'/output_stacks/'):
        os.makedirs(data_path+plateID+'/'+well+'/'+field+'/output_stacks/')

#io.imsave(r_filename, img_rfs_reg, plugin='tifffile')
#io.imsave(g_filename, img_gs_reg, plugin='tifffile')
#io.imsave(p_filename, img_ps_reg, plugin='tifffile')
#io.imsave(rp_filename, img_c.astype(int), plugin='tifffile', bigtiff=False)
io.imsave(c_filename, img_c, plugin='tifffile')
#io.imsave(c_reg_filename, img_c_reg, plugin='tifffile')

#np.save(data_path+plateID+'/'+well+'/'+field+'/registered_stacks/'+plateID+'_'+well+'_'+field+'_R_'+reg_reference+'_transformation_matrices.npy', tmats)

In [138]:
#from skimage import color
#from skimage import img_as_float
#red_scaler = 0.7
#red_multiplier = [red_scaler, 0, 0]

#img = color.gray2rgb(img_ps_reg[0].astype(int))
#img_r = color.gray2rgb(exposure.rescale_intensity(img_rfs_reg[0], out_range=np.uint8))
#img_c = (red_multiplier*img_r+img)/(1+red_scaler)

#fig, ax1 = plt.subplots(ncols=1, figsize=(64, 32),
#                              sharex=True, sharey=True)
#ax1.imshow(img_c[0,:,:,:],cmap = 'gray')

The next pipeline step is to run ilastik on the 3 channel files and output pixel masks for each image.

Instead of using Cellprofiler, convert the pixel masks to nuclei masks and apply them to the green images

In [118]:
#load the pixel masks
pixel_data_paths = glob.glob(os.path.join(data_path+plateID+'/'+well+'/',field+'/','registered_stacks/development_dataset/'+well+'_'+field+'_RGP_*.tif'))
img_pixel_ic = io.imread_collection(pixel_data_paths) # 3 dimensions : frames x width x height
img_pixel_stack = np.stack(img_pixel_ic)

In [163]:
pipeline_name = 'PI' # python + ilastik
ch1_name = 'NR'
ch2_name = 'CC'
cyto_expansion = 5
minimum_nuclear_radius = 3
minimum_nuclear_area = 3.14*minimum_nuclear_radius**2

nuclei_masks_raw = img_pixel_stack == 4 # set nuclei pixels to True and the rest to False
results = []
for img_num, image in enumerate(nuclei_masks_raw[0:1,:,:]):
    # open masks to delete small regions
    nuclei_masks_open = morphology.binary_opening(image, selem=morphology.disk(2))     
    
    # label the masks with unique integers starting at 0
    nuclei_masks_all = measure.label(nuclei_masks_open)

    nuclei_g = measure.regionprops_table(nuclei_masks_all, intensity_image = img_gs[img_num], properties=('label', 'area','eccentricity',
                                                                                                                      'mean_intensity','max_intensity','min_intensity'))
    #remove masks too small to be a nucleus
    indices_to_keep = np.array([x if x-1 in np.argwhere(nuclei_g['area']>minimum_nuclear_area)
                                else 0 for x in range(nuclei_g['label'].max()+1)])
    nuclei_masks = indices_to_keep[nuclei_masks_all]
    
    #expand the masks to get cytoplasmic regions
    nuclei_boundaries = segmentation.find_boundaries(nuclei_masks, mode='thick')*nuclei_masks
    nuclei_expansions = segmentation.expand_labels(nuclei_masks, cyto_expansion) - nuclei_masks + nuclei_boundaries
    nuclei_exp_g = measure.regionprops_table(measure.label(nuclei_expansions), intensity_image = img_gs[img_num],
                                                 properties=('label','mean_intensity','max_intensity','min_intensity'))

    # turn results into a dataframe
    nuclei_g_data = pd.DataFrame(nuclei_g)
    nuclei_g_data.rename(columns={col: 'Nuclei_'+pipeline_name+'_' +ch2_name+'_'+col  for col in nuclei_g_data.columns if col not in ['label']}, inplace=True)
    

    nuclei_exp_g_data = pd.DataFrame(nuclei_exp_g)
    nuclei_exp_g_data.rename(columns={col: 'Cyto_'+pipeline_name+'_' +ch2_name+'_'+col  for col in nuclei_exp_g_data.columns if col not in ['label']}, inplace=True)
    
    # add an image number and collect the data                                                                                                             
    nuclei_g_data['image'] = img_num+1
    #Calculate ratio of ch2 cyto to nuclei intensities
    nuclei_exp_g_data['Cell_'+pipeline_name+'_' +ch2_name+'_mean_intensity_ratio'] = nuclei_exp_g_data['Cyto_'+pipeline_name+'_' +ch2_name+'_mean_intensity']/nuclei_g_data['Nuclei_'+pipeline_name+'_' +ch2_name+'_mean_intensity']
    nuclei_exp_g_data['Cell_'+pipeline_name+'_' +ch2_name+'_max_intensity_ratio'] = nuclei_exp_g_data['Cyto_'+pipeline_name+'_' +ch2_name+'_max_intensity']/nuclei_g_data['Nuclei_'+pipeline_name+'_' +ch2_name+'_max_intensity']
    nuclei_exp_g_data['Cell_'+pipeline_name+'_' +ch2_name+'_min_intensity_ratio'] = nuclei_exp_g_data['Cyto_'+pipeline_name+'_' +ch2_name+'_min_intensity']/nuclei_g_data['Nuclei_'+pipeline_name+'_' +ch2_name+'_min_intensity']

    #concatenate the dataframes
    df_all = pd.concat([nuclei_g_data, nuclei_exp_g_data], axis=1, join="outer")
    results.append(df_all)
    
    #Save mask image
    mask_filename = data_path+plateID+'/'+well+'/'+field+'/output_stacks/'+well+'_'+field+'_image'+str(img_num)+'_nuclei_masks.tif'
    io.imsave(mask_filename, nuclei_masks.astype('uint16'))
    cyto_mask_filename = data_path+plateID+'/'+well+'/'+field+'/output_stacks/'+well+'_'+field+'_image'+str(img_num)+'_cyto_masks.tif'
    io.imsave(cyto_mask_filename, nuclei_expansions.astype('uint16'))
    



In [153]:
#concatenate all of the results
all_results = pd.concat(results)
all_results

Unnamed: 0,label,Nuclei_PI_CC_area,Nuclei_PI_CC_eccentricity,Nuclei_PI_CC_mean_intensity,Nuclei_PI_CC_max_intensity,Nuclei_PI_CC_min_intensity,image,label.1,Cyto_PI_CC_mean_intensity,Cyto_PI_CC_max_intensity,Cyto_PI_CC_min_intensity,Cell_PI_CC_mean_intensity_ratio,Cell_PI_CC_max_intensity_ratio,Cell_PI_CC_min_intensity_ratio
0,1,34,0.908856,98.613553,112,89,1,1.0,51.346614,79.0,26.0,0.520685,0.705357,0.292135
1,2,68,0.800255,41.157509,46,35,1,2.0,87.992156,112.0,32.0,2.137937,2.434783,0.914286
2,3,41,0.624072,41.173323,43,38,1,3.0,31.397788,40.0,25.0,0.762576,0.930233,0.657895
3,4,94,0.533412,37.437495,39,34,1,4.0,35.994938,42.0,27.0,0.961468,1.076923,0.794118
4,5,65,0.544949,71.764046,79,61,1,5.0,33.218808,37.0,28.0,0.462889,0.468354,0.459016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324,325,62,0.608257,23.749380,25,22,192,,,,,,,
325,326,116,0.482275,25.847859,27,24,192,,,,,,,
326,327,172,0.793262,31.631165,35,28,192,,,,,,,
327,328,82,0.620630,30.746717,35,25,192,,,,,,,


mainpath = '../../../images/'+plateID+'/Analysis/CL/'
if not os.path.exists(mainpath):
    os.makedirs(mainpath, exist_ok=True)
all_results.to_csv(mainpath+plateID+'_'+well+'_level_0.csv')

results = []
#Segment using the nuclear signal in the grayscale image from the red channel

# create an index that allows us to go through each pair of images
for path_name in df['path'][0:1]:
    path_name_p = str.replace(path_name, '_R_', '_P_')
    path_name_g = str.replace(path_name, '_R_', '_G_')
    
    # load the image of the red nuclear channel and run a smoothing filter
    image_r = io.imread(path_name)
    image_rf = filters.median(image_r, selem=morphology.disk(2))
    
    # load the phase image for display purposes
    image_p = io.imread(path_name_p)

    # load the green image for display purposes
    image_g = io.imread(path_name_g)

    #Combine phase and nuclear images with nuceli in R 1 channel and phase in G 2 channel
    #image = np.dstack([image_rf, image_p])

    # create masks with cellpose 
    masks, flows, styles, diams = model.eval(image_rf, diameter=diameter, flow_threshold=flow_threshold, cellprob_threshold=cellprob_threshold, channels=channels, min_size=min_size, resample = resample)

    #measure reporter intensity and nuclear morphology, texture
    nuclei = measure.regionprops_table(measure.label(masks), intensity_image=image_r,
                                           properties=('label',
                                                       'area','bbox_area','convex_area','centroid','eccentricity','equivalent_diameter','extent','feret_diameter_max','filled_area',
                                                        'major_axis_length','minor_axis_length','moments_hu','perimeter','perimeter_crofton','solidity',
                                                        'mean_intensity','max_intensity','min_intensity'))
    #Save mask image
    mask_filename = path_name.replace(".tif", "_masks.png")
    io.imsave(mask_filename, masks)
    
    #expand the masks to get cytoplasmic regions
    nuclei_boundaries = segmentation.find_boundaries(masks, mode='thick')*masks
    nuclei_expansions = segmentation.expand_labels(masks, cyto_expansion) - masks + nuclei_boundaries
    
    # measure nuclear and cytoplasmic intensities and textures in the green channel
    nuclei_g = measure.regionprops_table(measure.label(masks), intensity_image=image_g,
                                           properties=('label',
                                                        'mean_intensity','max_intensity','min_intensity'))
    nuclei_exp_g = measure.regionprops_table(measure.label(nuclei_expansions), intensity_image=image_g,
                                                 properties=('label',
                                                             'mean_intensity','max_intensity','min_intensity'))
    
    # turn results into a dataframe
    nuclei_data = pd.DataFrame(nuclei)
    nuclei_data.rename(columns={col: 'Nuclei_'+pipeline_name+'_' +ch1_name+'_'+col  for col in nuclei_data.columns if col not in ['label']}, inplace=True)
   
    nuclei_g_data = pd.DataFrame(nuclei_g)
    nuclei_g_data.rename(columns={col: 'Nuclei_'+pipeline_name+'_' +ch2_name+'_'+col  for col in nuclei_g_data.columns if col not in ['label']}, inplace=True)
    

    nuclei_exp_g_data = pd.DataFrame(nuclei_exp_g)
    nuclei_exp_g_data.rename(columns={col: 'Cyto_'+pipeline_name+'_' +ch2_name+'_'+col  for col in nuclei_exp_g_data.columns if col not in ['label']}, inplace=True)
       
    # recover the well and field values and add them to the dataframe
    well = re.findall('_[A-Z][0-9]+_',path_name)[0]
    well = re.sub('_','', well)
    nuclei_data['well'] = well
    field = re.findall('_[0-9]+_',path_name)[0]
    field = re.sub('_','', field)
    nuclei_data['field'] = field
    time_slice = re.findall('[a-z0-9]*.tif',path_name)[0]
    time_slice = re.sub('.tif','', time_slice)
    nuclei_data['time'] = time_slice
    
    #Calculate ratio of ch2 cyto to nuclei intensities
    nuclei_exp_g_data['Cell_'+pipeline_name+'_' +ch2_name+'_mean_intensity_ratio'] = nuclei_exp_g_data['Cyto_'+pipeline_name+'_' +ch2_name+'_mean_intensity']/nuclei_g_data['Nuclei_'+pipeline_name+'_' +ch2_name+'_mean_intensity']
    nuclei_exp_g_data['Cell_'+pipeline_name+'_' +ch2_name+'_max_intensity_ratio'] = nuclei_exp_g_data['Cyto_'+pipeline_name+'_' +ch2_name+'_max_intensity']/nuclei_g_data['Nuclei_'+pipeline_name+'_' +ch2_name+'_max_intensity']
    nuclei_exp_g_data['Cell_'+pipeline_name+'_' +ch2_name+'_min_intensity_ratio'] = nuclei_exp_g_data['Cyto_'+pipeline_name+'_' +ch2_name+'_min_intensity']/nuclei_g_data['Nuclei_'+pipeline_name+'_' +ch2_name+'_min_intensity']

    #concatenate the dataframes from the different channels
    df_all = pd.concat([nuclei_data, nuclei_g_data, nuclei_exp_g_data], axis=1, join="outer")
    # append the dataframe to the results list
    results.append(df_all)
    print("processing "+path_name)

In [154]:
mainpath = '../../../images/'+plateID+'/Analysis/PI/'
if not os.path.exists(mainpath):
    os.makedirs(mainpath, exist_ok=True)
all_results.to_csv(mainpath+plateID+'_'+well+'_level_0.csv')