# Infer NUCLEI - 1️⃣

--------------

## OBJECTIVE: ✅ Infer sub-cellular component #1: NUCLEI  in order to understand interactome 

To measure the shape, position, size, and interactions of eight organelles/cellular components; this notebook infers the first of them, the Nuclei (NU).  

Dependencies:
SOMA and CYTOSOL inference rely on the Nuclei inference.  Therefore all of the sub-cellular objects rely on the NU segmentation.





# IMPORTS

In [1]:
# top level imports
from pathlib import Path
import os, sys
from collections import defaultdict

import numpy as np
import scipy

# # function for core algorithm
from scipy import ndimage as ndi
import aicssegmentation
from aicssegmentation.core.seg_dot import dot_3d_wrapper, dot_slice_by_slice, dot_2d_slice_by_slice_wrapper, dot_3d
from aicssegmentation.core.pre_processing_utils import ( intensity_normalization, 
                                                         image_smoothing_gaussian_3d,  
                                                         image_smoothing_gaussian_slice_by_slice )
from aicssegmentation.core.utils import topology_preserving_thinning
from aicssegmentation.core.MO_threshold import MO
from aicssegmentation.core.utils import hole_filling
from aicssegmentation.core.vessel import filament_2d_wrapper, vesselnessSliceBySlice
from aicssegmentation.core.output_utils import   save_segmentation,  generate_segmentation_contour
                                                 
from skimage import filters
from skimage import morphology
from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from skimage.morphology import remove_small_objects, binary_closing, ball , dilation   # function for post-processing (size filter)
from skimage.measure import label

# # package for io 
from aicsimageio import AICSImage

import napari

### import local python functions in ../infer_subc
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))

from infer_subc.utils.file_io import read_input_image, list_image_files, export_ome_tiff
from infer_subc.utils.img import *

%load_ext autoreload
%autoreload 2

from infer_subc.organelles.nuclei import infer_NUCLEI

SyntaxError: invalid syntax (nuclei.py, line 40)

# IMAGE PROCESSING  OBJECTIVE :  infer NUCLEI
 

NOTE:  using Allen Cell Segmenter  [Nucleophosmin](https://www.allencell.org/cell-observations/category/nucleophosmin) might be a good generic mechanism.  e.g.
-  [playground_npm1.ipynb](https://github.com/AllenInstitute/aics-segmentation/blob/master/lookup_table_demo/playground_npm1.ipynb) and [npm1.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1.py) and [npm1_SR.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1_SR.py)


> #### Note:  this initial inferred object -- the Nuclei of the brightest cell -- will be used in inferring the Soma and Cytosol objects.   This is a straightforward procedure, but also note that any inconsistencies will flow into the Soma and Cytosol objects which in turn affect ALL inferred objects.


------------------------
# LOAD RAW IMAGE DATA
Identify path to _raw_ image data and load our example image


In [2]:
# Locate the imaging data on disk.
# CUSTOMIZE HERE ---> `data_root_path` is the folder holding all the imaging data.
data_root_path = Path(os.path.expanduser("~"), "Projects", "Imaging", "data")

# The linearly unmixed ".czi" files live in the "raw" sub-folder.
im_type = ".czi"
data_path = data_root_path / "raw"

# Collect every matching file, then pick one of them as the example image
# used throughout the rest of this notebook.
img_file_list = list_image_files(data_path, im_type)
test_img_name = img_file_list[5]


In [3]:
# sanity check: display the home directory that data_root_path is built from
Path(os.path.expanduser("~"))

PosixPath('/Users/ahenrie')

In [3]:

# read the example image; the returned object exposes .image, .raw_meta and .meta
bioim_image = read_input_image(test_img_name)
img_data = bioim_image.image
raw_meta_data = bioim_image.raw_meta
ome_types = []  # placeholder; nothing is added to it in this cell
meta_dict = bioim_image.meta


# get some top-level info about the RAW data
channel_names = meta_dict['name']
img = meta_dict['metadata']['aicsimage']
scale = meta_dict['scale']  # later cells use scale[0] / scale[1] to compare the Z scale with the in-plane scale
channel_axis = meta_dict['channel_axis']



  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


In [4]:

# open a napari viewer on the raw data; the first axis is passed as the channel
# axis and each channel is named from the image metadata
viewer = napari.view_image(
    img_data,
    channel_axis=0,
    name=channel_names,
    scale=scale
)
viewer.scale_bar.visible = True


Assistant skips harvesting pyclesperanto as it's not installed.



# IMAGE PROCESSING Objective 1:  infer NUCLEI
 
## details

➡️ INPUT

- channel 0

PRE-PROCESSING
-  scale to min 0, max 1.0
- median Filter window 4
-  gaussian 1.34

CORE-PROCESSING
  - threshold method minimum cross-entropy.  
    - objects 50-400 pixels, 
    - threshold smoothing scale: 1.34 (later 1 pixel)
    - threshold correction factor: 0.9 (later 1.2 )
    - lower / upper bounds  (.1,1) ?
    - log transformed thresholding
    - fill holes


POST-PROCESSING
  - fill holes
  - remove small objects


OUTPUT ➡️ 
- labels of NUCLEI


> #### Note:  in later steps we will limit each analysis to a single object, but at this stage we have multiple


In [5]:


# Naming used when the inferred nuclei object is exported.
chan_name = 'nuclei'
# Inferred objects are written next to (not inside) the "raw" folder, i.e. the
# same location the export cell at the end of the notebook uses.
out_path = data_root_path / "inferred_objects"
object_name = 'NU_object'


In [6]:
##########################################################################
# DEFAULT PARAMETERS:
#   note that these parameters are supposed to be fixed for the structure
#   and work well across different datasets
from collections import defaultdict

# defaultdict(str): indexing a key that was never set yields "" (and stores it)
# instead of raising KeyError.
default_params = defaultdict(str, **{
    # "intensity_norm_param" : [0.5, 15]
    "intensity_norm_param" : [0],
    "gaussian_smoothing_sigma" : 1.34,
    "gaussian_smoothing_truncate_range" : 3.0,
    "dot_2d_sigma" : 2,
    "dot_2d_sigma_extra" : 1,
    "dot_2d_cutoff" : 0.025,
    "min_area" : 10,
    "low_level_min_size" :  100,
    "median_filter_size" : 4
})


################################

# calculate a filter dimension for median filtering which considers the difference in scale of Z
z_factor = scale[0]//scale[1]

# Read the working values back from default_params so the dict stays the single
# source of truth and the notebook-level variables cannot drift away from it.
med_filter_size = default_params["median_filter_size"]  # 2D window
med_filter_size_3D = (1, med_filter_size, med_filter_size)  # set the scale for a typical median filter
print(f"median filtering scale is ~ : { [x*y for x,y in zip(scale,med_filter_size_3D)]}")

default_params['z_factor'] = z_factor
default_params['scale'] = scale


gaussian_smoothing_sigma = default_params["gaussian_smoothing_sigma"]
gaussian_smoothing_truncate_range = default_params["gaussian_smoothing_truncate_range"]


median filtering scale is ~ : [0.5804527163320905, 0.3194866073934927, 0.3194866073934927]


DATA IMPORT

Get the "raw" signals we need to analyze as well as any other dependencies in "inferred" objects.  

> NOTE: we are operating on a single "test" image in this notebook.  The batch-processing of all the images will happen at the end of the notebook after we have developed/confirmed the segmentation procedures and parameter settings.

In [7]:


###################
# INPUT
###################
# index 0 along the first axis is the nuclei channel; .copy() gives the
# following cells their own array to work on
raw_nuclei = img_data[0,:,:,:].copy()
# the remaining channels are not needed for the nuclei inference (kept for reference):
# raw_lyso    = img_data[1,:,:,:].copy()
# raw_mito    = img_data[2,:,:,:].copy()
# raw_golgi   = img_data[3,:,:,:].copy()
# raw_peroxi = img_data[4,:,:,:].copy()
# raw_ER      = img_data[5,:,:,:].copy()
# raw_lipid   = img_data[6,:,:,:].copy()
# raw_residual = img_data[7,:,:,:].copy()
# total_flourescence = intensity_normalization(img_data.copy(), scaling_param=[0]).sum(axis=0)
# total_flourescence_scaled = intensity_normalization(total_flourescence, scaling_param=[0])



## PRE- PROCESSING

In [8]:


###################
# PRE_PROCESSING
###################
# 1. intensity normalization with scaling_param=[0]
#    (reported by the library as min-max with no absolute upper bound)
struct_img = intensity_normalization(raw_nuclei.copy(), scaling_param=[0])

# 2. median filter, slice-by-slice variant
#    (a full 3D ndi.median_filter was tried: very little difference in 2D vs 3D)
structure_img_median = median_filter_slice_by_slice(struct_img, size=med_filter_size)

# 3. gaussian smoothing, slice-by-slice variant
structure_img_smooth = image_smoothing_gaussian_slice_by_slice(
    structure_img_median,
    sigma=gaussian_smoothing_sigma,
    truncate_range=gaussian_smoothing_truncate_range,
)


intensity normalization: min-max normalization with NO absoluteintensity upper bound



> #### NOTE: Thresholding
> [Thresholding](https://en.wikipedia.org/wiki/Thresholding_%28image_processing%29) is used to create binary images. A threshold value determines the intensity value separating foreground pixels from background pixels. Foreground pixels are pixels brighter than the threshold value, background pixels are darker. In many cases, images can be adequately segmented by thresholding followed by labelling of *connected components*, which is a fancy way of saying "groups of pixels that touch each other".
> 
> Different thresholding algorithms produce different results. [Otsu's method](https://en.wikipedia.org/wiki/Otsu%27s_method) and [Li's minimum cross entropy threshold](https://scikit-image.org/docs/dev/auto_examples/developers/plot_threshold_li.html) are two common algorithms. Below, we use Li. You can use `skimage.filters.threshold_<TAB>` to find different thresholding methods.

The _Li_ procedure better matches the CellProfiler pipeline, which simply calls it "Minimum Cross Entropy".


## CORE PROCESSING

In [9]:

###################
# CORE_PROCESSING
###################

# earlier experiments, kept for reference:
#tol = max(numpy.min(numpy.diff(numpy.unique(structure_img_median))) / 2, 0.5 / 65536) #assumes 16bit?
#threshold_value = filters.threshold_li(structure_img_smooth)

# threshold_li_log comes from the infer_subc.utils.img star import; presumably
# Li's minimum cross-entropy threshold on log-transformed intensities -- TODO confirm
threshold_value_log = threshold_li_log(structure_img_smooth)

threshold_factor = 0.9 #from cellProfiler
thresh_min = 0.1
thresh_max = 1.0
# scale the raw threshold by the correction factor, then clamp it to [thresh_min, thresh_max]
threshold = min( max(threshold_value_log*threshold_factor, thresh_min), thresh_max)

# display the threshold that will actually be applied
threshold

0.1

In [10]:


# boolean mask: elements of the smoothed image strictly brighter than the clamped threshold
li_thresholded = structure_img_smooth > threshold


## POST PROCESSING

In [11]:
###################
# POST_PROCESSING
###################

hole_width = 5  
# fill holes in the mask; hole_width**3 is passed as the size limit for a hole to be filled
removed_holes = morphology.remove_small_holes(li_thresholded, hole_width ** 3 )


small_object_max = 5
# NOTE(review): min_size is a cubic (volume-like) value but the filter runs
# "slice_by_slice" -- confirm this is the intended size limit per 2D slice
cleaned_img = aicssegmentation.core.utils.size_filter(removed_holes, # wrapper to remove_small_objects which can do slice by slice
                                                         min_size= small_object_max**3, 
                                                         method = "slice_by_slice", #"3D", # 
                                                         connectivity=1)



OUTPUT + Visualize Results

In [12]:
# final names for the inferred nuclei:
NU_object = cleaned_img  # cleaned mask (holes filled, small objects removed)
NU_labels = label(cleaned_img   )  # connected-component labels of that mask
NU_signal = struct_img  # the intensity-normalized image (before median/gaussian filtering)

In [13]:

# overlay the inferred mask on the raw channels, using the same physical scale
viewer.add_image(
    NU_object,
    scale=scale,
    opacity=0.3,
)    


# overlay the per-object labels as a labels layer
viewer.add_labels(
    NU_labels,
    scale=scale,
    opacity=0.3,
)



<Labels layer 'NU_labels' at 0x170987c10>

# DEFINE `infer_NUCLEI` function

In [14]:
# copy this to base.py for easy import

def _param_or_default(params, key, default):
    """
    Look up `key` in `params`, falling back to `default` when it is not usable.

    `dict.get` is used so that a `defaultdict` is not mutated by the lookup.
    `None` and the empty string are treated as "not set": the notebook builds its
    parameter dict as `defaultdict(str)`, whose auto-created entries are "".
    """
    value = params.get(key, default)
    if value is None or (isinstance(value, str) and value == ""):
        return default
    return value


def _infer_NUCLEI(struct_img, in_params) -> tuple:
    """
    Procedure to infer NUCLEI from linearly unmixed input.

    Steps: intensity normalization -> slice-by-slice median filter ->
    slice-by-slice gaussian smoothing -> `threshold_li_log` threshold (scaled by
    a correction factor and clamped) -> hole filling -> small-object removal.

    Parameters:
    ------------
    struct_img: np.ndarray
        a 3d image containing the NUCLEI signal

    in_params: dict
        holds the needed parameters.  Recognized keys (value used when the key
        is missing, None or ""):
            "intensity_norm_param" ([0]), "median_filter_size" (4),
            "gaussian_smoothing_sigma" (1.34),
            "gaussian_smoothing_truncate_range" (3.0),
            "threshold_factor" (0.9), "thresh_min" (0.1), "thresh_max" (1.0),
            "hole_width" (5), "small_object_max" (5)
        The dict itself is not modified.

    Returns:
    -------------
    tuple of:
        object
            mask defined boundaries of NU
        label
            label (could be more than 1)
        parameters: dict
            copy of `in_params` updated with every parameter value actually used

    """

    out_p = in_params.copy()

    ###################
    # PRE_PROCESSING
    ###################

    scaling_param = _param_or_default(in_params, "intensity_norm_param", [0])
    struct_img = intensity_normalization(struct_img, scaling_param=scaling_param)
    out_p["intensity_norm_param"] = scaling_param

    # slice-by-slice median filter (a full 3D ndi.median_filter is the alternative)
    med_filter_size = _param_or_default(in_params, "median_filter_size", 4)
    struct_img = median_filter_slice_by_slice(struct_img, size=med_filter_size)
    out_p["median_filter_size"] = med_filter_size

    gaussian_smoothing_sigma = _param_or_default(
        in_params, "gaussian_smoothing_sigma", 1.34
    )
    gaussian_smoothing_truncate_range = _param_or_default(
        in_params, "gaussian_smoothing_truncate_range", 3.0
    )
    struct_img = image_smoothing_gaussian_slice_by_slice(
        struct_img,
        sigma=gaussian_smoothing_sigma,
        truncate_range=gaussian_smoothing_truncate_range,
    )
    out_p["gaussian_smoothing_sigma"] = gaussian_smoothing_sigma
    out_p["gaussian_smoothing_truncate_range"] = gaussian_smoothing_truncate_range

    ###################
    # CORE_PROCESSING
    ###################

    # Only the log-based threshold is used; the plain filters.threshold_li mask
    # that used to be computed here was overwritten before ever being read.
    threshold_value_log = threshold_li_log(struct_img)

    threshold_factor = _param_or_default(in_params, "threshold_factor", 0.9)  # 0.9 from cellProfiler
    thresh_min = _param_or_default(in_params, "thresh_min", 0.1)
    thresh_max = _param_or_default(in_params, "thresh_max", 1.0)
    # apply the correction factor, then clamp to [thresh_min, thresh_max]
    threshold = min(max(threshold_value_log * threshold_factor, thresh_min), thresh_max)
    out_p['threshold_factor'] = threshold_factor
    out_p['thresh_min'] = thresh_min
    out_p['thresh_max'] = thresh_max

    struct_obj = struct_img > threshold

    ###################
    # POST_PROCESSING
    ###################

    # fill holes; hole_width**3 is passed as the size limit for a hole to be filled
    hole_width = _param_or_default(in_params, "hole_width", 5)
    struct_obj = morphology.remove_small_holes(struct_obj, hole_width ** 3)
    out_p['hole_width'] = hole_width

    # size_filter: wrapper to remove_small_objects which can do slice by slice
    small_object_max = _param_or_default(in_params, "small_object_max", 5)
    struct_obj = aicssegmentation.core.utils.size_filter(
        struct_obj,
        min_size=small_object_max ** 3,
        method="slice_by_slice",  # "3D" is the alternative
        connectivity=1,
    )
    out_p['small_object_max'] = small_object_max

    return struct_obj, label(struct_obj), out_p


---------------------
Use the `infer_NUCLEI` function to infer the Nucleus and export it as an _ome.tif_ for easy reference.

In [17]:
# test - 9.7 seconds to run...
# Runs both the packaged infer_NUCLEI and the notebook-local _infer_NUCLEI on
# the same input; the second call overwrites the results of the first.


NU_object, NU_label, out_p =  infer_NUCLEI(raw_nuclei.copy(), default_params) 
NU_object, NU_label, out_p =  _infer_NUCLEI(raw_nuclei.copy(), default_params) 


intensity normalization: min-max normalization with NO absoluteintensity upper bound
intensity normalization: min-max normalization with NO absoluteintensity upper bound


In [20]:
# TODO:  make export ome_tiff export:   XX_object, XX_label, XX_signal
#              also fix Path vs. str action for export wrapper

chan_name = 'nuclei'
# inferred objects go in a folder directly under the data root
out_path = data_root_path / "inferred_objects" 
object_name = 'NU_object'

# the wrapper is handed the folder as a string with a trailing "/" (see TODO above);
# the value it returns is kept so it can be displayed in the next cell
NU_object_filen = export_ome_tiff(NU_object, meta_dict, object_name, str(out_path)+"/", curr_chan=0)

['NU_object']


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


In [35]:
# display the value returned by export_ome_tiff
NU_object_filen

'/Users/ahenrie/Projects/Imaging/mcz_subcell/data/inferred_objects/NU_object.ome.tiff'

In [36]:
# display the parameter dict returned by the inference call
out_p

defaultdict(str,
            {'intensity_norm_param': [0],
             'gaussian_smoothing_sigma': 1.34,
             'gaussian_smoothing_truncate_range': 3.0,
             'dot_2d_sigma': 2,
             'dot_2d_sigma_extra': 1,
             'dot_2d_cutoff': 0.025,
             'min_area': 10,
             'low_level_min_size': 100,
             'median_filter_size': 4,
             'z_factor': 7.0,
             'scale': (0.5804527163320905,
              0.07987165184837318,
              0.07987165184837318),
             'threshold_factor': 0.9,
             'thresh_min': 0.1,
             'thresh_max': 1.0,
             'hole_width': 5,
             'small_object_max': 5})