# Infer NUCLEI - 1️⃣

--------------

## OBJECTIVE: ✅ Infer sub-cellular component #1: NUCLEI  in order to understand interactome 

To measure the shape, position, size, and interaction of eight organelles/cellular components.  This notebook covers the first one: Nuclei (NU).  

Dependencies:
SOMA and CYTOSOL inference rely on the Nuclei inference.  Therefore all of the sub-cellular objects rely on the NU segmentation.





# IMPORTS

In [1]:
# top level imports
from pathlib import Path
import os, sys
from collections import defaultdict

import numpy as np
import scipy

# TODO:  prune the imports.. this is the big set for almost all organelles
# # function for core algorithm
from scipy import ndimage as ndi
import aicssegmentation
from aicssegmentation.core.seg_dot import dot_3d_wrapper, dot_slice_by_slice, dot_2d_slice_by_slice_wrapper, dot_3d
from aicssegmentation.core.pre_processing_utils import ( intensity_normalization, 
                                                         image_smoothing_gaussian_3d,  
                                                         image_smoothing_gaussian_slice_by_slice )
from aicssegmentation.core.utils import topology_preserving_thinning, size_filter
from aicssegmentation.core.MO_threshold import MO
from aicssegmentation.core.utils import hole_filling
from aicssegmentation.core.vessel import filament_2d_wrapper, vesselnessSliceBySlice
from aicssegmentation.core.output_utils import   save_segmentation,  generate_segmentation_contour
                                                 
from skimage import filters
from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from skimage.morphology import remove_small_objects, binary_closing, ball , dilation, remove_small_holes   # function for post-processing (size filter)
from skimage.measure import label

# # package for io 
from aicsimageio import AICSImage

import napari

# Make the parent of the current working directory importable so the
# local `infer_subc_2d` package (../infer_subc_2d) can be found.
_parent_dir = os.path.join(os.getcwd(), '..')
sys.path.append(os.path.abspath(_parent_dir))

from infer_subc_2d.utils.file_io import (read_input_image, 
                                                                    list_image_files, 
                                                                    export_ome_tiff, 
                                                                    etree_to_dict, 
                                                                    save_parameters, 
                                                                    load_parameters, 
                                                                    export_ndarray)
from infer_subc_2d.utils.img import *

%load_ext autoreload
%autoreload 2

# Position, within the list of image files, of the image used for testing.
test_img_n = 5

# IMAGE PROCESSING  OBJECTIVE :  infer NUCLEI
 

NOTE:  using Allen Cell Segmenter  [Nucleophosmin](https://www.allencell.org/cell-observations/category/nucleophosmin) might be a good generic mechanism.  e.g.
-  [playground_npm1.ipynb](https://github.com/AllenInstitute/aics-segmentation/blob/master/lookup_table_demo/playground_npm1.ipynb) and [npm1.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1.py) and [npm1_SR.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1_SR.py)


> #### Note:  this initial inferred object -- the Nuclei of the brightest cell -- will be used in inferring the Soma and Cytosol objects.   This is a straightforward procedure, but also note that any inconsistencies will flow into the Soma and Cytosol objects which in turn affect ALL inferred objects.


------------------------
# LOAD RAW IMAGE DATA
Identify path to _raw_ image data and load our example image


In [2]:
# Root directory that holds all of the imaging data.
# CUSTOMIZE HERE --->
data_root_path = Path(os.path.expanduser("~"), "Projects/Imaging/data")

# The linearly unmixed ".czi" files live in the "raw" sub-directory.
im_type = ".czi"
data_path = data_root_path.joinpath("raw")

# Collect every matching file, then pick the test image by its index.
img_file_list = list_image_files(data_path, im_type)
test_img_name = img_file_list[test_img_n]


In [3]:
Path(os.path.expanduser("~"))

PosixPath('/Users/ahenrie')

In [4]:

# Read the test image; the returned object carries the pixel data plus
# the raw and the parsed metadata.
bioim_image = read_input_image(test_img_name)
img_data = bioim_image.image
raw_meta_data = bioim_image.raw_meta
meta_dict = bioim_image.meta
ome_types = []

# Top-level info about the RAW data, pulled out of the parsed metadata.
scale = meta_dict['scale']
channel_axis = meta_dict['channel_axis']
channel_names = meta_dict['name']
img = meta_dict['metadata']['aicsimage']



  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


In [5]:

# from ome_types import from_tiff

# ome2 = from_tiff(test_img_name)


## CHOOSE Z-SLICE

Let's find the slice with the most overall intensity...

> NOTE:  below we could also load the parameters we saved in [`00_pipeline_setup.ipynb`](00_pipeline_setup.ipynb) rather than recalculating.

In [7]:
# Set to True to reuse previously saved parameters instead of
# recomputing the optimal Z slice.
load_Z_from_params = False

# The file-name component of the image path names the parameter set;
# parameters are read from / written to the "intermediate" directory.
params_name = test_img_name.split("/")[-1]
params_dir = data_root_path / "intermediate"

if load_Z_from_params:
    default_params = load_parameters(params_name, params_dir)

    ch_to_agg = default_params["ch_to_agg"]
    nuc_ch = default_params["nuc_ch"]
    optimal_Z = default_params["optimal_Z"]
else:
    ch_to_agg = (1, 2, 3, 4, 5, 6)  # exclude residual
    nuc_ch = 0
    optimal_Z = find_optimal_Z(img_data, nuc_ch, ch_to_agg)

    # Store the very same ch_to_agg / nuc_ch values that were used to
    # compute optimal_Z so the saved record cannot drift from them.
    default_params = defaultdict(str, {
        # "intensity_norm_param" : [0.5, 15]
        "intensity_norm_param": [0],
        "gaussian_smoothing_sigma": 1.34,
        "gaussian_smoothing_truncate_range": 3.0,
        "dot_2d_sigma": 2,
        "dot_2d_sigma_extra": 1,
        "dot_2d_cutoff": 0.025,
        "min_area": 10,
        "low_level_min_size": 100,
        "median_filter_size": 4,
        "ch_to_agg": ch_to_agg,
        "nuc_ch": nuc_ch,
        "optimal_Z": optimal_Z,
    })
    save_parameters(default_params, params_name, params_dir)

# make sure we have removed Z: when scale has more than two entries the
# leading one is dropped so that scale matches the 2D slice.
if len(scale) > 2:
    scale = scale[1:]



Now get the single "optimal" slice of all our organelle channels....

In [9]:

# Keep every channel but only the chosen Z slice; indexing with a list
# keeps the Z axis (now of length 1) in place.
img_2D = img_data[:, [optimal_Z], :, :].copy()



... and visualize.

In [10]:

# Open a napari viewer showing the selected slice, split along the
# channel axis and named after the channels.
viewer = napari.view_image(img_2D, channel_axis=0, name=channel_names, scale=scale)
viewer.scale_bar.visible = True



# IMAGE PROCESSING PROTOTYPE, Objective 1:  infer NUCLEI
 
## details

➡️ INPUT

- channel 0

PRE-PROCESSING
-  scale to min 0, max 1.0
- median Filter window 4
-  gaussian 1.34

CORE-PROCESSING
  - threshold method minimum cross-entropy.  
    - objects 50-400 pixels, 
    - threshold smoothing scale: 1.34 (later 1 pixel)
    - threshold correction factor: 0.9 (later 1.2 )
    - lower / upper bounds  (.1,1) ?
    - log transformed thresholding
    - fill holes


POST-PROCESSING
  - fill holes
  - remove small objects


OUTPUT ➡️ 
- labels of NUCLEI


> #### Note:  in later steps we will limit each analysis to a single object, but at this stage we have multiple


DATA IMPORT

Get the "raw" signals we need to analyze as well as any other dependencies in "inferred" objects.  

> NOTE: we are operating on a single "test" image in this notebook.  The batch-processing of all the images will happen at the end of the notebook after we have developed/confirmed the segmentation procedures and parameter settings.

## PRE- PROCESSING

In [11]:
###################
# INPUT
###################
# Channel 0 of the selected slice is the input for the nuclei inference.
raw_nuclei = img_2D[0].copy()
# The remaining channels are not used in this notebook:
# raw_lyso    = img_2D[1].copy()
# raw_mito    = img_2D[2].copy()
# raw_golgi   = img_2D[3].copy()
# raw_peroxi = img_2D[4].copy()
# raw_ER      = img_2D[5].copy()
# raw_lipid   = img_2D[6].copy()
# raw_residual = img_2D[7].copy()



In [12]:
###################
# PRE_PROCESSING
###################
gaussian_smoothing_sigma = 1.34
gaussian_smoothing_truncate_range = 3.0
med_filter_size = 4

# 1. rescale the raw intensities (see `simple_intensity_normalization`)
struct_img = simple_intensity_normalization(raw_nuclei.copy())

# 2. median filter
structure_img_median = ndi.median_filter(struct_img, size=med_filter_size)

# 3. gaussian smoothing of the MEDIAN-FILTERED image.  The steps are
#    chained, as in `_infer_NUCLEI`; smoothing `struct_img` here would
#    silently discard the median-filter result.
structure_img_smooth = ndi.gaussian_filter(
    structure_img_median,
    sigma=gaussian_smoothing_sigma,
    mode="nearest",
    truncate=gaussian_smoothing_truncate_range,
)



> #### NOTE: Thresholding
> [Thresholding](https://en.wikipedia.org/wiki/Thresholding_%28image_processing%29) is used to create binary images. A threshold value determines the intensity value separating foreground pixels from background pixels. Foreground pixels are pixels brighter than the threshold value, background pixels are darker. In many cases, images can be adequately segmented by thresholding followed by labelling of *connected components*, which is a fancy way of saying "groups of pixels that touch each other".
> 
> Different thresholding algorithms produce different results. [Otsu's method](https://en.wikipedia.org/wiki/Otsu%27s_method) and [Li's minimum cross entropy threshold](https://scikit-image.org/docs/dev/auto_examples/developers/plot_threshold_li.html) are two common algorithms. Below, we use Li. You can use `skimage.filters.threshold_<TAB>` to find different thresholding methods.

The _Li_ procedure better matches the CellProfiler pipeline, which simply calls it "Minimum Cross Entropy".


## CORE PROCESSING

In [13]:

###################
# CORE_PROCESSING
###################

# alternative (plain Li threshold, not used):
#   threshold_value = filters.threshold_li(structure_img_smooth)

# Threshold of the smoothed image from `threshold_li_log`
# (presumably the log-transformed Li method -- confirm in infer_subc_2d.utils.img).
threshold_value_log = threshold_li_log(structure_img_smooth)

threshold_factor = 0.9  # from cellProfiler
thresh_min, thresh_max = 0.1, 1.0

# Apply the correction factor, then clamp into [thresh_min, thresh_max].
scaled_threshold = threshold_value_log * threshold_factor
threshold = min(max(scaled_threshold, thresh_min), thresh_max)

threshold

0.1

In [14]:

# Binarize: foreground is every pixel strictly brighter than the threshold.
li_thresholded = np.greater(structure_img_smooth, threshold)
li_thresholded.shape

(1, 768, 768)

## POST PROCESSING

> NOTE: the size parameters are by convention defined as one dimensional "width", so the inputs to the functions need to be _squared_ i.e. raised to the power of 2: `** 2`.   For volumetric (3D) analysis this would be _cubed_:`**3`

In [15]:
###################
# POST_PROCESSING
###################

# Both sizes are one-dimensional "widths"; they are squared below to
# obtain 2D areas.
hole_width = 5
small_object_max = 5

# Fill small holes in the thresholded mask.
removed_holes = remove_small_holes(li_thresholded, hole_width ** 2)

# size_filter_2D: wrapper to remove_small_objects which can do slice by slice
cleaned_img = size_filter_2D(
    removed_holes,
    min_size=small_object_max ** 2,
    connectivity=1,
)


OUTPUT + Visualize Results

In [16]:
# object mask, its labelled version, and the normalized signal
NU_object, NU_labels, NU_signal = cleaned_img, label(cleaned_img), struct_img

In [17]:

# Both layers share the same display settings.
layer_kwargs = dict(scale=scale, opacity=0.3)

viewer.add_image(NU_object, **layer_kwargs)
viewer.add_labels(NU_labels, **layer_kwargs)



<Labels layer 'NU_labels' at 0x178aacd00>

# DEFINE `_infer_NUCLEI` function

Based on the _prototyping_ above define the function to infer nuclei.  

> NOTE:  although it takes the parameters as input, they are all "hard coded" below, and the function returns the parameters it actually used in a copy of the input `defaultdict`

In [18]:
# copy this to base.py for easy import

def _infer_NUCLEI(struct_img: np.ndarray, in_params: dict) -> tuple:
    """
    Procedure to infer NUCLEI from linearly unmixed input.

    Parameters:
    ------------
    struct_img: np.ndarray
        a 3d image containing the NUCLEI signal

    in_params: dict
        parameter dictionary.  It is copied, never modified.  Its values
        are NOT read yet: every setting below is still hard coded (see
        the TODO in the pre-processing section).

    Returns:
    -------------
    tuple of:
        object
            mask of the inferred NU
        label
            labelled version of the mask (could be more than 1 label)
        parameters: dict
            copy of `in_params` updated with the settings actually used

    """
    out_p = in_params.copy()

    ###################
    # PRE_PROCESSING
    ###################
    # TODO: replace params below with the input params
    scaling_param = [0]
    struct_img = intensity_normalization(struct_img, scaling_param=scaling_param)
    out_p["intensity_norm_param"] = scaling_param

    med_filter_size = 4
    struct_img = ndi.median_filter(struct_img, size=med_filter_size)
    out_p["median_filter_size"] = med_filter_size

    gaussian_smoothing_sigma = 1.34
    gaussian_smoothing_truncate_range = 3.0
    struct_img = ndi.gaussian_filter(
        struct_img,
        sigma=gaussian_smoothing_sigma,
        mode="nearest",
        truncate=gaussian_smoothing_truncate_range,
    )
    out_p["gaussian_smoothing_sigma"] = gaussian_smoothing_sigma
    out_p["gaussian_smoothing_truncate_range"] = gaussian_smoothing_truncate_range

    ###################
    # CORE_PROCESSING
    ###################
    threshold_value_log = threshold_li_log(struct_img)

    threshold_factor = 0.9  # from cellProfiler
    thresh_min = 0.1
    thresh_max = 1.0
    # apply the correction factor, then clamp into [thresh_min, thresh_max]
    threshold = min(max(threshold_value_log * threshold_factor, thresh_min), thresh_max)
    out_p["threshold_factor"] = threshold_factor
    out_p["thresh_min"] = thresh_min
    out_p["thresh_max"] = thresh_max

    struct_obj = struct_img > threshold

    ###################
    # POST_PROCESSING
    ###################
    # sizes are one-dimensional "widths", hence the squaring to get areas
    hole_width = 5
    struct_obj = remove_small_holes(struct_obj, hole_width ** 2)
    out_p["hole_width"] = hole_width

    small_object_max = 5
    struct_obj = size_filter_2D(
        struct_obj,
        min_size=small_object_max ** 2,
        connectivity=1,
    )
    out_p["small_object_max"] = small_object_max

    # callers unpack exactly three values: (object, label, parameters)
    retval = (struct_obj, label(struct_obj), out_p)
    return retval


---------------------
# TEST `_infer_NUCLEI`  function defined above


##


In [19]:
# test - 9.7 seconds to run...
#
chan_name = 'nuclei'
# inferred objects live under the data ROOT (not the "raw" directory),
# matching the path used by the export cell
out_path = data_root_path / "inferred_objects"
object_name = 'NU_object'


# reload the parameters saved for the test image
default_params = load_parameters(test_img_name.split("/")[-1], data_root_path / "intermediate")

# ch_to_agg = (1,2,3,4,5,6)
# nuc_ch = 0

optimal_Z = default_params["optimal_Z"]  # find_optimal_Z(img_data, nuc_ch, ch_to_agg)

img_2D = img_data[:, [optimal_Z], :, :].copy()
raw_nuclei = img_2D[0].copy()

default_params["ch_to_agg"] = ch_to_agg
default_params["nuc_ch"] = nuc_ch

_NU_object, _NU_label, _out_p = _infer_NUCLEI(raw_nuclei.copy(), default_params)
# NU_object, NU_label, out_p =  _infer_NUCLEI(raw_nuclei.copy(), default_params)


intensity normalization: min-max normalization with NO absoluteintensity upper bound


In [20]:
# Both layers share the same display settings.
layer_kwargs = dict(scale=scale, opacity=0.3)

viewer.add_image(_NU_object, **layer_kwargs)
viewer.add_labels(_NU_label, **layer_kwargs)


<Labels layer '_NU_label' at 0x178c60d00>

---------------------
# TEST `infer_NUCLEI` exported functions

> the prototype `_infer_NUCLEI` was copied to the [`.organelles.nuclei`](../infer_subc_2d/organelles/nuclei.py) sub-module 
## `infer_NUCLEI` procedure

Use the `infer_NUCLEI` function to infer the Nucleus and export it as an _ome.tif_ for easy reference.

In [21]:
from infer_subc_2d.organelles.nuclei import infer_NUCLEI

# test - takes roughly 9.7 seconds to run...

# Reload the parameters saved for the test image.
param_name = test_img_name.split("/")[-1]
default_params = load_parameters(param_name, data_root_path / "intermediate")

# The optimal Z comes from the saved parameters rather than from
# find_optimal_Z(img_data, nuc_ch, ch_to_agg).
optimal_Z = default_params["optimal_Z"]

img_2D = img_data[:, [optimal_Z], :, :].copy()
raw_nuclei = img_2D[0].copy()

default_params.update(ch_to_agg=ch_to_agg, nuc_ch=nuc_ch)

NU_object, NU_label, out_p = infer_NUCLEI(raw_nuclei.copy(), default_params)


intensity normalization: min-max normalization with NO absoluteintensity upper bound


In [22]:
viewer.add_image(
    NU_object,
    scale=scale,
    opacity=0.3,
)

# Show the labels returned by `infer_NUCLEI` (NU_label), not the stale
# `NU_labels` left over from the prototype cells.
viewer.add_labels(
    NU_label,
    scale=scale,
    opacity=0.3,
)


<Labels layer 'NU_labels [1]' at 0x17acd6880>

In [23]:
# TODO:  make export ome_tiff export:   XX_object, XX_label, XX_signal
#              also fix Path vs. str action for export wrapper

object_name = 'NU_object'
chan_name = 'nuclei'
out_path = data_root_path / "inferred_objects"

# The export wrapper is given the output directory as a string with a
# trailing "/".
out_dir = f"{out_path}/"
NU_object_filen = export_ome_tiff(NU_object, meta_dict, object_name, out_dir, curr_chan=0)

['NU_object']


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


In [24]:
# Export the mask as an ndarray too; NU_object_filen now holds the name
# returned by this second export.
NU_object_filen = export_ndarray(NU_object, object_name, str(out_path) + "/")


In [25]:
NU_object_filen

'/Users/ahenrie/Projects/Imaging/data/inferred_objects/NU_object.npy'

In [26]:
out_p

defaultdict(str,
            {'intensity_norm_param': [0],
             'gaussian_smoothing_sigma': 1.34,
             'gaussian_smoothing_truncate_range': 3.0,
             'dot_2d_sigma': 2,
             'dot_2d_sigma_extra': 1,
             'dot_2d_cutoff': 0.025,
             'min_area': 10,
             'low_level_min_size': 100,
             'median_filter_size': 4,
             'ch_to_agg': (1, 2, 3, 4, 5, 6),
             'nuc_ch': 0,
             'optimal_Z': 10,
             'threshold_factor': 0.9,
             'thresh_min': 0.1,
             'thresh_max': 1.0,
             'hole_width': 5,
             'small_object_max': 5})