# Infer NUCLEI - 2️⃣ 

--------------

## OBJECTIVE: ✅ Infer sub-cellular component #2: NUCLEI  in order to understand interactome 

The overall goal is to measure the shape, position, size, and interaction of eight organelles/cellular components; this notebook infers the Nuclei (NU).  

Dependencies:
SOMA and CYTOSOL inference rely on the Nuclei inference.  Therefore all of the sub-cellular objects rely on the NU segmentation.





# IMPORTS

In [4]:
# top level imports
from pathlib import Path
import os, sys
from collections import defaultdict

import numpy as np
import scipy

# TODO:  prune the imports.. this is the big set for almost all organelles
# # function for core algorithm
from scipy import ndimage as ndi
import aicssegmentation
from aicssegmentation.core.seg_dot import dot_3d_wrapper, dot_slice_by_slice, dot_2d_slice_by_slice_wrapper, dot_3d
from aicssegmentation.core.pre_processing_utils import ( intensity_normalization, 
                                                         image_smoothing_gaussian_3d,  
                                                         image_smoothing_gaussian_slice_by_slice )
from aicssegmentation.core.utils import topology_preserving_thinning, size_filter
from aicssegmentation.core.MO_threshold import MO
from aicssegmentation.core.utils import hole_filling
from aicssegmentation.core.vessel import filament_2d_wrapper, vesselnessSliceBySlice
from aicssegmentation.core.output_utils import   save_segmentation,  generate_segmentation_contour
                                                 
from skimage import filters
from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from skimage.morphology import remove_small_objects, binary_closing, ball , dilation, remove_small_holes   # function for post-processing (size filter)
from skimage.measure import label

# # package for io 
from aicsimageio import AICSImage

import napari

### import local python functions in ../infer_subc_2d
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))


from infer_subc_2d.utils.file_io import (read_czi_image,
                                                                    read_input_image, 
                                                                    list_image_files, 
                                                                    export_ome_tiff, 
                                                                    etree_to_dict, 
                                                                    save_parameters, 
                                                                    load_parameters, 
                                                                    export_ndarray)
from infer_subc_2d.utils.img import *

from infer_subc_2d.constants import (TEST_IMG_N,
                                                                    NUC_CH ,
                                                                    LYSO_CH ,
                                                                    MITO_CH ,
                                                                    GOLGI_CH ,
                                                                    PEROXI_CH ,
                                                                    ER_CH ,
                                                                    LIPID_CH ,
                                                                    RESIDUAL_CH )          

from infer_subc_2d.organelles.soma import infer_SOMA

%load_ext autoreload
%autoreload 2

test_img_n = TEST_IMG_N

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# IMAGE PROCESSING  OBJECTIVE :  infer NUCLEI
 

NOTE:  using Allen Cell Segmenter  [Nucleophosmin](https://www.allencell.org/cell-observations/category/nucleophosmin) might be a good generic mechanism.  e.g.
-  [playground_npm1.ipynb](https://github.com/AllenInstitute/aics-segmentation/blob/master/lookup_table_demo/playground_npm1.ipynb) and [npm1.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1.py) and [npm1_SR.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1_SR.py)


> #### Note:  this initial inferred object -- the Nuclei of the brightest cell -- will be used in inferring the Soma and Cytosol objects.   This is a straightforward procedure, but also note that any inconsistencies will flow into the Soma and Cytosol objects which in turn affect ALL inferred objects.


------------------------
# LOAD RAW IMAGE DATA
Identify path to _raw_ image data and load our example image


In [6]:
# Root directory that holds all of the imaging data.
# CUSTOMIZE HERE ---> point this at your own data location.
data_root_path = Path(os.path.expanduser("~")).joinpath("Projects/Imaging/data")

# The linearly unmixed ".czi" files live in the "raw" sub-directory.
im_type = ".czi"
data_path = data_root_path.joinpath("raw")

# List every image file of that type, then pick the configured test image.
img_file_list = list_image_files(data_path, im_type)
test_img_name = img_file_list[test_img_n]


In [7]:
# sanity check: display the home directory that `data_root_path` is built from
Path(os.path.expanduser("~"))

PosixPath('/Users/ahenrie')

In [8]:
# Read the test image; the reader hands back the pixel data and a metadata dict.
img_data, meta_dict = read_czi_image(test_img_name)

# Pull the top-level fields describing the RAW data out of the metadata.
scale = meta_dict["scale"]
channel_axis = meta_dict["channel_axis"]
channel_names = meta_dict["name"]
img = meta_dict["metadata"]["aicsimage"]



  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


In [9]:
# `scale` may still carry a leading Z entry: when it has more than two
# entries, drop the first one; otherwise leave it untouched
scale = scale[1:] if len(scale) > 2 else scale

## CHOOSE Z-SLICE

Let's find the slice with the most overall intensity...

> NOTE:  below we could also load the parameters we saved in [`00_pipeline_setup.ipynb`](00_pipeline_setup.ipynb) rather than recalculating.

In [10]:
# the six organelle channels passed to `find_optimal_Z` as the channels to aggregate
ch_to_agg = (LYSO_CH, MITO_CH, GOLGI_CH, PEROXI_CH, ER_CH, LIPID_CH)

# the nuclei channel is passed separately
nuc_ch = NUC_CH
optimal_Z = find_optimal_Z(img_data, nuc_ch, ch_to_agg)




Now get the single "optimal" slice of all our organelle channels....

In [11]:

# keep every channel but only the chosen Z index; indexing with a one-element
# list keeps that axis (length 1) instead of dropping it
img_2D = img_data[:, [optimal_Z], :, :].copy()

# infer the soma mask from the single-slice image
SO_mask = infer_SOMA(img_2D)

... and visualize.

In [20]:
# open a napari viewer and show the soma mask at the image scale
viewer = napari.Viewer()
viewer.add_image(SO_mask, scale=scale)

<Image layer 'SO_mask' at 0x1714c4cd0>


# IMAGE PROCESSING PROTOTYPE, Objective 2:  infer NUCLEI
 
## details

➡️ INPUT

- channel 0

PRE-PROCESSING
-  scale to min 0, max 1.0
- median Filter window 4
-  gaussian 1.34

CORE-PROCESSING
  - threshold method minimum cross-entropy.  
    - objects 50-400 pixels, 
    - threshold smoothing scale: 1.34 (later 1 pixel)
    - threshold correction factor: 0.9 (later 1.2 )
    - lower / upper bounds  (.1,1) ?
    - log transformed thresholding
    - fill holes


POST-PROCESSING
  - fill holes
  - remove small objects


OUTPUT ➡️ 
- labels of NUCLEI


> #### Note:  in later steps we will limit each analysis to a single object, but at this stage we have multiple


DATA IMPORT

Get the "raw" signals we need to analyze as well as any other dependencies in "inferred" objects.  

> NOTE: we are operating on a single "test" image in this notebook.  The batch-processing of all the images will happen at the end of the notebook after we have developed/confirmed the segmentation procedures and parameter settings.

## PRE- PROCESSING

In [None]:
###################
# INPUT
###################
# select the nuclei channel from the single-slice image; the copy keeps later
# processing from modifying `img_2D`
raw_nuclei = img_2D[NUC_CH].copy()


In [None]:
###################
# PRE_PROCESSING
###################
# Start from a normalized copy of the raw nuclei channel.  This defines
# `nuclei` (previously read before it was ever assigned) and adds the
# min/max scaling step that `_infer_NUCLEI` performs.  The copy keeps
# `raw_nuclei` untouched in case the helper works in place.
nuclei = min_max_intensity_normalization(raw_nuclei.copy())

# median filter, applied slice by slice
med_filter_size = 4
nuclei = median_filter_slice_by_slice(nuclei, size=med_filter_size)

# gaussian smoothing, applied slice by slice
gaussian_smoothing_sigma = 1.34
gaussian_smoothing_truncate_range = 3.0
nuclei = image_smoothing_gaussian_slice_by_slice(
    nuclei,
    sigma=gaussian_smoothing_sigma,
    truncate_range=gaussian_smoothing_truncate_range,
)




> #### NOTE: Thresholding
> [Thresholding](https://en.wikipedia.org/wiki/Thresholding_%28image_processing%29) is used to create binary images. A threshold value determines the intensity value separating foreground pixels from background pixels. Foreground pixels are pixels brighter than the threshold value, background pixels are darker. In many cases, images can be adequately segmented by thresholding followed by labelling of *connected components*, which is a fancy way of saying "groups of pixels that touch each other".
> 
> Different thresholding algorithms produce different results. [Otsu's method](https://en.wikipedia.org/wiki/Otsu%27s_method) and [Li's minimum cross entropy threshold](https://scikit-image.org/docs/dev/auto_examples/developers/plot_threshold_li.html) are two common algorithms. Below, we use Li. You can use `skimage.filters.threshold_<TAB>` to find different thresholding methods.

_Li_ procedure  better matches the CellProfiler pipeline which simply calls it "Minimum Cross Entropy" .


## CORE PROCESSING

In [None]:

###################
# CORE_PROCESSING
###################
# threshold the pre-processed nuclei image with the project's log-Li helper;
# the correction factor and the lower/upper bounds are passed through as-is
thresh_min = .1
thresh_max = 1.
threshold_factor = 0.9  # from cellProfiler
li_thresholded = apply_log_li_threshold(
    nuclei,
    threshold_factor=threshold_factor,
    thresh_min=thresh_min,
    thresh_max=thresh_max,
)


## POST PROCESSING

> NOTE: the size parameters are by convention defined as one dimensional "width", so the inputs to the functions need to be _squared_ i.e. raised to the power of 2: `** 2`.   For volumetric (3D) analysis this would be _cubed_:`**3`

In [None]:
###################
# POST_PROCESSING
###################
# both sizes are given as one-dimensional widths and squared at the call site
hole_width = 5
small_object_max = 45

# fill holes up to hole_width**2 (2D fill requested via fill_2d=True)
removed_holes = hole_filling(
    li_thresholded,
    hole_min=0,
    hole_max=hole_width ** 2,
    fill_2d=True,
)

# size filter with min_size = small_object_max**2
# (size_filter_2D: wrapper to remove_small_objects which can do slice by slice)
cleaned_img = size_filter_2D(
    removed_holes,
    min_size=small_object_max ** 2,
    connectivity=1,
)


OUTPUT + Visualize Results

In [None]:
# binary nuclei object produced by the post-processing step
NU_object = cleaned_img
# connected-component labels of that object
NU_labels = label(cleaned_img)
# raw nuclei channel the object was inferred from
# (`struct_img` was never defined in this notebook; `raw_nuclei` is the input signal)
NU_signal = raw_nuclei

In [None]:

# overlay the binary nuclei object and its labels on the open viewer,
# both semi-transparent and at the image scale
viewer.add_image(NU_object, scale=scale, opacity=0.3)
viewer.add_labels(NU_labels, scale=scale, opacity=0.3)



# DEFINE `_infer_NUCLEI` function

Based on the _prototyping_ above define the function to infer nuclei.  

> NOTE:  the processing parameters are not passed in as arguments; they are all "hard coded" in the function body below.

In [28]:
# copy this to base.py for easy import

def _infer_NUCLEI(in_img: np.ndarray, soma_mask: np.ndarray) -> np.ndarray:
    """
    Procedure to infer NUCLEI from linearly unmixed input.

    All processing parameters are hard coded in the body.

    Parameters:
    ------------
    in_img: np.ndarray
        image containing all the channels; the nuclei channel is selected
        with `in_img[NUC_CH]` and copied before any processing

    soma_mask: np.ndarray
        mask handed to `apply_mask` together with the segmented nuclei

    Returns:
    -------------
    nuclei_object
        mask defined extent of NU: the output of
        `apply_mask(NU_object, soma_mask)`

    """

    ###################
    # PRE_PROCESSING
    ###################
    nuclei = min_max_intensity_normalization(in_img[NUC_CH].copy())

    # median filter, applied slice by slice
    med_filter_size = 4
    nuclei = median_filter_slice_by_slice(nuclei, size=med_filter_size)

    # gaussian smoothing, applied slice by slice
    gaussian_smoothing_sigma = 1.34
    gaussian_smoothing_truncate_range = 3.0
    nuclei = image_smoothing_gaussian_slice_by_slice(
        nuclei,
        sigma=gaussian_smoothing_sigma,
        truncate_range=gaussian_smoothing_truncate_range,
    )

    ###################
    # CORE_PROCESSING
    ###################
    threshold_factor = 0.9  # from cellProfiler
    thresh_min = .1
    thresh_max = 1.
    NU_object = apply_log_li_threshold(
        nuclei,
        threshold_factor=threshold_factor,
        thresh_min=thresh_min,
        thresh_max=thresh_max,
    )

    ###################
    # POST_PROCESSING
    ###################
    # sizes are given as one-dimensional widths and squared at the call site
    hole_width = 5
    NU_object = hole_filling(NU_object, hole_min=0, hole_max=hole_width ** 2, fill_2d=True)

    small_object_max = 45
    NU_object = size_filter_2D(
        NU_object,
        min_size=small_object_max ** 2,
        connectivity=1,
    )

    # restrict the result with the soma mask before returning it
    return apply_mask(NU_object, soma_mask)


---------------------
# TEST `_infer_NUCLEI`  function defined above


##


In [29]:

# run the prototype on the single-slice image, passing the soma mask inferred earlier
_NU_object =  _infer_NUCLEI(img_2D, SO_mask) 



In [30]:
# show the prototype's output and its connected-component labels,
# both semi-transparent and at the image scale
viewer.add_image(_NU_object, scale=scale, opacity=0.3)
viewer.add_labels(label(_NU_object), scale=scale, opacity=0.3)


<Labels layer 'Labels [2]' at 0x17eeba130>

---------------------
# TEST `infer_NUCLEI` exported functions

> the prototype `_infer_NUCLEI` was copied to the [`.organelles.nuclei`](../infer_subc_2d/organelles/nuclei.py) sub-module 
## `infer_NUCLEI` procedure

Use the `infer_NUCLEI` function to infer the Nucleus and export it as an _ome.tif_ for easy reference.

In [31]:
from infer_subc_2d.organelles.nuclei import infer_NUCLEI

# Call the exported function, not the `_infer_NUCLEI` prototype, so this cell
# actually tests the packaged code.
# NOTE(review): assumes the exported function kept the prototype's
# (in_img, soma_mask) signature -- confirm against nuclei.py.
NU_object = infer_NUCLEI(img_2D, SO_mask)


In [32]:
# add the inferred nuclei object to the viewer, semi-transparent and at the image scale
viewer.add_image(NU_object, scale=scale, opacity=0.3)



<Image layer 'NU_object' at 0x17ed6d370>

In [None]:
# TODO:  make export ome_tiff export:   XX_object, XX_label, XX_signal
#              also fix Path vs. str action for export wrapper

object_name = 'NU_object'
chan_name = 'nuclei'
out_path = data_root_path.joinpath("inferred_objects")

# the export wrapper is given the output directory as a string with a trailing "/"
NU_object_filen = export_ome_tiff(
    NU_object,
    meta_dict,
    object_name,
    f"{out_path}/",
    curr_chan=0,
)

In [None]:
# Export the object as a plain ndarray as well; the returned value replaces
# the one stored by the OME-TIFF export.  The bare `NU_object_filen` that used
# to open this cell was removed: it was not the cell's last expression, so it
# was never displayed.
NU_object_filen = export_ndarray(NU_object, object_name, f"{out_path}/")


In [None]:
# display the value returned by `export_ndarray` in the previous cell
NU_object_filen

In [None]:
out_p