# Infer ***nuclei*** - 2️⃣ 

--------------

## OBJECTIVE: ✅ Infer sub-cellular component #2: ***nuclei***  in order to understand interactome 

To measure shape, position, size, and interaction of eight organelles/cellular components ***nuclei***.  

Dependencies:
***Soma*** and ***cytosol*** inference rely on the ***nuclei*** inference.  Therefore all of the sub-cellular objects rely on the NU segmentation.





# IMPORTS

In [1]:
# top level imports
from pathlib import Path
import os, sys
from collections import defaultdict

import numpy as np
import scipy

# TODO:  prune the imports.. this is the big set for almost all organelles
# # function for core algorithm
from scipy import ndimage as ndi
import aicssegmentation
from aicssegmentation.core.seg_dot import dot_3d_wrapper, dot_slice_by_slice, dot_2d_slice_by_slice_wrapper, dot_3d
from aicssegmentation.core.pre_processing_utils import ( intensity_normalization, 
                                                         image_smoothing_gaussian_3d,  
                                                         image_smoothing_gaussian_slice_by_slice )
from aicssegmentation.core.utils import topology_preserving_thinning, size_filter
from aicssegmentation.core.MO_threshold import MO
from aicssegmentation.core.utils import hole_filling
from aicssegmentation.core.vessel import filament_2d_wrapper, vesselnessSliceBySlice
from aicssegmentation.core.output_utils import   save_segmentation,  generate_segmentation_contour
                                                 
from skimage import filters
from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from skimage.morphology import remove_small_objects, binary_closing, ball , dilation, remove_small_holes   # function for post-processing (size filter)
from skimage.measure import label

# # package for io 
from aicsimageio import AICSImage

import napari

### import local python functions in ../infer_subc_2d
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))


from infer_subc_2d.utils.file_io import (read_czi_image,
                                                                    read_input_image, 
                                                                    list_image_files, 
                                                                    export_ome_tiff, 
                                                                    etree_to_dict, 
                                                                    save_parameters, 
                                                                    load_parameters, 
                                                                    export_ndarray)
from infer_subc_2d.utils.img import *

from infer_subc_2d.constants import (TEST_IMG_N,
                                                                    NUC_CH ,
                                                                    LYSO_CH ,
                                                                    MITO_CH ,
                                                                    GOLGI_CH ,
                                                                    PEROXI_CH ,
                                                                    ER_CH ,
                                                                    LIPID_CH ,
                                                                    RESIDUAL_CH )          

from infer_subc_2d.organelles.soma import infer_soma

%load_ext autoreload
%autoreload 2

test_img_n = TEST_IMG_N

# IMAGE PROCESSING  OBJECTIVE :  infer ***nuclei***
 

NOTE:  using Allen Cell Segmenter  [Nucleophosmin](https://www.allencell.org/cell-observations/category/nucleophosmin) might be a good generic mechanism.  e.g.
-  [playground_npm1.ipynb](https://github.com/AllenInstitute/aics-segmentation/blob/master/lookup_table_demo/playground_npm1.ipynb) and [npm1.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1.py) and [npm1_SR.py](https://github.com/AllenInstitute/aics-segmentation/blob/master/aicssegmentation/structure_wrapper/seg_npm1_SR.py)


> #### Note:  this initial inferred object -- the ***nuclei*** of the brightest cell -- is implicitly used in inferring the ***soma*** and ***cytosol*** objects.  


------------------------
# LOAD RAW IMAGE DATA
Identify path to _raw_ image data and load our example image


In [2]:
# build the datapath
# all the imaging data goes here.
# CUSTOMIZE HERE --->
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"

# linearly unmixed ".czi" files are here
data_path = data_root_path / "raw"
im_type = ".czi"

# get the list of all files
img_file_list = list_image_files(data_path,im_type)
test_img_name = img_file_list[test_img_n]


In [3]:
Path(os.path.expanduser("~"))

PosixPath('/Users/ahenrie')

In [4]:
img_data,meta_dict = read_czi_image(test_img_name)


# get some top-level info about the RAW data
channel_names = meta_dict['name']
img = meta_dict['metadata']['aicsimage']
scale = meta_dict['scale']
channel_axis = meta_dict['channel_axis']



  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


In [5]:
# make sure we have removed Z
if len(scale)>2:
    scale = scale[1:]

## CHOOZE Z-SLICE

Lets find the slice with the most overall intensity...

In [6]:
ch_to_agg = ( LYSO_CH ,
                        MITO_CH ,
                        GOLGI_CH ,
                        PEROXI_CH ,
                        ER_CH ,
                        LIPID_CH )
                            
nuc_ch = NUC_CH
optimal_Z = find_optimal_Z(img_data, nuc_ch, ch_to_agg) 




Now get the single "optimal" slice of all our organelle channels....

In [7]:

img_2D = img_data[:,[optimal_Z],:,:].copy()

soma_mask =  infer_soma(img_2D) 

... and visualize.

In [8]:
viewer = napari.Viewer()
viewer.add_image(SO_mask,
    scale=scale)

NameError: name 'SO_mask' is not defined


# IMAGE PROCESSING PROTOTYPE, Objective 2:  infer NUCLEI
 
## details

➡️ INPUT

- channel 0

PRE-PROCESSING
-  scale to min 0, max 1.0
- median Filter window 4
-  gaussian 1.34

CORE-PROCESSING
  - threshold method minimum cross-entropy.  
    - objects 50-400 pixels, 
    - threshold smoothing scale: 1.34 (later 1 pixel
    - threshold correction factor: 0.9 (later 1.2 )
    - lower / upper bounds  (.1,1) ?
    - log transformed thresholding
    - fill holes


POST-PROCESSING
  - fill holes
  - remove small objects


OUTPUT ➡️ 
- labels of NUCLEI


> #### Note:  in later steps we will limit each analysis to a single object, but at this stage we have multiple


DATA IMPORT

Get the "raw" signals we need to analyze as well as any other dependencies in "inferred" objects.  

> NOTE: we are operating on a single "test" image in this notebook.  The batch-processing of all the images will be happen at the end of the notebook after we have developed/confirmed the setmentation procedures and parameter settings.

## PRE- PROCESSING

In [None]:
###################
# INPUT
###################
raw_nuclei = img_2D[NUC_CH].copy()


In [None]:
###################
# PRE_PROCESSING
###################           
# 
nuclei = min_max_intensity_normalization(raw_nuclei )

med_filter_size = 4   
# structure_img_median_3D = ndi.median_filter(struct_img,    size=med_filter_size  )
nuclei = median_filter_slice_by_slice( 
                                                                nuclei,
                                                                size=med_filter_size  )

gaussian_smoothing_sigma = 1.34
nuclei = image_smoothing_gaussian_slice_by_slice(  nuclei,
                                                                                            sigma=gaussian_smoothing_sigma)




> #### NOTE: Thresholding
> [Thresholding](https://en.wikipedia.org/wiki/Thresholding_%28image_processing%29) is used to create binary images. A threshold value determines the intensity value separating foreground pixels from background pixels. Foregound pixels are pixels brighter than the threshold value, background pixels are darker. In many cases, images can be adequately segmented by thresholding followed by labelling of *connected components*, which is a fancy way of saying "groups of pixels that touch each other".
> 
> Different thresholding algorithms produce different results. [Otsu's method](https://en.wikipedia.org/wiki/Otsu%27s_method) and [Li's minimum cross entropy threshold](https://scikit-image.org/docs/dev/auto_examples/developers/plot_threshold_li.html) are two common algorithms. Below, we use Li. You can use `skimage.filters.threshold_<TAB>` to find different thresholding methods.

_Li_ procedure  better matches the CellProfiler pipeline which simply calls it "Minimum Cross Entropy" .


## CORE PROCESSING

In [None]:

###################
# CORE_PROCESSING
###################

threshold_factor = 0.9 #from cellProfiler
thresh_min = .1
thresh_max = 1.
li_thresholded = apply_log_li_threshold(nuclei, threshold_factor=threshold_factor, thresh_min=thresh_min, thresh_max=thresh_max)


## POST PROCESSING

> NOTE: the size parameters are by convention defined as one dimensional "width", so the inputs to the functions need to be _squared_ i.e. raised to the power of 2: `** 2`.   For volumetric (3D) analysis this would be _cubed_:`**3`

In [None]:
###################
# POST_PROCESSING
###################

hole_width = 5  
# # wrapper to remoce_small_objects
#removed_holes = remove_small_holes(li_thresholded, hole_width ** 2 )
removed_holes = hole_filling(li_thresholded, hole_min=0, hole_max=hole_width**2, fill_2d=True)

small_object_width = 15
cleaned_img = size_filter_2D(removed_holes, # wrapper to remove_small_objects which can do slice by slice
                                                         min_size= small_object_width ** 2, 
                                                         connectivity=1)


OUTPUT + Visualize Results

In [None]:
nuclei_object = cleaned_img
NU_labels = label(cleaned_img   )
NU_signal = struct_img

In [None]:

viewer.add_image(
    nuclei_object,
    scale=scale,
    opacity=0.3,
)    


viewer.add_labels(
    NU_labels,
    scale=scale,
    opacity=0.3,
)



# DEFINE `_infer_nuclei` function

Based on the _prototyping_ above define the function to infer nuclei.  

> NOTE:  although it takes the parameters as input, they are all "hard coded" below, and the function returns the parameters in the same `defaultdict`

In [None]:
# copy this to base.py for easy import
##########################
#  _infer_nuclei
##########################
def _infer_nuclei(in_img:  np.ndarray, soma_mask:  np.ndarray) -> np.ndarray:
    """
    Procedure to infer nuclei from linearly unmixed input.

    Parameters:
    ------------
    in_img: np.ndarray
        a 3d image containing all the channels

    soma_mask: np.ndarray
        mask
 
    Returns:
    -------------
    nuclei_object
        mask defined extent of NU
    
    """

    ###################
    # PRE_PROCESSING
    ###################                
    nuc_ch = NUC_CH
    nuclei = _select_channel_from_raw(in_img, nuc_ch)
    # nuclei = min_max_intensity_normalization(in_img[NUC_CH].copy() )

    med_filter_size = 4   
    # structure_img_median_3D = ndi.median_filter(struct_img,    size=med_filter_size  )
    nuclei = median_filter_slice_by_slice( nuclei,
                                                                    size=med_filter_size  )

    gaussian_smoothing_sigma = 1.34
    nuclei = image_smoothing_gaussian_slice_by_slice(  nuclei,
                                                                                                sigma=gaussian_smoothing_sigma )

    ###################
    # CORE_PROCESSING
    ###################
    #struct_obj = struct_img > filters.threshold_li(struct_img)
    threshold_factor = 0.9 #from cellProfiler
    thresh_min = .1
    thresh_max = 1.
    nuclei_object = apply_log_li_threshold(nuclei, threshold_factor=threshold_factor, thresh_min=thresh_min, thresh_max=thresh_max)


    NU_labels = label(nuclei_object)
    ###################
    # POST_PROCESSING
    ###################
    hole_width = 5  
    # # wrapper to remoce_small_objects
    #nuclei_object = remove_small_holes(nuclei_object, hole_width ** 2 )
    nuclei_object = hole_filling(nuclei_object, hole_min=0, hole_max=hole_width**2, fill_2d=True)
    nuclei_object = apply_mask(nuclei_object, soma_mask)

    small_object_width = 15
    nuclei_object = size_filter_2D(nuclei_object, 
                                                                min_size= small_object_width**2, 
                                                                connectivity=1)

    return nuclei_object


---------------------
# TEST `_infer_nuclei`  function defined above


##


In [None]:

_NU_object =  _infer_nuclei(img_2D, SO_mask) 



In [None]:
viewer.add_image(
    _NU_object,
    scale=scale,
    opacity=0.3,
)    


viewer.add_labels(
    label(_NU_object),
    scale=scale,
    opacity=0.3,
)


---------------------
# TEST `infer_nuclei` exported functions

> the prototype `_infer_nuclei` was copied to the [`.organelles.nuclei`](../infer_subc_2d/organelles/nuclei.py) sub-module 
##
`infer_nuclei` procedure

Use the `infer_nuclei` function to infer the Nucleus and export it as an _ome.tif_ for easy reference.

In [None]:
from infer_subc_2d.organelles.nuclei import infer_nuclei

nuclei_object =  infer_nuclei(img_2D, soma_mask) 


In [None]:
viewer.add_image(
    nuclei_object,
    scale=scale,
    opacity=0.3,
)    



In [None]:
from napari.utils.notebook_display import nbscreenshot

# viewer.dims.ndisplay = 3
# viewer.camera.angles = (-30, 25, 120)
nbscreenshot(viewer, canvas_only=True)

-----------------
## make function json to add to `all_functions.json`
### infer_nuclei 

In [34]:
from infer_subc_2d.organelles_config.helper import add_function_spec_to_widget_json

_infer_nuclei =  {
        "name": "infer nuclei",
        "python::module": "infer_subc_2d.organelles",
        "python::function": "infer_nuclei",
        "parameters": None
        }

add_function_spec_to_widget_json("infer_nuclei",_infer_nuclei)

1

In [87]:
_median_filter_slice_by_slice =  {
                "name": "Median Smoothing Slice by Slice",
                "python::module": "infer_subc_2d.utils.img",
                "python::function": "median_filter_slice_by_slice",
                "parameters": {
                    "size": {
                        "widget_type": "slider",
                        "data_type": "int",
                        "min": 1,
                        "max": 20,
                        "increment": 1
                    }
                }
            } 
add_function_spec_to_widget_json("median_filter_slice_by_slice",_median_filter_slice_by_slice)

function median_filter_slice_by_slice is already in all_functions.json


0

In [36]:

    # gaussian_smoothing_sigma = 1.34
    # gaussian_smoothing_truncate_range = 3.0
    # nuclei = image_smoothing_gaussian_slice_by_slice(  nuclei,
    #                                                                                             sigma=gaussian_smoothing_sigma,
    #                                                                                             truncate_range = gaussian_smoothing_truncate_range
    #                                                                                             )
_image_smoothing_gaussian_slice_by_slice = {
        "name": "Gaussian Smoothing Slice by Slice",
        "python::module": "aicssegmentation.core.pre_processing_utils",
        "python::function": "image_smoothing_gaussian_slice_by_slice",
        "parameters": {
            "sigma": {
                "widget_type": "slider",
                "data_type": "float",
                "min": 0.8,
                "max": 20,
                "increment": 0.2
            }
        }
        }

# json.dumps({"image_smoothing_gaussian_slice_by_slice": _image_smoothing_gaussian_slice_by_slice} )
add_function_spec_to_widget_json("image_smoothing_gaussian_slice_by_slice",_image_smoothing_gaussian_slice_by_slice)        


1

In [37]:

    # threshold_factor = 0.9 #from cellProfiler
    # thresh_min = .1
    # thresh_max = 1.
    # nuclei_object = apply_log_li_threshold(nuclei, threshold_factor=threshold_factor, thresh_min=thresh_min, thresh_max=thresh_max)

# WARNING: not a good way to set to None
_apply_log_li_threshold = {
        "name": "threshold log Li",
        "python::module": "infer_subc_2d.utils.img",
        "python::function": "apply_log_li_threshold",
        "parameters": {
            "threshold_factor": {
                "widget_type": "slider",
                "data_type": "float",
                "min": 0.3,
                "max": 1.1,
                "increment": 0.05
            },
            "thresh_min": {
                "widget_type": "slider",
                "data_type": "float",
                "min": 0.0,
                "max": 0.8,
                "increment": 0.01
            },
            "thresh_max": {
                "widget_type": "slider",
                "data_type": "float",
                "min": 0.3,
                "max": 1.0,
                "increment": 0.05
            },
        }
        }

# json.dumps({"apply_log_li_threshold": _apply_log_li_threshold} )
add_function_spec_to_widget_json("apply_log_li_threshold",_apply_log_li_threshold)        


1

In [38]:


    # NU_labels = label(nuclei_object)

_label =  {
        "name": "label objects",
        "python::module": "skimage.measure",
        "python::function": "label",
        "parameters": None
        }
# json.dumps({"label":_label})
add_function_spec_to_widget_json("label",_label)        


1

In [39]:

#  nulei_object = apply_mask(nuclei_object, soma_mask)

_apply_mask=  {
        "name": "label objects",
        "python::module": "infer_subc_2d.utils.img",
        "python::function": "apply_mask",
        "parameters": None
        }
# json.dumps({"apply_mask":_apply_mask})
add_function_spec_to_widget_json("apply_mask",_apply_mask)        


1

In [40]:

    # small_object_width = 45
    # nuclei_object = size_filter_2D(nuclei_object, 
    #                                                             min_size= small_object_width**2, 
    #                                                             connectivity=1)


_size_filter_2D = {
        "name": "Size Filter 2D",
        "python::module": "infer_subc_2d.utils.img",
        "python::function": "size_filter_2D",
        "parameters": {
            "min_size": {
                "widget_type": "slider",
                "data_type": "int",
                "min": 0,
                "max": 500,
                "increment": 1
            }
        }
    }
# json.dumps({  "size_filter_2D":  _size_filter_2D   })

add_function_spec_to_widget_json("size_filter_2D",_size_filter_2D)        


1

## Write workflow .json
Now that we've added our function specs we can compose workflows.

In [94]:
def make_infer_nuclei_dict():
    """
    Procedure to infer nuclei from linearly unmixed input.

    Parameters:
    ------------
    in_img: np.ndarray
        a 3d image containing all the channels

    soma_mask: np.ndarray
        mask

    Returns:
    -------------
    nuclei_object
        mask defined extent of NU

    """
    step_name = []
    function_name = []
    category =[]
    parameter_values = []
    parent = []
   
    ###################
    # EXTRACT
    ###################   
    # struct_img = _raw_soma_MCZ(in_img)
    step_name.append("1")
    function_name.append("extract")
    category.append("preprocessing")
    parameter_values.append(None)
    parent.append(0)

    ###################
    # PRE_PROCESSING
    ###################                         
    # nuclei = min_max_intensity_normalization(in_img[NUC_CH].copy() )
    step_name.append("2")
    function_name.append("min_max_intensity_normalization")
    category.append("preprocessing")
    parameter_values.append(None)
    parent.append(1)

    # size = 4   
    # nuclei = median_filter_slice_by_slice( 
    #                                                                 nuclei,
    #                                                                 size=size  )
    step_name.append("3")
    function_name.append("median_filter_slice_by_slice")
    category.append("preprocessing")
    parameter_values.append(dict(size = 4 ))
    parent.append(2)

    # sigma = 1.34
    # truncate_range = 3.0
    # nuclei = image_smoothing_gaussian_slice_by_slice(  nuclei,
    #                                                                                             sigma=sigma,
    #                                                                                             truncate_range = truncate_range
    #                                                                                             )
    step_name.append("4")
    function_name.append("image_smoothing_gaussian_slice_by_slice")
    category.append("preprocessing")
    parameter_values.append(dict( sigma = 1.34 ))
    parent.append(3)

    ###################
    # CORE_PROCESSING
    ###################
    # threshold_factor = 0.9 
    # thresh_min = .1
    # thresh_max = 1.
    # nuclei_object = apply_log_li_threshold(nuclei, threshold_factor=threshold_factor, thresh_min=thresh_min, thresh_max=thresh_max)
    step_name.append("5")
    function_name.append("apply_log_li_threshold")
    category.append("core")
    parameter_values.append(dict(threshold_factor = 0.9, 
                                                            thresh_min = .1,
                                                            thresh_max = 1.))
    parent.append(4)


    # NU_labels = label(nuclei_object)
    step_name.append("6")
    function_name.append("label")
    category.append("core")
    parameter_values.append(None)
    parent.append(5)


    ###################
    # POST_PROCESSING
    ###################
    # hole_width = 5  
    # nuclei_object = hole_filling(nuclei_object, hole_min=0, hole_max=hole_width**2, fill_2d=True)
    step_name.append("6")
    function_name.append("hole_filling")
    category.append("postprocessing")
    parameter_values.append(dict( hole_min=0, hole_max=5**2, fill_2d=True))
    parent.append(5)

    # # EEEEEK I don't know how to compose where the mask comes from... 
    # nuclei_object = apply_mask(nuclei_object, soma_mask)

    # small_object_width = 15
    # nuclei_object = size_filter_2D(nuclei_object, 
    #                                                             min_size= small_object_width**2, 
    #                                                             connectivity=1)
    step_name.append("7")
    function_name.append("size_filter_2D")
    category.append("postprocessing")
    parameter_values.append(dict( min_size = 15**2  ))
    parent.append(6)

    out_dict = dict()
    for i,stepn in enumerate(step_name):
        entry = dict(category=category[i],
                            function=function_name[i],
                            parameter_values=parameter_values[i],
                            parent=parent[i]
        )
        if entry['parameter_values'] is None:
            _ = entry.pop('parameter_values')
        out_dict[stepn] = entry
        
    return out_dict

In [99]:
def _write_workflow_json(wf_name, wf_dict):

    # read all_functions.json into dict
    # if not wf_name.startswith("conf"):
    #     wf_name = f"conf_{wf_name}"
    path = Directories.get_structure_config_dir() / f"{wf_name}.json"

    # re-write file
    with open(path, "w") as file:
        json.dump(wf_dict, file, indent=4, sort_keys=False)

    return path




In [100]:
# from infer_subc_2d.organelles_config.helper import write_workflow_json

infer_nuclei_dict = make_infer_nuclei_dict()

_write_workflow_json("infer_nuclei", infer_nuclei_dict)

PosixPath('/Users/ahenrie/Projects/Imaging/infer-subc-2D/infer_subc_2d/organelles_config/infer_nuclei.json')

In [84]:
wf_name = "conf_nuclei"
not wf_name.startswith("conf"), wf_name

(False, 'conf_nuclei')

In [66]:
entry['parameter_values'],entry.pop('parameter_values')


({'small_object_width': 2025}, {'small_object_width': 2025})

In [67]:
entry

{'category': 'postprocessing', 'function': 'size_filter_2D', 'parent': 6}

In [None]:
# TODO:  make export ome_tiff export:   XX_object, XX_label, XX_signal
#              also fix Path vs. str action for export wrapper

chan_name = 'nuclei'
out_path = data_root_path / "inferred_objects" 
object_name = 'NU_object'

nuclei_object_filen = export_ome_tiff(nuclei_object, meta_dict, object_name, str(out_path)+"/", curr_chan=0)

In [None]:
nuclei_object_filen
nuclei_object_filen = export_ndarray(nuclei_object,  object_name, str(out_path)+"/")


In [None]:
NU_object_filen

In [None]:
out_p