Agglomeration operators (#125)

* add agglomerate operator * add result image * add change log
seung-lab · Aug 20, 2019 · 3ba448d · 3ba448d
1 parent b4ffeb0
commit 3ba448d
Show file tree

Hide file tree

Showing 6 changed files with 118 additions and 69 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ ChangeLog history
 ## Features
 - neuroglancer operator works with multiple chunks (https://github.com/seung-lab/chunkflow/pull/123)
 - add connected components operator
+- add iterative mean agglomeration operator, including watershed computation.
 
 ## Bug Fixes 
 

diff --git a/chunkflow/flow/agglomerate.py b/chunkflow/flow/agglomerate.py
@@ -0,0 +1,55 @@
+import numpy as np
+from waterz import agglomerate
+
+from chunkflow.chunk import Chunk
+from .base import OperatorBase
+
+
+class AgglomerateOperator(OperatorBase):
+    """Mean/max agglomeration of affinity map including watershed step."""
+    def __init__(self, verbose: bool = True, name: str = 'agglomerate',
+                 threshold: float = 0.7,
+                 aff_threshold_low: float = 0.0001,
+                 aff_threshold_high: float = 0.9999,
+                 scoring_function: str = 'OneMinus<MeanAffinity<RegionGraphType, ScoreValue>>',
+                 flip_channel: bool = True):
+        super().__init__(name=name, verbose=verbose)
+        self.threshold = threshold
+        self.aff_threshold_low = aff_threshold_low
+        self.aff_threshold_high = aff_threshold_high
+        self.scoring_function = scoring_function
+        self.flip_channel = flip_channel
+
+    def __call__(self, affs: np.ndarray, fragments: np.ndarray = None):
+        """
+        Parameters:
+        -----------
+        affs: affinity map with 4 dimensions: channel, z, y, x
+        """
+        if isinstance(affs, Chunk):
+            # the segmentation is 3d, so we only need the zyx
+            global_offset = affs.global_offset[-3:]
+        else:
+            global_offset = None
+
+        # our affinity map channel order is x,y,z!
+        # meaning the first channel is x, the second is y,
+        # the third is z. We have to reverse the channel.
+        if self.flip_channel:
+            affs = np.flip(affs, axis=0)
+
+        # waterz need datatype float32 and 
+        # the array memory layout is contiguous
+        affs = np.ascontiguousarray(affs, dtype=np.float32)
+
+        # the output is a generator, and the computation is delayed
+        seg_generator = agglomerate(
+            affs, [self.threshold], fragments=fragments,
+            aff_threshold_low=self.aff_threshold_low,
+            aff_threshold_high=self.aff_threshold_high,
+            scoring_function=self.scoring_function,
+            force_rebuild=False)
+        # there is only one threshold, so there is also only one result
+        # in generator
+        seg = next(seg_generator)
+        return Chunk(seg, global_offset=global_offset)
diff --git a/chunkflow/flow/flow.py b/chunkflow/flow/flow.py
@@ -1,14 +1,15 @@
 #!/usr/bin/env python
-import click
-from functools import wraps, update_wrapper
+from functools import update_wrapper, wraps
 from time import time
 import numpy as np
+import click
 from cloudvolume.lib import Bbox
 
 from chunkflow.lib.aws.sqs_queue import SQSQueue
 from chunkflow.chunk.segmentation import Segmentation
 
 # import operator functions
+from .agglomerate import AgglomerateOperator
 from .cloud_watch import CloudWatchOperator
 from .create_chunk import CreateChunkOperator
 from .crop_margin import CropMarginOperator
@@ -33,6 +34,7 @@
 # global dict to hold the operators and parameters
 state = {'operators': {}}
 INITIAL_TASK = {'skip': False, 'log': {'timer': {}}}
+DEFAULT_CHUNK_NAME = 'chunk'
 
 
 def handle_task_skip(task, name):
@@ -169,7 +171,47 @@ def fetch_task(queue_name, visibility_timeout):
         yield task
 
 
+@main.command('agglomerate')
+@click.option('--name', type=str, default='create-chunk', help='name of operator')
+@click.option('--threshold', '-t',
+              type=float, default=0.7, help='agglomeration threshold')
+@click.option('--aff-threshold-low', '-l',
+              type=float, default=0.0001, help='low threshold for watershed')
+@click.option('--aff-threshold-high', '-h',
+              type=float, default=0.9999, help='high threshold for watershed')
+@click.option('--fragments-chunk-name', '-f',
+              type=str, default=None, help='optional fragments/supervoxel chunk to use.')
+@click.option('--scoring-function', '-s',
+              type=str, default='OneMinus<MeanAffinity<RegionGraphType, ScoreValue>>',
+              help='A C++ type string specifying the edge scoring function to use.')
+@click.option('--input-chunk-name', '-i',
+              type=str, default=DEFAULT_CHUNK_NAME, help='input chunk name')
+@click.option('--output-chunk-name', '-o',
+              type=str, default=DEFAULT_CHUNK_NAME, help='output chunk name')
+@operator
+def agglomerate(tasks, name, threshold, aff_threshold_low, aff_threshold_high,
+                fragments_chunk_name, scoring_function, input_chunk_name, output_chunk_name):
+    state['operators'][name] = AgglomerateOperator(name=name, verbose=state['verbose'],
+                                                   threshold=threshold, 
+                                                   aff_threshold_low=aff_threshold_low,
+                                                   aff_threshold_high=aff_threshold_high,
+                                                   scoring_function=scoring_function)
+    for task in tasks:
+        if fragments_chunk_name and fragments_chunk_name in task:
+            fragments = task[fragments_chunk_name]
+        else:
+            fragments = None 
+
+        task[output_chunk_name] = state['operators'][name](
+            task[input_chunk_name], fragments=fragments)
+        yield task
+
+
 @main.command('create-chunk')
+@click.option('--name',
+              type=str,
+              default='create-chunk',
+              help='name of operator')
 @click.option('--size',
               type=int,
               nargs=3,
@@ -191,12 +233,12 @@ def fetch_task(queue_name, visibility_timeout):
               default="chunk",
               help="name of created chunk")
 @operator
-def create_chunk(tasks, size, dtype, voxel_offset, output_chunk_name):
+def create_chunk(tasks, name, size, dtype, voxel_offset, output_chunk_name):
     """Create a fake chunk for easy test."""
-    create_chunk_operator = CreateChunkOperator()
+    state['operators'][name] = CreateChunkOperator()
     print("creating chunk: ", output_chunk_name)
     for task in tasks:
-        task[output_chunk_name] = create_chunk_operator(
+        task[output_chunk_name] = state['operators'][name](
             size=size, dtype=dtype, voxel_offset=voxel_offset)
         yield task
 

diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -0,0 +1,2 @@
+yapf
+flake8
diff --git a/docs/source/_static/image/image_seg.png b/docs/source/_static/image/image_seg.png
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
@@ -105,68 +105,6 @@ Given a trained convolution network model, it can process small patches of image
 
 In order to provide a general interface for broader application, the ConvNet model should be instantiated, called ``InstantiatedModel``, with all of it's parameter setup inside. Chunkflow also provide a interface for customized preprocessing and postprocessing. You can define ``pre_process`` and ``post_process`` function to add your specialized operations. This is an example of code:
 
-.. code-block:: python
-   
-   def pre_process(input_patch):
-      # we do not need to do anything, 
-      # just transfer input patch to net
-      net_input = input_patch
-      return net_input
-
-   def post_process(net_output):                                
-      # the net output is a list of 5D tensor, 
-      # and there is only one element. 
-      output_patch = net_output[0]
-      # the output patch is a 5D tensor with dimension of batch, channel, z, y, x
-      # there is only one channel, so we drop it.
-      # use narrow function to avoid memory copy. 
-      output_patch = output_patch.narrow(1, 0, 1)
-      # We need to apply sigmoid function to get the softmax result
-      output_patch = torch.sigmoid(output_patch)               
-      return output_patch                                      
-                                                             
-   in_dim = 1                                                   
-   output_spec = OrderedDict(psd_label=1)
-   depth = 3                                                    
-   InstantiatedModel = Model(in_dim, output_spec, depth)        
-
-.. note::
-
-   If you do not define the pre_process and post_process function, it will automatically be replaced as identity function and do not do any transformation.
-
-Synaptic Cleft Detection
-------------------------
-With only one command, you can perform the inference to produce cleft map and visualize it::
-
-   chunkflow read-tif -f "$IMAGE_PATH" -o image inference --convnet-model model.py --convnet-weight-path weight.chkpt --patch-size 18 192 192 --patch-overlap 4 64 64 --framework pytorch --batch-size 6 --bump wu --num-output-channels 1 --mask-output-chunk -i image -o cleft write-tif -i cleft -f cleft.tif neuroglancer -c image,cleft -p 33333 -v 30 6 6
-
-You can see the image with output synapse cleft map:
-
-|cleft|
-
-.. |cleft| image:: _static/image/cleft.png
-
-
-You can also apply a threshold to get a segmentation of the cleft map::
-
-   chunkflow read-tif -f "$IMAGE_PATH" -o image read-tif -f cleft.tif -o cleft connected-components -i cleft -o seg -t 0.1 neuroglancer -p 33333 -c image,seg -v 30 6 6
-
-You should see segmentation overlayed with image:
-
-|cleft_label|
-
-.. |cleft_label| image:: _static/image/cleft_label.png
-
-Of course, you can add a writing operator, such as ``write-tif``, before the ``neuroglancer`` operator to save the segmentation.
-
-Dense Neuron Segmentation
--------------------------
-
-.. note::
-   If there is GPU and cuda available, chunkflow will automatically use GPU for both inference and reweighting.
-
-In order to provide a general interface for broader application, the ConvNet model should be instantiated, called ``InstantiatedModel``, with all of it's parameter setup inside. Chunkflow also provide a interface for customized preprocessing and postprocessing. You can define ``pre_process`` and ``post_process`` function to add your specialized operations. This is an example of code:
-
 .. code-block:: python
    
    def pre_process(input_patch):
@@ -226,16 +164,27 @@ Dense Neuron Segmentation
 
 We used a ConvNet trained using SNEMI3D_ dataset, you can download the data from the website. Then, we can perform boundary detection with one single command:: 
 
-    chunkflow read-tif --file-name path/of/image.tif -o image inference --convnet-model path/of/model.py --convnet-weight-path path/of/weight.pt --patch-size 20 256 256 --patch-overlap 4 64 64 --num-output-channels 3 -f pytorch --batch-size 12 --mask-output-chunk -i image -o aff write-h5 -i aff --file-name aff.h5 neuroglancer -c image,aff -p 33333 -v 30 6 6
+    chunkflow read-tif --file-name path/of/image.tif -o image inference --convnet-model path/of/model.py --convnet-weight-path path/of/weight.pt --patch-size 20 256 256 --patch-overlap 4 64 64 --num-output-channels 3 -f pytorch --batch-size 12 --mask-output-chunk -i image -o affs write-h5 -i affs --file-name affs.h5 neuroglancer -c image,affs -p 33333 -v 30 6 6
 
 .. _SNEMI3D: http://brainiac2.mit.edu/SNEMI3D/home
 
 |image_aff|
 
 .. |image_aff| image:: _static/image/image_aff.png
 
-The boundary map is also saved in ``aff.h5`` file and could be used in later processing.
+The boundary map is also saved in ``affs.h5`` file and could be used in later processing. The affinitymap array axis is ``channel,z,y,x``, and the channel order is ``x,y,z`` for our model output, meaning the first channel is ``x`` direction. 
+
+You can perform mean affinity segmentation with one single command::
+
+   chunkflow read-h5 --file-name affs.h5 -o affs agglomerate --threshold 0.7 --aff-threshold-low 0.001 --aff-threshold-high 0.9999 -i affs -o seg write-tif -i seg -f seg.tif read-tif --file-name image.tif -o image neuroglancer -c image,affs,seg -p 33333 -v 30 6 6
+
+You should be able to see the image, affinity map and segmentation in neuroglancer. Overlay the segmentation with the image looks like this:
+
+|image_seg|
+
+.. |image_seg| image:: _static/image/image_seg.png
 
+If the computation takes too long, you can decrease the ``aff-threshold-high`` to create bigger supervoxels or decrease the ``threshold`` to merge less watershed domains.
 
 Distributed Computation
 ************************