
QuickBundlesX #1434

Merged: 36 commits, Mar 3, 2018

Commits (36), showing changes from all commits
031634e
NF: Added QBX, QBO, QBXO
Garyfallidis Nov 21, 2017
89089a6
TEST: added test module for QuickBundlesX
Garyfallidis Nov 21, 2017
626f31f
NF: added new speed versions of QBX, QBO, QBXO
Garyfallidis Nov 21, 2017
2dcb6b6
Added TreeCluster and TreeClusterMap
Garyfallidis Nov 22, 2017
fc21fb0
Updated clustering_algorithms.pyx
Garyfallidis Nov 22, 2017
70fa188
Added Cythonized class of QBX in clusterringspeed.pyx
Garyfallidis Nov 22, 2017
c164d76
Added missing definitions
Garyfallidis Nov 22, 2017
b34a66c
Temporary change in test_qbx.py
Garyfallidis Nov 22, 2017
7723025
TEST: Updating tests to be py2/3 compatible
Garyfallidis Feb 16, 2018
c9a9792
Removed Online versions and simplified tests
Garyfallidis Feb 16, 2018
6667ba6
Pep8 and refs
Garyfallidis Feb 16, 2018
0853e66
DOC: adding explanation on the QBX output
Garyfallidis Feb 17, 2018
d2897c3
TEST: started improving tests
Garyfallidis Feb 18, 2018
cc3df22
TEST: test points ready
Garyfallidis Feb 18, 2018
8a57ccd
TEST: updating QBX tests
Garyfallidis Feb 18, 2018
db1b47f
TEST: updated test_3D_segments
Garyfallidis Feb 18, 2018
5f412d2
TEST: improved test of simple simulated bundle of 3 streamlines
Garyfallidis Feb 18, 2018
9cab395
TEST: Removed viz parts in test_qbx
Garyfallidis Feb 18, 2018
55b68b6
add function create/free memview
skoudoro Feb 28, 2018
50084a5
manage Data2D to make cython 0.28 happy
skoudoro Feb 28, 2018
5fd00e0
update doc
skoudoro Feb 28, 2018
ef83255
update doc
skoudoro Feb 28, 2018
7ab7efb
Merge pull request #51 from skoudoro/rb_qbx2
Garyfallidis Feb 28, 2018
49f9afc
correction from @MarcCote : update benchmark + pep8 correction
skoudoro Mar 1, 2018
5a9e113
replace xrange by range
skoudoro Mar 1, 2018
0577aff
from @MarcCote : adding a test to quickbundles
skoudoro Mar 1, 2018
222c54a
from @MarcCote : update metric to the refactoring
skoudoro Mar 1, 2018
e778ef8
pep8 correction
skoudoro Mar 1, 2018
53940ab
renaming functions free/create_memview to free/create_memview_2d
skoudoro Mar 1, 2018
2061bc3
Merge pull request #52 from skoudoro/rb_qbx2
Garyfallidis Mar 1, 2018
4e3667f
Corrected Marco's comments and removed bvh from QuickBundles
Garyfallidis Mar 1, 2018
0e888bb
Reducing code complexity
MarcCote Mar 2, 2018
c081f2e
Merge pull request #53 from MarcCote/rb_qbx2
Garyfallidis Mar 2, 2018
d9bfbfb
Use Py2/3 compatible xrange and commented fetch_level declaration
Garyfallidis Mar 2, 2018
12cb3b7
Increased coverage in QBX
Garyfallidis Mar 2, 2018
f81a4da
More comments addressed
Garyfallidis Mar 3, 2018
16 changes: 8 additions & 8 deletions dipy/segment/benchmarks/bench_quickbundles.py
@@ -33,15 +33,15 @@ def are_compatible(self, shape1, shape2):
return shape1 == shape2

def dist(self, features1, features2):
dist = np.sqrt(np.sum((features1-features2)**2, axis=1))
dist = np.sum(dist/len(features1))
dist = np.sqrt(np.sum((features1 - features2)**2, axis=1))
dist = np.sum(dist / len(features1))
return dist


def bench_quickbundles():
dtype = "float32"
repeat = 10
nb_points = 18
nb_points = 12

streams, hdr = nib.trackvis.read(get_data('fornix'))
fornix = [s[0].astype(dtype) for s in streams]
@@ -60,7 +60,7 @@ def bench_quickbundles():

# The expected number of clusters of the fornix using threshold=10 is 4.
threshold = 10.
expected_nb_clusters = 4*8
expected_nb_clusters = 4 * 8

print("Timing QuickBundles 1.0 vs. 2.0")

@@ -75,21 +75,21 @@ def bench_quickbundles():
qb2 = QB_New(threshold)
qb2_time = measure("clusters = qb2.cluster(streamlines)", repeat)
print("QuickBundles2 time: {0:.4}sec".format(qb2_time))
print("Speed up of {0}x".format(qb1_time/qb2_time))
print("Speed up of {0}x".format(qb1_time / qb2_time))
clusters = qb2.cluster(streamlines)
sizes2 = map(len, clusters)
indices2 = map(lambda c: c.indices, clusters)
assert_equal(len(clusters), expected_nb_clusters)
assert_array_equal(sizes2, sizes1)
assert_array_equal(list(sizes2), sizes1)
assert_arrays_equal(indices2, indices1)

qb = QB_New(threshold, metric=MDFpy())
qb3_time = measure("clusters = qb.cluster(streamlines)", repeat)
print("QuickBundles2_python time: {0:.4}sec".format(qb3_time))
print("Speed up of {0}x".format(qb1_time/qb3_time))
print("Speed up of {0}x".format(qb1_time / qb3_time))
clusters = qb.cluster(streamlines)
sizes3 = map(len, clusters)
indices3 = map(lambda c: c.indices, clusters)
assert_equal(len(clusters), expected_nb_clusters)
assert_array_equal(sizes3, sizes1)
assert_array_equal(list(sizes3), sizes1)
assert_arrays_equal(indices3, indices1)
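The pure-Python `MDFpy` metric benchmarked above boils down to an averaged point-wise Euclidean distance between two equally resampled streamlines. Below is a minimal sketch of that computation on toy data; the helper name `mdf_like_distance` is ours, and only the direct orientation is shown (the full MDF distance also considers the flipped streamline):

```python
import numpy as np

def mdf_like_distance(features1, features2):
    # Averaged point-wise Euclidean distance, mirroring MDFpy.dist() above.
    dist = np.sqrt(np.sum((features1 - features2) ** 2, axis=1))
    return np.sum(dist / len(features1))

# Two toy "streamlines" already resampled to the same number of points.
s1 = np.array([[0., 0., 0.], [1., 0., 0.], [2., 0., 0.]], dtype=np.float32)
s2 = np.array([[0., 1., 0.], [1., 1., 0.], [2., 1., 0.]], dtype=np.float32)
print(mdf_like_distance(s1, s2))  # 1.0, since every point is exactly one unit away
```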
142 changes: 142 additions & 0 deletions dipy/segment/clustering.py
@@ -508,3 +508,145 @@ def cluster(self, streamlines, ordering=None):

cluster_map.refdata = streamlines
return cluster_map


class QuickBundlesX(Clustering):
r""" Clusters streamlines using QuickBundlesX.

Parameters
----------
thresholds : list of float
Thresholds to use for each clustering layer. A threshold is the maximum
distance from a cluster for a streamline to still be considered part of
it.
metric : str or `Metric` object (optional)
The distance metric to use when comparing two streamlines. By default,
the Minimum average Direct-Flip (MDF) distance [Garyfallidis12]_ is
used and streamlines are automatically resampled so they have 12 points.

References
----------
.. [Garyfallidis12] Garyfallidis E. et al., QuickBundles a method for
tractography simplification, Frontiers in Neuroscience,
vol 6, no 175, 2012.

.. [Garyfallidis16] Garyfallidis E. et al. QuickBundlesX: Sequential
clustering of millions of streamlines in multiple
levels of detail at record execution time. Proceedings
of the International Society for Magnetic Resonance
in Medicine (ISMRM). Singapore, 4187, 2016.
"""
def __init__(self, thresholds, metric="MDF_12points"):
self.thresholds = thresholds

if isinstance(metric, Metric):
self.metric = metric
elif metric == "MDF_12points":
feature = ResampleFeature(nb_points=12)
self.metric = AveragePointwiseEuclideanMetric(feature)
else:
raise ValueError("Unknown metric: {0}".format(metric))

def cluster(self, streamlines, ordering=None):
""" Clusters `streamlines` into bundles.

Performs QuickBundlesX using a predefined metric and thresholds.

Parameters
----------
streamlines : list of 2D arrays
Each 2D array represents a sequence of 3D points (points, 3).
ordering : iterable of indices
Specifies the order in which data points will be clustered.

Returns
-------
`TreeClusterMap` object
Result of the clustering.
"""
from dipy.segment.clustering_algorithms import quickbundlesx
tree = quickbundlesx(streamlines, self.metric,
thresholds=self.thresholds,
ordering=ordering)
tree.refdata = streamlines
return tree


class TreeCluster(ClusterCentroid):
def __init__(self, threshold, centroid, indices=None):
super(TreeCluster, self).__init__(centroid=centroid, indices=indices)
self.threshold = threshold
self.parent = None
self.children = []

def add(self, child):
child.parent = self
self.children.append(child)

@property
def is_leaf(self):
return len(self.children) == 0


class TreeClusterMap(ClusterMap):
def __init__(self, root):
self.root = root
self.leaves = []

def _retrieves_leaves(node):
if node.is_leaf:
self.leaves.append(node)

self.traverse_postorder(self.root, _retrieves_leaves)

@property
def refdata(self):
return self._refdata

@refdata.setter
def refdata(self, value):
if value is None:
value = Identity()

self._refdata = value

def _set_refdata(node):
node.refdata = self._refdata

self.traverse_postorder(self.root, _set_refdata)

def traverse_postorder(self, node, visit):
for child in node.children:
self.traverse_postorder(child, visit)

visit(node)

def iter_preorder(self, node):
parent_stack = []
while len(parent_stack) > 0 or node is not None:
if node is not None:
yield node
if len(node.children) > 0:
parent_stack += node.children[1:]
node = node.children[0]
else:
node = None
else:
node = parent_stack.pop()

def __iter__(self):
return self.iter_preorder(self.root)

def get_clusters(self, wanted_level):
clusters = ClusterMapCentroid()

def _traverse(node, level=0):
if level == wanted_level:
clusters.add_cluster(node)
return

for child in node.children:
_traverse(child, level + 1)

_traverse(self.root)
return clusters
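The new `QuickBundlesX` class and `TreeClusterMap` added above can be exercised end to end. The following is a minimal usage sketch assuming the API exactly as introduced in this diff; the toy streamlines and thresholds are illustrative only, and the level-to-threshold mapping follows the `get_clusters()` traversal shown above, with the root at level 0:

```python
import numpy as np
from dipy.segment.clustering import QuickBundlesX

# Three toy streamlines: two close together, one far away.
streamlines = [np.array([[0, 0, 0], [1, 0, 0], [2, 0, 0]], dtype='f4'),
               np.array([[0, 0.5, 0], [1, 0.5, 0], [2, 0.5, 0]], dtype='f4'),
               np.array([[0, 30, 0], [1, 30, 0], [2, 30, 0]], dtype='f4')]

# One clustering layer per threshold, ordered from coarse to fine.
qbx = QuickBundlesX(thresholds=[40., 10., 2.])
tree = qbx.cluster(streamlines)  # returns a TreeClusterMap

# Flatten a given depth of the hierarchy into an ordinary cluster map;
# deeper levels correspond to successively smaller thresholds.
for level in range(1, 4):
    clusters = tree.get_clusters(level)
    print(level, [list(c.indices) for c in clusters])
```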
85 changes: 77 additions & 8 deletions dipy/segment/clustering_algorithms.pyx
@@ -1,14 +1,23 @@
# distutils: language = c
# cython: wraparound=False, cdivision=True, boundscheck=False
# cython: wraparound=False, cdivision=True, boundscheck=False, initializedcheck=False

from dipy.utils.six.moves import xrange
import itertools
import numpy as np

from cythonutils cimport Data2D, shape2tuple
from metricspeed cimport Metric
from clusteringspeed cimport ClustersCentroid, Centroid, QuickBundles
from clusteringspeed cimport ClustersCentroid, Centroid, QuickBundles, QuickBundlesX
from dipy.segment.clustering import ClusterMapCentroid, ClusterCentroid

cdef extern from "stdlib.h" nogil:
ctypedef unsigned long size_t
void free(void *ptr)
void *calloc(size_t nelem, size_t elsize)
void *realloc(void *ptr, size_t elsize)
void *memset(void *ptr, int value, size_t num)


DTYPE = np.float32
DEF BIGGEST_DOUBLE = 1.7976931348623157e+308 # np.finfo('f8').max
DEF BIGGEST_FLOAT = 3.4028235e+38 # np.finfo('f4').max
@@ -34,8 +43,13 @@ def clusters_centroid2clustermap_centroid(ClustersCentroid clusters_list):
Result of the clustering contained in a Python object.
"""
clusters = ClusterMapCentroid()
for i in range(clusters_list._nb_clusters):
centroid = np.asarray(clusters_list.centroids[i].features)
cdef Data2D features
shape = clusters_list._centroid_shape

for i in xrange(clusters_list._nb_clusters):
features = <float[:shape.dims[0], :shape.dims[1]]> \
&clusters_list.centroids[i].features[0][0,0]
centroid = np.asarray(features)
indices = np.asarray(<int[:clusters_list.clusters_size[i]]> clusters_list.clusters_indices[i]).tolist()
clusters.add_cluster(ClusterCentroid(id=i, centroid=centroid, indices=indices))

@@ -50,7 +64,8 @@ def peek(iterable):
return first, iterator


def quickbundles(streamlines, Metric metric, double threshold, long max_nb_clusters=BIGGEST_INT, ordering=None):
def quickbundles(streamlines, Metric metric, double threshold,
long max_nb_clusters=BIGGEST_INT, ordering=None):
""" Clusters streamlines using QuickBundles.

Parameters
@@ -82,7 +97,6 @@ def quickbundles(streamlines, Metric metric, double threshold, long max_nb_clust
threshold = min(threshold, BIGGEST_DOUBLE)
# Threshold of -np.inf is not supported, set it to 0
threshold = max(threshold, 0)

if ordering is None:
ordering = xrange(len(streamlines))

@@ -94,15 +108,70 @@ def quickbundles(streamlines, Metric metric, double threshold, long max_nb_clust
features_shape = shape2tuple(metric.feature.c_infer_shape(streamlines[first_idx].astype(DTYPE)))
cdef QuickBundles qb = QuickBundles(features_shape, metric, threshold, max_nb_clusters)
cdef int idx

for idx in ordering:
streamline = streamlines[idx]
if not streamline.flags.writeable or streamline.dtype != DTYPE:
streamline = streamline.astype(DTYPE)

cluster_id = qb.assignment_step(streamline, idx)
# The update step is performed right after the assignment step, instead
# of after all streamlines have been assigned as in the k-means algorithm.
qb.update_step(cluster_id)

return clusters_centroid2clustermap_centroid(qb.clusters)
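As the comment above notes, QuickBundles refreshes a centroid immediately after each assignment rather than waiting for a full pass over the data as batch k-means does. Here is a pure-Python sketch of that loop with our own simplifications: no flipped-orientation handling, no maximum cluster count, and the helper name `quickbundles_sketch` is hypothetical:

```python
import numpy as np

def quickbundles_sketch(features, threshold):
    """Assignment followed by an immediate centroid update, per streamline."""
    centroids, members = [], []
    for idx, f in enumerate(features):
        # Assignment step: find the nearest existing centroid.
        dists = [np.mean(np.linalg.norm(f - c, axis=1)) for c in centroids]
        best = int(np.argmin(dists)) if dists else -1
        if best == -1 or dists[best] > threshold:
            centroids.append(f.astype(float).copy())
            members.append([idx])
            continue
        # Update step: refresh the centroid right away (batch k-means would
        # wait until every streamline had been assigned).
        n = len(members[best])
        centroids[best] = (centroids[best] * n + f) / (n + 1)
        members[best].append(idx)
    return centroids, members
```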


def quickbundlesx(streamlines, Metric metric, thresholds, ordering=None):
""" Clusters streamlines using QuickBundlesX.

Parameters
----------
streamlines : list of 2D arrays
List of streamlines to cluster.
metric : `Metric` object
Tells how to compute the distance between two streamlines.
thresholds : list of double
Thresholds to use for each clustering layer. A threshold is the maximum
distance from a cluster for a streamline to still be considered part of
it.
ordering : iterable of indices, optional
Iterate through `data` using the given ordering.

Returns
-------
`TreeClusterMap` object
Result of the clustering. Use get_clusters() to get the clusters at
a specific level of the hierarchy.

References
----------

.. [Garyfallidis16] Garyfallidis E. et al. QuickBundlesX: Sequential
clustering of millions of streamlines in multiple
levels of detail at record execution time. Proceedings
of the International Society for Magnetic Resonance
in Medicine (ISMRM). Singapore, 4187, 2016.

.. [Garyfallidis12] Garyfallidis E. et al., QuickBundles a method for
tractography simplification, Frontiers in Neuroscience,
vol 6, no 175, 2012.
"""
if ordering is None:
ordering = xrange(len(streamlines))

# Check if `ordering` or `streamlines` are empty
first_idx, ordering = peek(ordering)
if first_idx is None or len(streamlines) == 0:
return ClusterMapCentroid()

features_shape = shape2tuple(metric.feature.c_infer_shape(streamlines[first_idx].astype(DTYPE)))
cdef QuickBundlesX qbx = QuickBundlesX(features_shape, thresholds, metric)
cdef int idx

for idx in ordering:
streamline = streamlines[idx]
if not streamline.flags.writeable or streamline.dtype != DTYPE:
streamline = streamline.astype(DTYPE)

qbx.insert(streamline, idx)

return qbx.get_tree_cluster_map()
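The `quickbundlesx` loop above delegates the actual tree building to `QuickBundlesX.insert` in clusteringspeed.pyx, which is not part of this diff. As a rough mental model only (class `_Node` and function `qbx_insert_sketch` are ours, and the real code additionally handles flipped orientations and leaf bookkeeping), each streamline descends the tree one threshold at a time, updating one centroid per level:

```python
import numpy as np

class _Node:
    """Toy tree node: a running-mean centroid plus children."""
    def __init__(self, feature=None):
        self.feature = feature
        self.count = 0
        self.indices = []
        self.children = []

def qbx_insert_sketch(root, thresholds, feature, idx):
    """Insert one streamline level by level, coarsest threshold first."""
    node = root
    for threshold in thresholds:
        dists = [np.mean(np.linalg.norm(feature - child.feature, axis=1))
                 for child in node.children]
        best = int(np.argmin(dists)) if dists else -1
        if best == -1 or dists[best] > threshold:
            # No close enough cluster at this level: start a new one.
            child = _Node(feature.astype(float).copy())
            node.children.append(child)
        else:
            child = node.children[best]
            # Incremental centroid update, as in plain QuickBundles.
            child.feature = (child.feature * child.count + feature) / (child.count + 1)
        child.count += 1
        child.indices.append(idx)
        node = child  # continue the descent from the chosen cluster
```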