Parallel working

the-virtual-brain · Jul 28, 2020 · a156691 · a156691
1 parent 3b17adc
commit a156691
Show file tree

Hide file tree

Showing 3 changed files with 219 additions and 107 deletions.
diff --git a/gdist.pyx b/gdist.pyx
@@ -58,36 +58,42 @@ import scipy.sparse
 # Pre-cdef'd containers from the C++ standard library
 from libcpp cimport bool
 from libcpp.vector cimport vector
-
-import cython
-from cython.parallel import prange
+# from libcpp.tuple cimport tuple
 
 ################################################################################
 ############ Defininitions to access the C++ geodesic distance library #########
 ################################################################################
 cdef extern from "geodesic_mesh_elements.h" namespace "geodesic":
     cdef cppclass Vertex:
-        Vertex() nogil
+        Vertex()
 
 cdef extern from "geodesic_mesh_elements.h" namespace "geodesic":
     cdef cppclass SurfacePoint:
-        SurfacePoint() nogil
-        SurfacePoint(Vertex*) nogil
+        SurfacePoint()
+        SurfacePoint(Vertex*)
         double& x()
         double& y()
         double& z()
 
 cdef extern from "geodesic_mesh.h" namespace "geodesic":
     cdef cppclass Mesh:
-        Mesh() nogil
+        Mesh()
         void initialize_mesh_data(vector[double]&, vector[unsigned]&, bool)
-        vector[Vertex]& vertices() nogil
+        vector[Vertex]& vertices()
+
+cdef extern from "geodesic_utils.h":
+    cdef cppclass SparseMatrix:
+        vector[unsigned] rows
+        vector[unsigned] columns
+        vector[double] data
+    vector[double] compute_gdist_impl(vector[double], vector[unsigned], vector[unsigned], vector[unsigned], double, bool, bool)
+    SparseMatrix local_gdist_matrix_impl(vector[double], vector[unsigned], double, bool)
 
 cdef extern from "geodesic_algorithm_exact.h" namespace "geodesic":
     cdef cppclass GeodesicAlgorithmExact:
-        GeodesicAlgorithmExact(Mesh*) nogil
-        void propagate(vector[SurfacePoint]&, double, vector[SurfacePoint]*) nogil
-        unsigned best_source(SurfacePoint&, double&) nogil
+        GeodesicAlgorithmExact(Mesh*)
+        void propagate(vector[SurfacePoint]&, double, vector[SurfacePoint]*)
+        unsigned best_source(SurfacePoint&, double&)
 
 cdef extern from "geodesic_constants_and_simple_functions.h" namespace "geodesic":
     double GEODESIC_INF
@@ -103,12 +109,12 @@ cdef get_mesh(
     # Define C++ vectors to contain the mesh surface components.
     cdef vector[double] points
     cdef vector[unsigned] faces
-    
+
     # Map numpy array of mesh "vertices" to C++ vector of mesh "points" 
     cdef numpy.float64_t coord
     for coord in vertices.flatten():
         points.push_back(coord)
-    
+
     # Map numpy array of mesh "triangles" to C++ vector of mesh "faces" 
     cdef numpy.int32_t indx
     for indx in triangles.flatten():
@@ -162,78 +168,62 @@ def compute_gdist(numpy.ndarray[numpy.float64_t, ndim=2] vertices,
         >>> src = numpy.array([1], dtype=numpy.int32)
         >>> trg = numpy.array([2], dtype=numpy.int32)
         >>> import gdist
-        >>> gdist.compute_gdist(vertices, triangles, source_indices = src, target_indices = trg)
-         array([ 0.2])
-    
+        >>> gdist.compute_gdist(vertices, triangles, source_indices=src, target_indices=trg)
+         array([0.2])
     """
 
-    cdef Mesh amesh
-    get_mesh(vertices, triangles, amesh, is_one_indexed)
-
-    # Define and create object for exact algorithm on that mesh:
-    cdef GeodesicAlgorithmExact *algorithm = new GeodesicAlgorithmExact(&amesh)
-
-    # Define a C++ vector for the source vertices
-    cdef vector[SurfacePoint] all_sources
-
-    # Define a C++ vector for the target vertices
-    cdef vector[SurfacePoint] stop_points
-
-    # Define a NumPy array to hold the results
-    cdef numpy.ndarray[numpy.float64_t, ndim=1] distances
+    cdef bool propagate_on_max_distance = False
+    cdef vector[double] points
+    cdef vector[unsigned] faces
 
-    cdef Py_ssize_t  k
-
-    if source_indices is None: #Default to 0
-        source_indices = numpy.arange(0, dtype=numpy.int32)
-    # Map the NumPy sources of targets to a C++ vector
-    for k in source_indices:
-        all_sources.push_back(SurfacePoint(&amesh.vertices()[k]))
-
+    if source_indices is None:
+        source_indices = numpy.arange(0, dtype=numpy.int32)  # default to 0
     if target_indices is None:
-        # Propagate purely on max_distance
-        algorithm.propagate(all_sources, max_distance, NULL)
-        # Make all vertices targets
-        # NOTE: this is inefficient in the best_source step below, but not sure
-        # how to avoid.
-        target_indices = numpy.arange(vertices.shape[0], dtype=numpy.int32)
-        # Map the NumPy array of targets to a C++ vector
-        for k in target_indices:
-            stop_points.push_back(SurfacePoint(&amesh.vertices()[k]))
-    else:
-        # Map the NumPy array of targets to a C++ vector
-        for k in target_indices:
-            stop_points.push_back(SurfacePoint(&amesh.vertices()[k]))
-        # Propagate to the specified targets
-        algorithm.propagate(all_sources, max_distance, &stop_points)
-
-    distances = numpy.zeros((len(target_indices), ), dtype=numpy.float64)
-    for k in range(stop_points.size()):
-        algorithm.best_source(stop_points[k], distances[k])
-
-    distances[distances==GEODESIC_INF] = numpy.inf
+        propagate_on_max_distance = True
+        target_indices = numpy.arange(vertices.size(), dtype=numpy.int32)
 
+    for k in vertices.flatten():
+        points.push_back(k)
+    for k in triangles.flatten():
+        faces.push_back(k)
+
+    distances = compute_gdist_impl(
+        points,
+        faces,
+        source_indices,
+        target_indices,
+        max_distance,
+        is_one_indexed,
+        propagate_on_max_distance,
+    )
+    # TODO: Basically copies, can be improved as memory is contiguous.
+    distances = numpy.asarray(distances)
+    distances[distances==max_distance] = numpy.inf
     return distances
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def local_gdist_matrix(numpy.ndarray[numpy.float64_t, ndim=2] vertices,
                        numpy.ndarray[numpy.int32_t, ndim=2] triangles,
                        double max_distance = GEODESIC_INF,
                        bool is_one_indexed = False):
-    """
-    This is the wrapper function for computing geodesic distance from every 
+    """This is the wrapper function for computing geodesic distance from every 
     vertex on the surface to all those within a distance ``max_distance`` of 
-    them. The function accepts three arguments:
-        ``vertices``: defines x,y,z coordinates of the mesh's vertices
-        ``triangles``: defines faces of the mesh as index triplets into vertices.
-        ``max_distance``:
-        ``is_one_indexed``: defines if the index of the triangles data is one-
-        indexed
-    and returns a scipy.sparse.csc_matrix((N, N), dtype=numpy.float64), where N
-    is the number of vertices, specifying the shortest distance from all 
-    vertices to all the vertices within max_distance.
+    them.
+
+    Args:
+        vertices (numpy.ndarray[numpy.float64_t, ndim=2]): Defines x,y,z
+            coordinates of the mesh's vertices.
+        triangles (numpy.ndarray[numpy.float64_t, ndim=2]): Defines faces of
+            the mesh as index triplets into vertices.
+        max_distance (double): Propagation algorithm stops after reaching the
+            certain distance from the source.
+        is_one_indexed (bool): defines if the index of the triangles data is
+            one-indexed.
+        
+    Returns:
+        scipy.sparse.csc_matrix((N, N), dtype=numpy.float64): where N
+        is the number of vertices, specifying the shortest distance from all 
+        vertices to all the vertices within max_distance.
     
     Basic usage then looks like::
         >>> import numpy
@@ -252,48 +242,117 @@ def local_gdist_matrix(numpy.ndarray[numpy.float64_t, ndim=2] vertices,
          [19, 28, 49, 81, 125, 181, 248, 331, 422, 522], # s
          [ 3, 13, 30, 56,  89, 129, 177, 232, 292, 358]] # MB]
          
-    where memory is a min-guestimate given by: mem_req = nnz * 8 / 1024 / 1024
-    
+    where memory is a min-guestimate given by: mem_req = nnz * 8 / 1024 / 1024.
+    """
+
+    """
     NOTE: The best_source loop could be sped up considerably by restricting 
           targets to those vertices within max_distance of the source, however,
           this will first require the efficient extraction of this information
           from the propgate step...
     """
 
-    cdef Mesh amesh
-    get_mesh(vertices, triangles, amesh, is_one_indexed)
-
-    # Define and create object for exact algorithm on that mesh:
-    cdef GeodesicAlgorithmExact *algorithm = new GeodesicAlgorithmExact(&amesh)
-
-    cdef vector[SurfacePoint] source, targets
     cdef Py_ssize_t N = vertices.shape[0]
-    cdef Py_ssize_t k
-    cdef Py_ssize_t kk
-    cdef numpy.float64_t distance = 0
-
-    # Add all vertices as targets
-    for k in range(N):
-        targets.push_back(SurfacePoint(&amesh.vertices()[k]))
 
-    cdef vector[unsigned int] rows
-    cdef vector[unsigned int] columns
+    cdef vector[double] points
+    cdef vector[unsigned] faces
+
+    for k in vertices.flatten():
+        points.push_back(k)
+    for k in triangles.flatten():
+        faces.push_back(k)
+
+    cdef SparseMatrix distances = local_gdist_matrix_impl(
+        points,
+        faces,
+        max_distance,
+        is_one_indexed,
+    )
+
+    cdef vector[unsigned] rows
+    cdef vector[unsigned] columns
     cdef vector[double] data
-    for k in prange(N, nogil=True, num_threads=1):
-        source.push_back(SurfacePoint(&amesh.vertices()[k]))
-        algorithm.propagate(source, max_distance, NULL)
-        source.pop_back()
-
-        for kk in prange(N, num_threads=2): #TODO: Reduce to vertices reached during propagate.
-            algorithm.best_source(targets[kk], distance)
-
-            if (
-                distance is not GEODESIC_INF
-                and distance is not 0
-                and distance <= max_distance
-            ):
-                rows.push_back(k)
-                columns.push_back(kk)
-                data.push_back(distance)
+
+    for distance_row in distances.rows:
+        rows.push_back(distance_row)
+    for distance_column in distances.columns:
+        columns.push_back(distance_column)
+    for distance_data in distances.data:
+        data.push_back(distance_data)
 
     return scipy.sparse.csc_matrix((data, (rows, columns)), shape=(N, N))
+
+
+def distance_matrix_of_selected_points(
+    numpy.ndarray[numpy.float64_t, ndim=2] vertices,
+    numpy.ndarray[numpy.int32_t, ndim=2] triangles,
+    numpy.ndarray[numpy.int32_t, ndim=1] points,
+    double max_distance = GEODESIC_INF,
+    bool is_one_indexed = False
+):
+    """Function for calculating pairwise geodesic distance for a set of points
+    within a distance ``max_distance`` of them.
+
+    Args:
+        vertices (numpy.ndarray[numpy.float64_t, ndim=2]): Defines x,y,z
+            coordinates of the mesh's vertices.
+        triangles (numpy.ndarray[numpy.float64_t, ndim=2]): Defines faces of
+            the mesh as index triplets into vertices.
+        points (numpy.ndarray[numpy.int32_t, ndim=1]): Indices of the points
+            among which the pairwise distances are to be calculated.
+        max_distance (double): Propagation algorithm stops after reaching the
+            certain distance from the source.
+        is_one_indexed (bool): defines if the index of the triangles data is
+            one-indexed.
+
+    Returns:
+        scipy.sparse.csc_matrix((N, N), dtype=numpy.float64): where N
+            is the number of vertices, specifying the pairwise distances among
+            the given points.
+    
+    Basic usage then looks like::
+        >>> import numpy
+        >>> temp = numpy.loadtxt("data/flat_triangular_mesh.txt", skiprows=1)
+        >>> vertices = temp[0:121].astype(numpy.float64)
+        >>> triangles = temp[121:321].astype(numpy.int32)
+        >>> points = numpy.array([2, 5, 10], dtype=numpy.int32)
+        >>> import gdist
+        >>> gdist.distance_matrix_of_selected_points(
+                vertices, triangles, points
+            )
+         <121x121 sparse matrix of type '<class 'numpy.float64'>'
+            with 6 stored elements in Compressed Sparse Column format>
+    """
+
+    cdef vector[unsigned] rows
+    cdef vector[unsigned] columns
+    cdef vector[double] distance_matrix
+    for index_point, point in enumerate(points):
+        target_indices = points[index_point + 1 :]
+
+        source_index = numpy.array([point], dtype=numpy.int32)
+        target_indices = numpy.array(target_indices, dtype=numpy.int32)
+
+        distances = compute_gdist(
+            vertices,
+            triangles,
+            source_index,
+            target_indices,
+            max_distance,
+            is_one_indexed,
+        )
+
+        for index_distance, distance in enumerate(distances):
+            rows.push_back(point)
+            columns.push_back(target_indices[index_distance])
+            distance_matrix.push_back(distance)
+            # symmetric
+            rows.push_back(target_indices[index_distance])
+            columns.push_back(point)
+            distance_matrix.push_back(distance)
+
+    cdef Py_ssize_t no_of_vertices = vertices.shape[0]
+    return scipy.sparse.csc_matrix(
+        (distance_matrix, (rows, columns)),
+        shape=(no_of_vertices, no_of_vertices)
+    )
diff --git a/geodesic_library/geodesic_algorithm_exact.h b/geodesic_library/geodesic_algorithm_exact.h
@@ -1163,7 +1163,7 @@ inline void GeodesicAlgorithmExact::construct_propagated_intervals(bool invert,
 
             p->min() = 0;
 
-            assert(p->start() < p->stop());
+            // assert(p->start() < p->stop());
             assert(p->start() >= 0.0);
             assert(p->stop() <= edge->length());
         }