diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f15761a1..235e10e3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,9 +4,10 @@ Changelog Version 0.10 (unreleased) ------------------------ -**Highlights of this release** +**Major enhancements** -* ... +* Addition of ``nearest`` and ``nearest_all`` functions to ``STRtree`` for + GEOS >= 3.6 to find the nearest neighbors (#272). **API Changes** @@ -28,7 +29,9 @@ Version 0.10 (unreleased) Thanks to everyone who contributed to this release! People with a "+" by their names contributed a patch for the first time. -* ... +* Brendan Ward +* Casper van der Wel +* Joris Van den Bossche Version 0.9 (2021-01-23) diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 1f4ce18c..8be9c794 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -8,6 +8,7 @@ class PointPolygonTimeSuite: """Benchmarks running on 100000 points and one polygon""" + def setup(self): self.points = pygeos.points(np.random.random((100000, 2))) self.polygon = pygeos.polygons(np.random.random((3, 2))) @@ -24,6 +25,7 @@ def time_intersection(self): class IOSuite: """Benchmarks I/O operations (WKT and WKB) on a set of 10000 polygons""" + def setup(self): self.to_write = pygeos.polygons(np.random.random((10000, 100, 2))) self.to_read_wkt = pygeos.to_wkt(self.to_write) @@ -44,6 +46,7 @@ def time_read_from_wkb(self): class ConstructiveSuite: """Benchmarks constructive functions on a set of 10,000 points""" + def setup(self): self.points = pygeos.points(np.random.random((10000, 2))) @@ -66,8 +69,8 @@ class ClipSuite: def setup(self): # create irregular polygons by merging overlapping point buffers self.polygon = pygeos.union_all( - pygeos.buffer(pygeos.points(np.random.random((1000, 2)) * 500), 10) - ) + pygeos.buffer(pygeos.points(np.random.random((1000, 2)) * 500), 10) + ) xmin = np.random.random(100) * 100 xmax = xmin + 100 ymin = np.random.random(100) * 100 @@ -75,7 +78,6 @@ def setup(self): self.bounds = np.array([xmin, ymin, xmax, ymax]).T self.boxes = pygeos.box(xmin, ymin, xmax, ymax) - def time_clip_by_box(self): pygeos.intersection(self.polygon, self.boxes) @@ -88,7 +90,13 @@ class GetParts: """Benchmarks for getting individual parts from 100 multipolygons of 100 polygons each""" def setup(self): - self.multipolygons = np.array([pygeos.multipolygons(pygeos.polygons(np.random.random((2, 100, 2)))) for i in range(10000)], dtype=object) + self.multipolygons = np.array( + [ + pygeos.multipolygons(pygeos.polygons(np.random.random((2, 100, 2)))) + for i in range(10000) + ], + dtype=object, + ) def time_get_parts(self): """Cython implementation of get_parts""" @@ -110,9 +118,11 @@ class OverlaySuite: def setup(self): # create irregular polygons by merging overlapping point buffers - self.left = pygeos.union_all(pygeos.buffer(pygeos.points(np.random.random((500, 2)) * 500), 15)) + self.left = pygeos.union_all( + pygeos.buffer(pygeos.points(np.random.random((500, 2)) * 500), 15) + ) # shift this up and right - self.right = pygeos.apply(self.left, lambda x: x+50) + self.right = pygeos.apply(self.left, lambda x: x + 50) def time_difference(self): pygeos.difference(self.left, self.right) @@ -174,6 +184,20 @@ def setup(self): # initialize the tree by making a tiny query first self.tree.query(pygeos.points(0, 0)) + # create points that extend beyond the domain of the above polygons to ensure + # some don't overlap + self.points = pygeos.points((np.random.random((2000, 2)) * 750) - 125) + self.point_tree = pygeos.STRtree( + pygeos.points(np.random.random((2000, 2)) * 750) + ) + self.point_tree.query(pygeos.points(0, 0)) + + # create points on a grid for testing equidistant nearest neighbors + # creates 2025 points + grid_coords = np.mgrid[:45, :45].T.reshape(-1, 2) + self.grid_point_tree = pygeos.STRtree(pygeos.points(grid_coords)) + self.grid_points = pygeos.points(grid_coords + 0.5) + def time_tree_create(self): tree = pygeos.STRtree(self.polygons) tree.query(pygeos.points(0, 0)) @@ -207,3 +231,101 @@ def time_tree_query_bulk_covered_by(self): def time_tree_query_bulk_contains_properly(self): self.tree.query_bulk(self.polygons, predicate="contains_properly") + + def time_tree_nearest_points(self): + self.point_tree.nearest(self.points) + + def time_tree_nearest_points_equidistant(self): + self.grid_point_tree.nearest(self.grid_points) + + def time_tree_nearest_points_equidistant_manual_all(self): + # This benchmark approximates nearest_all for equidistant results + # starting from singular nearest neighbors and searching for more + # within same distance. + + # try to find all equidistant neighbors ourselves given single nearest + # result + l, r = self.grid_point_tree.nearest(self.grid_points) + # calculate distance to nearest neighbor + dist = pygeos.distance(self.grid_points.take(l), self.grid_point_tree.geometries.take(r)) + # include a slight epsilon to ensure nearest are within this radius + b = pygeos.buffer(self.grid_points, dist + 1e-8) + + # query the tree for others in the same buffer distance + left, right = self.grid_point_tree.query_bulk(b, predicate='intersects') + dist = pygeos.distance( + self.grid_points.take(left), self.grid_point_tree.geometries.take(right) + ) + + # sort by left, distance + ix = np.lexsort((right, dist, left)) + left = left[ix] + right = right[ix] + dist = dist[ix] + + run_start = np.r_[True, left[:-1] != left[1:]] + run_counts = np.diff(np.r_[np.nonzero(run_start)[0], left.shape[0]]) + + mins = dist[run_start] + + # spread to rest of array so we can extract out all within each group that match + all_mins = np.repeat(mins, run_counts) + ix = dist == all_mins + left = left[ix] + right = right[ix] + dist = dist[ix] + + def time_tree_nearest_all_points(self): + self.point_tree.nearest_all(self.points) + + def time_tree_nearest_all_points_equidistant(self): + self.grid_point_tree.nearest_all(self.grid_points) + + def time_tree_nearest_all_points_small_max_distance(self): + # returns >300 results + self.point_tree.nearest_all(self.points, max_distance=5) + + def time_tree_nearest_all_points_large_max_distance(self): + # measures the overhead of using a distance that would encompass all tree points + self.point_tree.nearest_all(self.points, max_distance=1000) + + def time_tree_nearest_poly(self): + self.tree.nearest(self.points) + + def time_tree_nearest_all_poly(self): + self.tree.nearest_all(self.points) + + def time_tree_nearest_all_poly_small_max_distance(self): + # returns >300 results + self.tree.nearest_all(self.points, max_distance=5) + + def time_tree_nearest_all_poly_python(self): + # returns all input points + + # use an arbitrary search tolerance that seems appropriate for the density of + # geometries + tolerance = 200 + b = pygeos.buffer(self.points, tolerance, quadsegs=1) + left, right = self.tree.query_bulk(b) + dist = pygeos.distance(self.points.take(left), self.polygons.take(right)) + + # sort by left, distance + ix = np.lexsort((right, dist, left)) + left = left[ix] + right = right[ix] + dist = dist[ix] + + run_start = np.r_[True, left[:-1] != left[1:]] + run_counts = np.diff(np.r_[np.nonzero(run_start)[0], left.shape[0]]) + + mins = dist[run_start] + + # spread to rest of array so we can extract out all within each group that match + all_mins = np.repeat(mins, run_counts) + ix = dist == all_mins + left = left[ix] + right = right[ix] + dist = dist[ix] + + # arrays are now roughly representative of what tree.nearest_all would provide, though + # some nearest_all neighbors may be missed if they are outside tolerance diff --git a/pygeos/strtree.py b/pygeos/strtree.py index 65ffaf70..885c4d43 100644 --- a/pygeos/strtree.py +++ b/pygeos/strtree.py @@ -1,6 +1,7 @@ from enum import IntEnum import numpy as np from . import lib +from .decorators import requires_geos __all__ = ["STRtree"] @@ -49,6 +50,8 @@ class STRtree: >>> # Query geometries that overlap envelopes of `geoms` >>> tree.query_bulk([pygeos.box(2, 2, 4, 4), pygeos.box(5, 5, 6, 6)]).tolist() [[0, 0, 0, 1, 1], [2, 3, 4, 5, 6]] + >>> tree.nearest([pygeos.points(1,1), pygeos.points(3,5)]).tolist() # doctest: +SKIP + [[0, 1], [1, 4]] """ def __init__(self, geometries, leafsize=10): @@ -186,3 +189,126 @@ def query_bulk(self, geometry, predicate=None): predicate = BinaryPredicate[predicate].value return self._tree.query_bulk(geometry, predicate) + + @requires_geos("3.6.0") + def nearest(self, geometry): + """Returns the index of the nearest item in the tree for each input + geometry. + + If there are multiple equidistant or intersected geometries in the tree, + only a single result is returned for each input geometry, based on the + order that tree geometries are visited; this order may be + nondeterministic. + + Any geometry that is None or empty in the input geometries is omitted + from the output. + + Parameters + ---------- + geometry : Geometry or array_like + Input geometries to query the tree. + + Returns + ------- + ndarray with shape (2, n) + The first subarray contains input geometry indexes. + The second subarray contains tree geometry indexes. + + See also + -------- + nearest_all: returns all equidistant geometries and optional distances + + Examples + -------- + >>> import pygeos + >>> tree = pygeos.STRtree(pygeos.points(np.arange(10), np.arange(10))) + >>> tree.nearest(pygeos.points(1,1)).tolist() # doctest: +SKIP + [[0], [1]] + >>> tree.nearest([pygeos.box(1,1,3,3)]).tolist() # doctest: +SKIP + [[0], [1]] + >>> points = pygeos.points(0.5,0.5) + >>> tree.nearest([None, pygeos.points(10,10)]).tolist() # doctest: +SKIP + [[1], [9]] + """ + + geometry = np.asarray(geometry, dtype=object) + if geometry.ndim == 0: + geometry = np.expand_dims(geometry, 0) + + return self._tree.nearest(geometry) + + @requires_geos("3.6.0") + def nearest_all(self, geometry, max_distance=None, return_distance=False): + """Returns the index of the nearest item(s) in the tree for each input + geometry. + + If there are multiple equidistant or intersected geometries in tree, all + are returned. Tree indexes are returned in the order they are visited + for each input geometry and may not be in ascending index order; no meaningful + order is implied. + + The max_distance used to search for nearest items in the tree may have a + significant impact on performance by reducing the number of input geometries + that are evaluated for nearest items in the tree. Only those input geometries + with at least one tree item within +/- max_distance beyond their envelope will + be evaluated. + + The distance, if returned, will be 0 for any intersected geometries in the tree. + + Any geometry that is None or empty in the input geometries is omitted from + the output. + + Parameters + ---------- + geometry : Geometry or array_like + Input geometries to query the tree. + max_distance : float, optional (default: None) + Maximum distance within which to query for nearest items in tree. + Must be greater than 0. + return_distance : bool, optional (default: False) + If True, will return distances in addition to indexes. + + Returns + ------- + indices or tuple of (indices, distances) + indices is an ndarray of shape (2,n) and distances (if present) an + ndarray of shape (n). + The first subarray of indices contains input geometry indices. + The second subarray of indices contains tree geometry indices. + + See also + -------- + nearest: returns singular nearest geometry for each input + + Examples + -------- + >>> import pygeos + >>> tree = pygeos.STRtree(pygeos.points(np.arange(10), np.arange(10))) + >>> tree.nearest_all(pygeos.points(1,1)).tolist() # doctest: +SKIP + [[0], [1]] + >>> tree.nearest_all([pygeos.box(1,1,3,3)]).tolist() # doctest: +SKIP + [[0, 0, 0], [1, 2, 3]] + >>> points = pygeos.points(0.5,0.5) + >>> index, distance = tree.nearest_all(points, return_distance=True) # doctest: +SKIP + >>> index.tolist() # doctest: +SKIP + [[0, 0], [0, 1]] + >>> distance.round(4).tolist() # doctest: +SKIP + [0.7071, 0.7071] + >>> tree.nearest_all(None).tolist() # doctest: +SKIP + [[], []] + """ + + geometry = np.asarray(geometry, dtype=object) + if geometry.ndim == 0: + geometry = np.expand_dims(geometry, 0) + + if max_distance is not None and max_distance <= 0: + raise ValueError("max_distance must be greater than 0") + + # a distance of 0 means no max_distance is used + max_distance = max_distance or 0 + + if return_distance: + return self._tree.nearest_all(geometry, max_distance) + + return self._tree.nearest_all(geometry, max_distance)[0] diff --git a/pygeos/test/test_strtree.py b/pygeos/test/test_strtree.py index 1677f0b3..35dc2010 100644 --- a/pygeos/test/test_strtree.py +++ b/pygeos/test/test_strtree.py @@ -1178,3 +1178,314 @@ def test_query_bulk_intersects_lines(line_tree, geometry, expected): ) def test_query_bulk_intersects_polygons(poly_tree, geometry, expected): assert_array_equal(poly_tree.query_bulk(geometry, predicate="intersects"), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +def test_nearest_empty_tree(): + tree = pygeos.STRtree([]) + assert_array_equal(tree.nearest(point), [[], []]) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize("geometry", ["I am not a geometry"]) +def test_nearest_invalid_geom(tree, geometry): + with pytest.raises(TypeError): + tree.nearest(geometry) + + +# TODO: add into regular results +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [(None, [[], []]), ([None], [[], []])], +) +def test_nearest_none(tree, geometry, expected): + assert_array_equal(tree.nearest_all(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [ + (pygeos.points(0.25, 0.25), [[0], [0]]), + (pygeos.points(0.75, 0.75), [[0], [1]]), + (pygeos.points(1, 1), [[0], [1]]), + ([pygeos.points(1, 1), pygeos.points(0, 0)], [[0, 1], [1, 0]]), + ([pygeos.points(1, 1), pygeos.points(0.25, 1)], [[0, 1], [1, 1]]), + ([pygeos.points(-10, -10), pygeos.points(100, 100)], [[0, 1], [0, 9]]), + (box(0.5, 0.5, 0.75, 0.75), [[0], [1]]), + (pygeos.buffer(pygeos.points(2.5, 2.5), HALF_UNIT_DIAG), [[0], [2]]), + (pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [[0], [3]]), + (pygeos.multipoints([[5.5, 5], [7, 7]]), [[0], [7]]), + (pygeos.multipoints([[5, 7], [7, 5]]), [[0], [6]]), + (None, [[], []]), + ([None], [[], []]), + ], +) +def test_nearest_points(tree, geometry, expected): + assert_array_equal(tree.nearest(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.xfail(reason="equidistant geometries may produce nondeterministic results") +@pytest.mark.parametrize( + "geometry,expected", + [ + # 2 equidistant points in tree + (pygeos.points(0.5, 0.5), [0, 1]), + # multiple points in box + (box(0, 0, 3, 3), [0, 1, 2, 3]), + # return nearest point in tree for each point in multipoint + (pygeos.multipoints([[5, 5], [7, 7]]), [5, 7]), + ], +) +def test_nearest_points_equidistant(tree, geometry, expected): + result = tree.nearest(geometry) + assert result[1] in expected + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [ + (pygeos.points(0.5, 0.5), [[0], [0]]), + (pygeos.points(1.5, 0.5), [[0], [0]]), + (pygeos.box(0.5, 1.5, 1, 2), [[0], [1]]), + (pygeos.linestrings([[0, 0.5], [1, 2.5]]), [[0], [0]]), + ], +) +def test_nearest_lines(line_tree, geometry, expected): + assert_array_equal(line_tree.nearest(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.xfail(reason="equidistant geometries may produce nondeterministic results") +@pytest.mark.parametrize( + "geometry,expected", + [ + # at junction between 2 lines + (pygeos.points(2, 2), [1, 2]), + # contains one line, intersects with another + (box(0, 0, 1, 1), [0, 1]), + # overlaps 2 lines + (box(0.5, 0.5, 1.5, 1.5), [0, 1]), + # box overlaps 2 lines and intersects endpoints of 2 more + (box(3, 3, 5, 5), [2, 3, 4, 5]), + (pygeos.buffer(pygeos.points(2.5, 2.5), HALF_UNIT_DIAG), [1, 2]), + (pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [2, 3]), + # multipoints at endpoints of 2 lines each + (pygeos.multipoints([[5, 5], [7, 7]]), [4, 5, 6, 7]), + # second point in multipoint at endpoints of 2 lines + (pygeos.multipoints([[5.5, 5], [7, 7]]), [6, 7]), + # multipoints are equidistant from 2 lines + (pygeos.multipoints([[5, 7], [7, 5]]), [5, 6]), + ], +) +def test_nearest_lines_equidistant(line_tree, geometry, expected): + result = line_tree.nearest(geometry) + assert result[1] in expected + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [ + (pygeos.points(0, 0), [[0], [0]]), + (pygeos.points(2, 2), [[0], [2]]), + (pygeos.box(0, 5, 1, 6), [[0], [3]]), + (pygeos.multipoints([[5, 7], [7, 5]]), [[0], [6]]), + ], +) +def test_nearest_polygons(poly_tree, geometry, expected): + assert_array_equal(poly_tree.nearest(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.xfail(reason="equidistant geometries may produce nondeterministic results") +@pytest.mark.parametrize( + "geometry,expected", + [ + # 2 polygons in tree overlap point + (pygeos.points(0.5, 0.5), [0, 1]), + # box overlaps multiple polygons + (box(0, 0, 1, 1), [0, 1]), + (box(0.5, 0.5, 1.5, 1.5), [0, 1, 2]), + (box(3, 3, 5, 5), [3, 4, 5]), + (pygeos.buffer(pygeos.points(2.5, 2.5), HALF_UNIT_DIAG), [2, 3]), + # completely overlaps one polygon, touches 2 others + (pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [2, 3, 4]), + # each point in multi point intersects a polygon in tree + (pygeos.multipoints([[5, 5], [7, 7]]), [5, 7]), + (pygeos.multipoints([[5.5, 5], [7, 7]]), [5, 7]), + ], +) +def test_nearest_polygons_equidistant(poly_tree, geometry, expected): + result = poly_tree.nearest(geometry) + assert result[1] in expected + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +def test_nearest_all_empty_tree(): + tree = pygeos.STRtree([]) + assert_array_equal(tree.nearest_all(point), [[], []]) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize("geometry", ["I am not a geometry"]) +def test_nearest_all_invalid_geom(tree, geometry): + with pytest.raises(TypeError): + tree.nearest_all(geometry) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,return_distance,expected", + [(None, False, [[], []]), ([None], False, [[], []]), (None, True, ([[], []], []))], +) +def test_nearest_all_none(tree, geometry, return_distance, expected): + if return_distance: + index, distance = tree.nearest_all(geometry, return_distance=True) + assert_array_equal(index, expected[0]) + assert_array_equal(distance, expected[1]) + + else: + assert_array_equal(tree.nearest_all(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", [(empty, [[], []]), ([empty, point], [[1, 1], [2, 3]])] +) +def test_nearest_all_empty_geom(tree, geometry, expected): + assert_array_equal(tree.nearest_all(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [ + (pygeos.points(0.25, 0.25), [[0], [0]]), + (pygeos.points(0.75, 0.75), [[0], [1]]), + (pygeos.points(1, 1), [[0], [1]]), + # 2 equidistant points in tree + (pygeos.points(0.5, 0.5), [[0, 0], [0, 1]]), + ([pygeos.points(1, 1), pygeos.points(0, 0)], [[0, 1], [1, 0]]), + ([pygeos.points(1, 1), pygeos.points(0.25, 1)], [[0, 1], [1, 1]]), + ([pygeos.points(-10, -10), pygeos.points(100, 100)], [[0, 1], [0, 9]]), + (box(0.5, 0.5, 0.75, 0.75), [[0], [1]]), + # multiple points in box + (box(0, 0, 3, 3), [[0, 0, 0, 0], [0, 1, 2, 3]]), + (pygeos.buffer(pygeos.points(2.5, 2.5), HALF_UNIT_DIAG), [[0], [2]]), + (pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [[0], [3]]), + (pygeos.multipoints([[5.5, 5], [7, 7]]), [[0], [7]]), + (pygeos.multipoints([[5, 7], [7, 5]]), [[0], [6]]), + # return nearest point in tree for each point in multipoint + (pygeos.multipoints([[5, 5], [7, 7]]), [[0, 0], [5, 7]]), + # 2 equidistant points per point in multipoint + ( + pygeos.multipoints([[0.5, 0.5], [3.5, 3.5]]), + [ + [ + 0, + 0, + 0, + 0, + ], + [0, 1, 3, 4], + ], + ), + ], +) +def test_nearest_all_points(tree, geometry, expected): + assert_array_equal(tree.nearest_all(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [ + (pygeos.points(0.5, 0.5), [[0], [0]]), + # at junction between 2 lines, will return both + (pygeos.points(2, 2), [[0, 0], [1, 2]]), + # contains one line, intersects with another + (box(0, 0, 1, 1), [[0, 0], [0, 1]]), + # overlaps 2 lines + (box(0.5, 0.5, 1.5, 1.5), [[0, 0], [0, 1]]), + # second box overlaps 2 lines and intersects endpoints of 2 more + ([box(0, 0, 0.5, 0.5), box(3, 3, 5, 5)], [[0, 1, 1, 1, 1], [0, 2, 3, 4, 5]]), + (pygeos.buffer(pygeos.points(2.5, 2.5), HALF_UNIT_DIAG), [[0, 0], [1, 2]]), + (pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [[0, 0], [2, 3]]), + # multipoints at endpoints of 2 lines each + (pygeos.multipoints([[5, 5], [7, 7]]), [[0, 0, 0, 0], [4, 5, 6, 7]]), + # second point in multipoint at endpoints of 2 lines + (pygeos.multipoints([[5.5, 5], [7, 7]]), [[0, 0], [6, 7]]), + # multipoints are equidistant from 2 lines + (pygeos.multipoints([[5, 7], [7, 5]]), [[0, 0], [5, 6]]), + ], +) +def test_nearest_all_lines(line_tree, geometry, expected): + assert_array_equal(line_tree.nearest_all(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [ + (pygeos.points(0, 0), [[0], [0]]), + (pygeos.points(2, 2), [[0], [2]]), + # 2 polygons in tree overlap point + (pygeos.points(0.5, 0.5), [[0, 0], [0, 1]]), + # box overlaps multiple polygons + (box(0, 0, 1, 1), [[0, 0], [0, 1]]), + (box(0.5, 0.5, 1.5, 1.5), [[0, 0, 0], [0, 1, 2]]), + ([box(0, 0, 1, 1), box(3, 3, 5, 5)], [[0, 0, 1, 1, 1], [0, 1, 3, 4, 5]]), + (pygeos.buffer(pygeos.points(2.5, 2.5), HALF_UNIT_DIAG), [[0, 0], [2, 3]]), + # completely overlaps one polygon, touches 2 others + (pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [[0, 0, 0], [2, 3, 4]]), + # each point in multi point intersects a polygon in tree + (pygeos.multipoints([[5, 5], [7, 7]]), [[0, 0], [5, 7]]), + (pygeos.multipoints([[5.5, 5], [7, 7]]), [[0, 0], [5, 7]]), + (pygeos.multipoints([[5, 7], [7, 5]]), [[0], [6]]), + ], +) +def test_nearest_all_polygons(poly_tree, geometry, expected): + assert_array_equal(poly_tree.nearest_all(geometry), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,max_distance,expected", + [ + # using unset max_distance should return all nearest + (pygeos.points(0.5, 0.5), None, [[0, 0], [0, 1]]), + # using large max_distance should return all nearest + (pygeos.points(0.5, 0.5), 10, [[0, 0], [0, 1]]), + # using small max_distance should return no results + (pygeos.points(0.5, 0.5), 0.1, [[], []]), + # using small max_distance should only return results in that distance + ([pygeos.points(0.5, 0.5), pygeos.points(0, 0)], 0.1, [[1], [0]]), + ], +) +def test_nearest_all_max_distance(tree, geometry, max_distance, expected): + assert_array_equal(tree.nearest_all(geometry, max_distance=max_distance), expected) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,max_distance", + [(pygeos.points(0.5, 0.5), 0), (pygeos.points(0.5, 0.5), -1)], +) +def test_nearest_all_invalid_max_distance(tree, geometry, max_distance): + with pytest.raises(ValueError, match="max_distance must be greater than 0"): + tree.nearest_all(geometry, max_distance=max_distance) + + +@pytest.mark.skipif(pygeos.geos_version < (3, 6, 0), reason="GEOS < 3.6") +@pytest.mark.parametrize( + "geometry,expected", + [(pygeos.points(0, 0), [0.0]), (pygeos.points(0.5, 0.5), [0.7071, 0.7071])], +) +def test_nearest_all_return_distance(tree, geometry, expected): + assert_array_equal( + np.round(tree.nearest_all(geometry, return_distance=True)[1], 4), expected + ) diff --git a/src/coords.c b/src/coords.c index acd7c195..24139925 100644 --- a/src/coords.c +++ b/src/coords.c @@ -14,6 +14,7 @@ #include "geos.h" #include "pygeom.h" + /* These function prototypes enables that these functions can call themselves */ static char get_coordinates(GEOSContextHandle_t, GEOSGeometry*, PyArrayObject*, npy_intp*, int); diff --git a/src/coords.h b/src/coords.h index b019dc7e..6861dfce 100644 --- a/src/coords.h +++ b/src/coords.h @@ -3,6 +3,9 @@ #include +#include "geos.h" + + extern PyObject* PyCountCoords(PyObject* self, PyObject* args); extern PyObject* PyGetCoords(PyObject* self, PyObject* args); extern PyObject* PySetCoords(PyObject* self, PyObject* args); diff --git a/src/geos.c b/src/geos.c index 9215152b..b5fc8495 100644 --- a/src/geos.c +++ b/src/geos.c @@ -5,6 +5,7 @@ #include #include +#include /* This initializes a globally accessible GEOSException object */ PyObject* geos_exception[1] = {NULL}; @@ -284,13 +285,13 @@ char check_to_wkt_compatible(GEOSContextHandle_t ctx, GEOSGeometry* geom) { /* GEOSInterpolate_r and GEOSInterpolateNormalized_r segfault on empty * geometries and also on collections with the first geometry empty. - * + * * This function returns: * - PGERR_GEOMETRY_TYPE on non-linear geometries * - PGERR_EMPTY_GEOMETRY on empty linear geometries * - PGERR_EXCEPTIONS on GEOS exceptions * - PGERR_SUCCESS on a non-empty and linear geometry - * + * * Note that GEOS 3.8 fixed this situation for empty LINESTRING/LINEARRING, * but it still segfaults on other empty geometries. */ @@ -346,3 +347,187 @@ void geos_error_handler(const char* message, void* userdata) { void geos_notice_handler(const char* message, void* userdata) { snprintf(userdata, 1024, "%s", message); } + + +/* Extract bounds from geometry. + * + * Bounds coordinates will be set to NPY_NAN if geom is NULL, empty, or does not have an + * envelope. + * + * Parameters + * ---------- + * ctx: GEOS context handle + * geom: pointer to GEOSGeometry; can be NULL + * xmin: pointer to xmin value + * ymin: pointer to ymin value + * xmax: pointer to xmax value + * ymax: pointer to ymax value + * + * Must be called from within a GEOS_INIT_THREADS / GEOS_FINISH_THREADS + * or GEOS_INIT / GEOS_FINISH block. + * + * Returns + * ------- + * 1 on success; 0 on error + */ +int get_bounds(GEOSContextHandle_t ctx, GEOSGeometry* geom, double* xmin, double* ymin, + double* xmax, double* ymax) { + + int retval = 1; + + if (geom == NULL || GEOSisEmpty_r(ctx, geom)) { + *xmin = *ymin = *xmax = *ymax = NPY_NAN; + return 1; + } + +#if GEOS_SINCE_3_7_0 + // use min / max coordinates + + if (!(GEOSGeom_getXMin_r(ctx, geom, xmin) && GEOSGeom_getYMin_r(ctx, geom, ymin) && + GEOSGeom_getXMax_r(ctx, geom, xmax) && GEOSGeom_getYMax_r(ctx, geom, ymax))) { + return 0; + } + +#else + // extract coordinates from envelope + + GEOSGeometry* envelope = NULL; + const GEOSGeometry* ring = NULL; + const GEOSCoordSequence* coord_seq = NULL; + int size; + + /* construct the envelope */ + envelope = GEOSEnvelope_r(ctx, geom); + if (envelope == NULL) { + return 0; + } + + size = GEOSGetNumCoordinates_r(ctx, envelope); + + /* get the bbox depending on the number of coordinates in the envelope */ + if (size == 0) { /* Envelope is empty */ + *xmin = *ymin = *xmax = *ymax = NPY_NAN; + } else if (size == 1) { /* Envelope is a point */ + if (!GEOSGeomGetX_r(ctx, envelope, xmin)) { + retval = 0; + goto finish; + } + if (!GEOSGeomGetY_r(ctx, envelope, ymin)) { + retval = 0; + goto finish; + } + *xmax = *xmin; + *ymax = *ymin; + } else if (size == 5) { /* Envelope is a box */ + ring = GEOSGetExteriorRing_r(ctx, envelope); + if (ring == NULL) { + retval = 0; + goto finish; + } + coord_seq = GEOSGeom_getCoordSeq_r(ctx, ring); + if (coord_seq == NULL) { + retval = 0; + goto finish; + } + if (!GEOSCoordSeq_getX_r(ctx, coord_seq, 0, xmin)) { + retval = 0; + goto finish; + } + if (!GEOSCoordSeq_getY_r(ctx, coord_seq, 0, ymin)) { + retval = 0; + goto finish; + } + if (!GEOSCoordSeq_getX_r(ctx, coord_seq, 2, xmax)) { + retval = 0; + goto finish; + } + if (!GEOSCoordSeq_getY_r(ctx, coord_seq, 2, ymax)) { + retval = 0; + goto finish; + } + } + +finish: + if (envelope != NULL) { + GEOSGeom_destroy_r(ctx, envelope); + } + +#endif + + return retval; +} + +/* Create a Polygon from bounding coordinates. + * + * Must be called from within a GEOS_INIT_THREADS / GEOS_FINISH_THREADS + * or GEOS_INIT / GEOS_FINISH block. + * + * Parameters + * ---------- + * ctx: GEOS context handle + * xmin: minimum X value + * ymin: minimum Y value + * xmax: maximum X value + * ymax: maximum Y value + * + * Returns + * ------- + * GEOSGeometry* on success (owned by caller) or + * NULL on failure or NPY_NAN coordinates + */ +GEOSGeometry* create_box(GEOSContextHandle_t ctx, double xmin, double ymin, double xmax, + double ymax) { + if (xmin == NPY_NAN || ymin == NPY_NAN || xmax == NPY_NAN || ymax == NPY_NAN) { + return NULL; + } + + GEOSCoordSequence* coords = NULL; + GEOSGeometry* geom = NULL; + GEOSGeometry* ring = NULL; + + // Construct coordinate sequence and set vertices + coords = GEOSCoordSeq_create_r(ctx, 5, 2); + if (coords == NULL) { + return NULL; + } + + if (!(GEOSCoordSeq_setX_r(ctx, coords, 0, xmin) && + GEOSCoordSeq_setY_r(ctx, coords, 0, ymin) && + GEOSCoordSeq_setX_r(ctx, coords, 1, xmax) && + GEOSCoordSeq_setY_r(ctx, coords, 1, ymin) && + GEOSCoordSeq_setX_r(ctx, coords, 2, xmax) && + GEOSCoordSeq_setY_r(ctx, coords, 2, ymax) && + GEOSCoordSeq_setX_r(ctx, coords, 3, xmin) && + GEOSCoordSeq_setY_r(ctx, coords, 3, ymax) && + GEOSCoordSeq_setX_r(ctx, coords, 4, xmin) && + GEOSCoordSeq_setY_r(ctx, coords, 4, ymin))) { + if (coords != NULL) { + GEOSCoordSeq_destroy_r(ctx, coords); + } + + return NULL; + } + + // Construct linear ring then use to construct polygon + // Note: coords are owned by ring + ring = GEOSGeom_createLinearRing_r(ctx, coords); + if (ring == NULL) { + if (coords != NULL) { + GEOSCoordSeq_destroy_r(ctx, coords); + } + + return NULL; + } + + // Note: ring is owned by polygon + geom = GEOSGeom_createPolygon_r(ctx, ring, NULL, 0); + if (geom == NULL) { + if (ring != NULL) { + GEOSGeom_destroy_r(ctx, ring); + } + + return NULL; + } + + return geom; +} diff --git a/src/geos.h b/src/geos.h index fa53d1e6..27a05838 100644 --- a/src/geos.h +++ b/src/geos.h @@ -139,4 +139,10 @@ extern char geos_interpolate_checker(GEOSContextHandle_t ctx, GEOSGeometry* geom extern int init_geos(PyObject* m); -#endif + +int get_bounds(GEOSContextHandle_t ctx, GEOSGeometry* geom, double* xmin, double* ymin, + double* xmax, double* ymax); +GEOSGeometry* create_box(GEOSContextHandle_t ctx, double xmin, double ymin, double xmax, + double ymax); + +#endif // _GEOS_H diff --git a/src/strtree.c b/src/strtree.c index 14f394ee..3743ee8e 100644 --- a/src/strtree.c +++ b/src/strtree.c @@ -2,6 +2,7 @@ #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include +#include #include #define NO_IMPORT_ARRAY @@ -11,6 +12,7 @@ #include #include +#include "coords.h" #include "geos.h" #include "kvec.h" #include "pygeom.h" @@ -553,6 +555,479 @@ static PyObject* STRtree_query_bulk(STRtreeObject* self, PyObject* args) { return (PyObject*)result; } +/* Callback called by strtree_query with item data of each intersecting geometry + * and a counter to increment each time this function is called. Used to prescreen + * geometries for intersections with the tree. + * + * Parameters + * ---------- + * item: address of intersected geometry in the tree geometries (_geoms) array. + * + * user_data: pointer to size_t counter incremented on every call to this function + * */ +void prescreen_query_callback(void* item, void* user_data) { (*(size_t*)user_data)++; } + +/* Calculate the distance between items in the tree and the src_geom. + * Note: this is only called by the tree after first evaluating the overlap between + * the the envelope of a tree node and the query geometry. It may not be called for + * all equidistant results. + * + * Parameters + * ---------- + * item1: address of geometry in tree geometries (_geoms) + * + * item2: pointer to GEOSGeometry* of query geometry + * + * distance: pointer to distance that gets updated in this function + * + * userdata: GEOS context handle. + * + * Returns + * ------- + * 0 on error (caller immediately exits and returns NULL); 1 on success + * */ +int nearest_distance_callback(const void* item1, const void* item2, double* distance, + void* userdata) { + GEOSGeometry* tree_geom = NULL; + const GEOSContextHandle_t* ctx = (GEOSContextHandle_t*)userdata; + + GeometryObject* tree_pg_geom = *((GeometryObject**)item1); + // Note: this is guarded for NULL during construction of tree; no need to check here. + get_geom(tree_pg_geom, &tree_geom); + + // distance returns 1 on success, 0 on error + return GEOSDistance_r(*ctx, (GEOSGeometry*)item2, tree_geom, distance); +} + +/* Calculate the distance between items in the tree and the src_geom. + * Note: this is only called by the tree after first evaluating the overlap between + * the the envelope of a tree node and the query geometry. It may not be called for + * all equidistant results. + * + * In order to force GEOS to check neighbors in adjacent tree nodes, a slight adjustment + * is added to the distance returned to the tree. Otherwise, the tree nearest neighbor + * algorithm terminates if the envelopes of adjacent tree nodes are not further than + * this distance, and not all equidistant or intersected neighbors are checked. The + * accurate distances are stored into the distance pairs in userdata. + * + * Parameters + * ---------- + * item1: address of geometry in tree geometries (_geoms) + * + * item2: pointer to GEOSGeometry* of query geometry + * + * distance: pointer to distance that gets updated in this function + * + * userdata: instance of tree_nearest_userdata_t, includes vector to cache nearest + * distance pairs visited by this function, GEOS context handle, and minimum observed + * distance. + * + * Returns + * ------- + * 0 on error (caller immediately exits and returns NULL); 1 on success + * */ + +int nearest_all_distance_callback(const void* item1, const void* item2, double* distance, + void* userdata) { + GEOSGeometry* tree_geom = NULL; + size_t pairs_size; + double calc_distance; + + GeometryObject* tree_pg_geom = *((GeometryObject**)item1); + // Note: this is guarded for NULL during construction of tree; no need to check here. + get_geom(tree_pg_geom, &tree_geom); + + tree_nearest_userdata_t* params = (tree_nearest_userdata_t*)userdata; + + // distance returns 1 on success, 0 on error + if (GEOSDistance_r(params->ctx, (GEOSGeometry*)item2, tree_geom, &calc_distance) == 0) { + return 0; + } + + // store any pairs that are smaller than the minimum distance observed so far + // and update min_distance + if (calc_distance <= params->min_distance) { + params->min_distance = calc_distance; + + // if smaller than last item in vector, remove that item first + pairs_size = kv_size(*(params->dist_pairs)); + if (pairs_size > 0 && + calc_distance < (kv_A(*(params->dist_pairs), pairs_size - 1)).distance) { + kv_pop(*(params->dist_pairs)); + } + + tree_geom_dist_vec_item_t dist_pair = + (tree_geom_dist_vec_item_t){(GeometryObject**)item1, calc_distance}; + kv_push(tree_geom_dist_vec_item_t, *(params->dist_pairs), dist_pair); + } + + // Set distance for callback + // This adds a slight adjustment to force checking of adjacent tree nodes; otherwise + // they are skipped by the GEOS nearest neighbor algorithm check against bounds + // of adjacent nodes. + *distance = calc_distance + 1e-6; + + return 1; +} + +#if GEOS_SINCE_3_6_0 + +/* Find the nearest singular item in the tree to each input geometry. + * Returns indices of source array and tree items. + * + * If there are multiple equidistant or intersected items, only one is returned, + * based on whichever nearest tree item is visited first by GEOS. + * + * Returns a tuple of empty arrays of shape (2,0) if tree is empty. + * + * + * Returns + * ------- + * ndarray of shape (2, n) with input indexes, tree indexes + * */ + +static PyObject* STRtree_nearest(STRtreeObject* self, PyObject* arr) { + PyArrayObject* pg_geoms; + GeometryObject* pg_geom = NULL; + GEOSGeometry* geom = NULL; + GeometryObject** nearest_result = NULL; + npy_intp i, n, size, geom_index; + char* head_ptr = (char*)self->_geoms; + PyArrayObject* result; + + // Indices of input and tree geometries + index_vec_t src_indexes; + index_vec_t nearest_indexes; + + if (self->ptr == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Tree is uninitialized"); + return NULL; + } + + if (!PyArray_Check(arr)) { + PyErr_SetString(PyExc_TypeError, "Not an ndarray"); + return NULL; + } + + pg_geoms = (PyArrayObject*)arr; + if (!PyArray_ISOBJECT(pg_geoms)) { + PyErr_SetString(PyExc_TypeError, "Array should be of object dtype"); + return NULL; + } + + if (PyArray_NDIM(pg_geoms) != 1) { + PyErr_SetString(PyExc_TypeError, "Array should be one dimensional"); + return NULL; + } + + // If tree is empty, return empty arrays + if (self->count == 0) { + npy_intp index_dims[2] = {2, 0}; + result = (PyArrayObject*)PyArray_SimpleNew(2, index_dims, NPY_INTP); + return (PyObject*)result; + } + + n = PyArray_SIZE(pg_geoms); + + // preallocate arrays to size of input array + kv_init(src_indexes); + kv_resize(npy_intp, src_indexes, n); + kv_init(nearest_indexes); + kv_resize(npy_intp, nearest_indexes, n); + + GEOS_INIT_THREADS; + + for (i = 0; i < n; i++) { + // get pygeos geometry from input geometry array + pg_geom = *(GeometryObject**)PyArray_GETPTR1(pg_geoms, i); + if (!get_geom(pg_geom, &geom)) { + errstate = PGERR_NOT_A_GEOMETRY; + break; + } + if (geom == NULL || GEOSisEmpty_r(ctx, geom)) { + continue; + } + + // pass in ctx as userdata because we need it for distance calculation in + // the callback + nearest_result = (GeometryObject**)GEOSSTRtree_nearest_generic_r( + ctx, self->ptr, geom, geom, nearest_distance_callback, &ctx); + + // GEOSSTRtree_nearest_r returns NULL on error + if (nearest_result == NULL) { + errstate = PGERR_GEOS_EXCEPTION; + break; + } + + kv_push(npy_intp, src_indexes, i); + + // Calculate index using offset of its address compared to head of _geoms + geom_index = (npy_intp)(((char*)nearest_result - head_ptr) / sizeof(GeometryObject*)); + kv_push(npy_intp, nearest_indexes, geom_index); + } + + GEOS_FINISH_THREADS; + + if (errstate != PGERR_SUCCESS) { + kv_destroy(src_indexes); + kv_destroy(nearest_indexes); + return NULL; + } + + size = kv_size(src_indexes); + + // Create array of indexes + npy_intp index_dims[2] = {2, size}; + result = (PyArrayObject*)PyArray_SimpleNew(2, index_dims, NPY_INTP); + if (result == NULL) { + PyErr_SetString(PyExc_RuntimeError, "could not allocate numpy array"); + return NULL; + } + + for (i = 0; i < size; i++) { + // assign value into numpy arrays + *(npy_intp*)PyArray_GETPTR2(result, 0, i) = kv_A(src_indexes, i); + *(npy_intp*)PyArray_GETPTR2(result, 1, i) = kv_A(nearest_indexes, i); + } + + kv_destroy(src_indexes); + kv_destroy(nearest_indexes); + + return (PyObject*)result; +} + +/* Find the nearest item(s) in the tree to each input geometry. + * Returns indices of source array, tree items, and distance between them. + * + * If there are multiple equidistant or intersected items, all should be returned. + * Tree indexes are returned in the order they are visited, not necessarily in ascending + * order. + * + * Returns a tuple of empty arrays (shape (2,0), shape (0,)) if tree is empty. + * + * + * Returns + * ------- + * tuple of ([arr indexes (shape n), tree indexes (shape n)], distances (shape n)) + * */ + +static PyObject* STRtree_nearest_all(STRtreeObject* self, PyObject* args) { + PyObject* arr; + double max_distance = 0; // default of 0 indicates max_distance not set + int use_max_distance = 0; // flag for the above + PyArrayObject* pg_geoms; + GeometryObject* pg_geom = NULL; + GEOSGeometry* geom = NULL; + GEOSGeometry* envelope = NULL; + GeometryObject** nearest_result = NULL; + npy_intp i, n, size, geom_index; + size_t j, query_counter; + double xmin, ymin, xmax, ymax; + char* head_ptr = (char*)self->_geoms; + tree_nearest_userdata_t userdata; + double distance; + PyArrayObject* result_indexes; // array of [source index, tree index] + PyArrayObject* result_distances; // array of distances + PyObject* result; // tuple of (indexes array, distance array) + + // Indices of input geometries + index_vec_t src_indexes; + + // Pairs of addresses in tree geometries and distances; used in userdata + tree_geom_dist_vec_t dist_pairs; + + // Addresses in tree geometries (_geoms) that match tree + tree_geom_vec_t nearest_geoms; + + // Distances of nearest items + tree_dist_vec_t nearest_dist; + + if (self->ptr == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Tree is uninitialized"); + return NULL; + } + + if (!PyArg_ParseTuple(args, "Od", &arr, &max_distance)) { + return NULL; + } + if (max_distance > 0) { + use_max_distance = 1; + } + + if (!PyArray_Check(arr)) { + PyErr_SetString(PyExc_TypeError, "Not an ndarray"); + return NULL; + } + + pg_geoms = (PyArrayObject*)arr; + if (!PyArray_ISOBJECT(pg_geoms)) { + PyErr_SetString(PyExc_TypeError, "Array should be of object dtype"); + return NULL; + } + + if (PyArray_NDIM(pg_geoms) != 1) { + PyErr_SetString(PyExc_TypeError, "Array should be one dimensional"); + return NULL; + } + + // If tree is empty, return empty arrays + if (self->count == 0) { + npy_intp index_dims[2] = {2, 0}; + result_indexes = (PyArrayObject*)PyArray_SimpleNew(2, index_dims, NPY_INTP); + + npy_intp distance_dims[1] = {0}; + result_distances = (PyArrayObject*)PyArray_SimpleNew(1, distance_dims, NPY_DOUBLE); + + result = PyTuple_New(2); + PyTuple_SET_ITEM(result, 0, (PyObject*)result_indexes); + PyTuple_SET_ITEM(result, 1, (PyObject*)result_distances); + return (PyObject*)result; + } + + n = PyArray_SIZE(pg_geoms); + + // preallocate arrays to size of input array; for a non-empty tree, there should be + // at least 1 nearest item per input geometry + kv_init(src_indexes); + kv_resize(npy_intp, src_indexes, n); + kv_init(nearest_geoms); + kv_resize(GeometryObject**, nearest_geoms, n); + kv_init(nearest_dist); + kv_resize(double, nearest_dist, n); + + GEOS_INIT_THREADS; + + // initialize userdata context and dist_pairs vector + userdata.ctx = ctx; + userdata.dist_pairs = &dist_pairs; + + for (i = 0; i < n; i++) { + // get pygeos geometry from input geometry array + pg_geom = *(GeometryObject**)PyArray_GETPTR1(pg_geoms, i); + if (!get_geom(pg_geom, &geom)) { + errstate = PGERR_NOT_A_GEOMETRY; + break; + } + if (geom == NULL || GEOSisEmpty_r(ctx, geom)) { + continue; + } + + if (use_max_distance) { + // if max_distance is defined, prescreen geometries using simple bbox expansion + if (get_bounds(ctx, geom, &xmin, &ymin, &xmax, &ymax) == 0) { + errstate = PGERR_GEOS_EXCEPTION; + kv_destroy(src_indexes); + kv_destroy(nearest_geoms); + kv_destroy(nearest_dist); + break; + } + + envelope = create_box(ctx, xmin - max_distance, ymin - max_distance, + xmax + max_distance, ymax + max_distance); + if (envelope == NULL) { + errstate = PGERR_GEOS_EXCEPTION; + kv_destroy(src_indexes); + kv_destroy(nearest_geoms); + kv_destroy(nearest_dist); + break; + } + + query_counter = 0; + GEOSSTRtree_query_r(ctx, self->ptr, envelope, prescreen_query_callback, + &query_counter); + + GEOSGeom_destroy_r(ctx, envelope); + + if (query_counter == 0) { + // no features are within max_distance, skip distance calculations + continue; + } + } + + // reset loop-dependent values of userdata + kv_init(dist_pairs); + userdata.min_distance = DBL_MAX; + + nearest_result = (GeometryObject**)GEOSSTRtree_nearest_generic_r( + ctx, self->ptr, geom, geom, nearest_all_distance_callback, &userdata); + + // GEOSSTRtree_nearest_generic_r returns NULL on error + if (nearest_result == NULL) { + errstate = PGERR_GEOS_EXCEPTION; + kv_destroy(dist_pairs); + kv_destroy(src_indexes); + kv_destroy(nearest_geoms); + kv_destroy(nearest_dist); + break; + } + + for (j = 0; j < kv_size(dist_pairs); j++) { + distance = kv_A(dist_pairs, j).distance; + + // only keep entries from the smallest distances for this input geometry + // only keep entries within max_distance, if nonzero + // Note: there may be multiple equidistant or intersected tree items + if (distance <= userdata.min_distance && + (!use_max_distance || distance <= max_distance)) { + kv_push(npy_intp, src_indexes, i); + kv_push(GeometryObject**, nearest_geoms, kv_A(dist_pairs, j).geom); + kv_push(double, nearest_dist, distance); + } + } + + kv_destroy(dist_pairs); + } + + GEOS_FINISH_THREADS; + + if (errstate != PGERR_SUCCESS) { + return NULL; + } + + size = kv_size(src_indexes); + + // Create array of indexes + npy_intp index_dims[2] = {2, size}; + result_indexes = (PyArrayObject*)PyArray_SimpleNew(2, index_dims, NPY_INTP); + if (result_indexes == NULL) { + PyErr_SetString(PyExc_RuntimeError, "could not allocate numpy array"); + return NULL; + } + + // Create array of distances + npy_intp distance_dims[1] = {size}; + result_distances = (PyArrayObject*)PyArray_SimpleNew(1, distance_dims, NPY_DOUBLE); + if (result_distances == NULL) { + PyErr_SetString(PyExc_RuntimeError, "could not allocate numpy array"); + return NULL; + } + + for (i = 0; i < size; i++) { + // assign value into numpy arrays + *(npy_intp*)PyArray_GETPTR2(result_indexes, 0, i) = kv_A(src_indexes, i); + + // Calculate index using offset of its address compared to head of _geoms + geom_index = + (npy_intp)(((char*)kv_A(nearest_geoms, i) - head_ptr) / sizeof(GeometryObject*)); + *(npy_intp*)PyArray_GETPTR2(result_indexes, 1, i) = geom_index; + + *(double*)PyArray_GETPTR1(result_distances, i) = kv_A(nearest_dist, i); + } + + kv_destroy(src_indexes); + kv_destroy(nearest_geoms); + kv_destroy(nearest_dist); + + // return a tuple of (indexes array, distances array) + result = PyTuple_New(2); + PyTuple_SET_ITEM(result, 0, (PyObject*)result_indexes); + PyTuple_SET_ITEM(result, 1, (PyObject*)result_distances); + + return (PyObject*)result; +} + +#endif // GEOS_SINCE_3_6_0 + static PyMemberDef STRtree_members[] = { {"_ptr", T_PYSSIZET, offsetof(STRtreeObject, ptr), READONLY, "Pointer to GEOSSTRtree"}, @@ -570,6 +1045,12 @@ static PyMethodDef STRtree_methods[] = { "Queries the index for all items whose extents intersect the given search " "geometries, and optionally tests them " "against predicate function if provided. "}, +#if GEOS_SINCE_3_6_0 + {"nearest", (PyCFunction)STRtree_nearest, METH_O, + "Queries the index for the nearest item to each of the given search geometries"}, + {"nearest_all", (PyCFunction)STRtree_nearest_all, METH_VARARGS, + "Queries the index for all nearest item(s) to each of the given search geometries"}, +#endif {NULL} /* Sentinel */ }; diff --git a/src/strtree.h b/src/strtree.h index 23afe789..08169d98 100644 --- a/src/strtree.h +++ b/src/strtree.h @@ -12,6 +12,33 @@ typedef struct { GeometryObject*** a; } tree_geom_vec_t; +/* A struct to hold pairs of GeometryObject** and distance for use in STRtree::nearest_all */ +typedef struct { + GeometryObject** geom; + double distance; +} tree_geom_dist_vec_item_t; + +/* A resizeable vector with pairs of GeometryObject** and distance for use in + * STRtree::nearest_all */ +typedef struct { + size_t n, m; + tree_geom_dist_vec_item_t* a; +} tree_geom_dist_vec_t; + +/* A resizeable vector with distances to nearest tree items */ +typedef struct { + size_t n, m; + double* a; +} tree_dist_vec_t; + +/* A struct to hold userdata argument data for distance_callback used by + * GEOSSTRtree_nearest_generic_r */ +typedef struct { + GEOSContextHandle_t ctx; + tree_geom_dist_vec_t* dist_pairs; + double min_distance; +} tree_nearest_userdata_t; + typedef struct { PyObject_HEAD void* ptr; npy_intp count; // count of geometries added to the tree