Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: query tree with multiple input geometries #108

Merged
merged 11 commits into from Mar 18, 2020
99 changes: 89 additions & 10 deletions pygeos/strtree.py
Expand Up @@ -36,16 +36,16 @@ class STRtree:
Examples
--------
>>> import pygeos
>>> geoms = pygeos.points(np.arange(10), np.arange(10))
>>> tree = pygeos.STRtree(geoms)
>>> geom = pygeos.box(2, 2, 4, 4)
>>> # Query geometries that overlap envelope of `geom`:
>>> tree.query(geom).tolist()
>>> tree = pygeos.STRtree(pygeos.points(np.arange(10), np.arange(10)))
>>> # Query geometries that overlap envelope of input geometries:
>>> tree.query(pygeos.box(2, 2, 4, 4)).tolist()
[2, 3, 4]
>>> # Query geometries that overlap envelope of `geom`
>>> # and are contained by `geom`:
>>> tree.query(geom, predicate='contains').tolist()
>>> # Query geometries that are contained by input geometry:
>>> tree.query(pygeos.box(2, 2, 4, 4), predicate='contains').tolist()
[3]
>>> # Query geometries that overlap envelopes of `geoms`
>>> tree.query_bulk([pygeos.box(2, 2, 4, 4), pygeos.box(5, 5, 6, 6)])
(array([0, 0, 0, 1, 1]), array([2, 3, 4, 5, 6]))
"""

def __init__(self, geometries, leafsize=5):
Expand All @@ -56,8 +56,8 @@ def __len__(self):
return self._tree.count

def query(self, geometry, predicate=None):
"""Return all items whose extent intersect the envelope of the input
geometry.
"""Return the index of all geometries in the tree with extents that
intersect the envelope of the input geometry.

If predicate is provided, a prepared version of the input geometry
is tested using the predicate function against each item whose
Expand All @@ -74,6 +74,21 @@ def query(self, geometry, predicate=None):
predicate : {None, 'intersects', 'within', 'contains', 'overlaps', 'crosses', 'touches'}, optional
The predicate to use for testing geometries from the tree
that are within the input geometry's envelope.

Returns
-------
ndarray
Indexes of geometries in tree

Examples
--------
>>> import pygeos
>>> tree = pygeos.STRtree(pygeos.points(np.arange(10), np.arange(10)))
>>> tree.query(pygeos.box(1,1, 3,3)).tolist()
[1, 2, 3]
>>> # Query geometries that are contained by input geometry
>>> tree.query(pygeos.box(2, 2, 4, 4), predicate='contains').tolist()
[3]
"""

if geometry is None:
Expand All @@ -93,3 +108,67 @@ def query(self, geometry, predicate=None):
predicate = BinaryPredicate[predicate].value

return self._tree.query(geometry, predicate)

def query_bulk(self, geometries, predicate=None):
"""Returns all combinations of input geometries and geometries in the tree
where the envelope of each input geometry intersects with the envelope of a
tree geometry.

If predicate is provided, a prepared version of each input geometry
is tested using the predicate function against each item whose
extent intersects the envelope of the input geometry:
predicate(geometry, tree_geometry).

This returns two arrays of equal length, which correspond to the indexes
of the input geometries and indexes of the tree geometries associated
each.

In the context of a spatial join, input geometries are the "left" geometries
that determine the order of the results, and tree geometries are "right" geometries
that are joined against the left geometries. This effectively performs
an inner join, where only those combinations of geometries that can be joined
based on envelope overlap or optional predicate are returned.

Any geometry that is None or empty in the input geometries is omitted from
the output.

Parameters
----------
geometry : Geometry
The envelope of each geometry is taken automatically for
querying the tree.
predicate : {None, 'intersects', 'within', 'contains', 'overlaps', 'crosses', 'touches'}, optional
The predicate to use for testing geometries from the tree
that are within the input geometry's envelope.

Returns
-------
ndarray, ndarray
The first array contains input geometry indexes.
The second array contains tree geometry indexes.

Examples
--------
>>> import pygeos
>>> tree = pygeos.STRtree(pygeos.points(np.arange(10), np.arange(10)))
>>> tree.query_bulk([pygeos.box(2, 2, 4, 4), pygeos.box(5, 5, 6, 6)])
(array([0, 0, 0, 1, 1]), array([2, 3, 4, 5, 6]))
>>> # Query for geometries that contain tree geometries
>>> tree.query_bulk([pygeos.box(2, 2, 4, 4), pygeos.box(5, 5, 6, 6)], predicate='contains')
(array([0]), array([3]))
"""

jorisvandenbossche marked this conversation as resolved.
Show resolved Hide resolved
if predicate is None:
predicate = 0

else:
if not predicate in VALID_PREDICATES:
raise ValueError(
"Predicate {} is not valid; must be one of {}".format(
predicate, ", ".join(VALID_PREDICATES)
)
)

predicate = BinaryPredicate[predicate].value

return self._tree.query_bulk(np.asarray(geometries), predicate)
234 changes: 225 additions & 9 deletions pygeos/test/test_strtree.py
Expand Up @@ -419,7 +419,6 @@ def test_query_contains_polygons(poly_tree, geometry, expected):
assert_array_equal(poly_tree.query(geometry, predicate="contains"), expected)



### predicate == 'overlaps'
# Overlaps only returns results where geometries are of same dimensions
# and do not completely contain each other.
Expand Down Expand Up @@ -479,11 +478,11 @@ def test_query_overlaps_lines(line_tree, geometry, expected):
# box overlaps 2 polygons
(box(0, 0, 1, 1), [0, 1]),
# larger box intersects 3 polygons and contains one
(box(0, 0, 2, 2), [0,2]),
(box(0, 0, 2, 2), [0, 2]),
# buffer overlaps 3 polygons and contains 1
(pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [2,4]),
(pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG), [2, 4]),
# larger buffer overlaps 6 polygons (touches midpoints) but contains one
(pygeos.buffer(pygeos.points(3, 3), 3 * HALF_UNIT_DIAG), [1,2,4,5]),
(pygeos.buffer(pygeos.points(3, 3), 3 * HALF_UNIT_DIAG), [1, 2, 4, 5]),
# one of two points intersects but different dimensions
(pygeos.multipoints([[5, 7], [7, 7]]), []),
],
Expand All @@ -492,7 +491,6 @@ def test_query_overlaps_polygons(poly_tree, geometry, expected):
assert_array_equal(poly_tree.query(geometry, predicate="overlaps"), expected)



### predicate == 'crosses'
# Only valid for certain geometry combinations
# See: https://postgis.net/docs/ST_Crosses.html
Expand Down Expand Up @@ -606,16 +604,234 @@ def test_query_touches_lines(line_tree, geometry, expected):
# point within first polygon
(pygeos.points(0, 0.5), []),
# point is at edge of first polygon
(pygeos.points(HALF_UNIT_DIAG+EPS, 0), [0]),
(pygeos.points(HALF_UNIT_DIAG + EPS, 0), [0]),
# box overlaps envelope of 2 polygons does not touch any at edge
(box(0, 0, 1, 1), []),
# box overlaps 2 polygons and touches edge of first
(box(HALF_UNIT_DIAG+EPS, 0, 2, 2), [0]),
(box(HALF_UNIT_DIAG + EPS, 0, 2, 2), [0]),
# buffer overlaps 3 polygons but does not touch any at edge
(pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG+EPS), []),
(pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG + EPS), []),
# only one point of multipoint within polygon but does not touch
(pygeos.multipoints([[0, 0], [7,7], [7, 8]]), []),
(pygeos.multipoints([[0, 0], [7, 7], [7, 8]]), []),
],
)
def test_query_touches_polygons(poly_tree, geometry, expected):
assert_array_equal(poly_tree.query(geometry, predicate="touches"), expected)


### Bulk query tests
@pytest.mark.parametrize(
"geometry", [None, pygeos.points(0.5, 0.5), [[pygeos.points(0.5, 0.5)]]]
)
def test_query_bulk_wrong_dimemsions(tree, geometry):
with pytest.raises(TypeError, match="Array should be one dimensional") as ex:
brendan-ward marked this conversation as resolved.
Show resolved Hide resolved
tree.query_bulk(geometry)


@pytest.mark.parametrize("geometry", [[], "foo", 1])
def test_query_bulk_wrong_type(tree, geometry):
with pytest.raises(TypeError, match="Array should be of object dtype") as ex:
brendan-ward marked this conversation as resolved.
Show resolved Hide resolved
tree.query_bulk(geometry)


@pytest.mark.parametrize(
"geometry,expected",
[
# points do not intersect
([pygeos.points(0.5, 0.5)], [[], []]),
# points intersect
([pygeos.points(1, 1)], [[0], [1]]),
# first and last points intersect
(
[pygeos.points(1, 1), pygeos.points(-1, -1), pygeos.points(2, 2)],
[[0, 2], [1, 2]],
),
# box contains points
([box(0, 0, 1, 1)], [[0, 0], [0, 1]]),
# bigger box contains more points
([box(5, 5, 15, 15)], [[0, 0, 0, 0, 0], [5, 6, 7, 8, 9]]),
# first and last boxes contains points
(
[box(0, 0, 1, 1), box(100, 100, 110, 110), box(5, 5, 15, 15)],
[[0, 0, 2, 2, 2, 2, 2], [0, 1, 5, 6, 7, 8, 9]],
),
# envelope of buffer contains points
([pygeos.buffer(pygeos.points(3, 3), 1)], [[0, 0, 0], [2, 3, 4]]),
# envelope of points contains points
([pygeos.multipoints([[5, 7], [7, 5]])], [[0, 0, 0], [5, 6, 7]]),
],
)
def test_query_bulk_points(tree, geometry, expected):
assert_array_equal(tree.query_bulk(geometry), expected)


@pytest.mark.parametrize(
"geometry,expected",
[
# point intersects first line
([pygeos.points(0, 0)], [[0], [0]]),
([pygeos.points(0.5, 0.5)], [[0], [0]]),
# point within envelope of first line
([pygeos.points(0, 0.5)], [[0], [0]]),
# point at shared vertex between 2 lines
([pygeos.points(1, 1)], [[0, 0], [0, 1]]),
# box overlaps envelope of first 2 lines (touches edge of 1)
([box(0, 0, 1, 1)], [[0, 0], [0, 1]]),
# envelope of buffer overlaps envelope of 2 lines
([pygeos.buffer(pygeos.points(3, 3), 0.5)], [[0, 0], [2, 3]]),
# envelope of points overlaps 5 lines (touches edge of 2 envelopes)
([pygeos.multipoints([[5, 7], [7, 5]])], [[0, 0, 0, 0], [4, 5, 6, 7]]),
],
)
def test_query_bulk_lines(line_tree, geometry, expected):
assert_array_equal(line_tree.query_bulk(geometry), expected)


@pytest.mark.parametrize(
"geometry,expected",
[
# point intersects edge of envelopes of 2 polygons
([pygeos.points(0.5, 0.5)], [[0, 0], [0, 1]]),
# point intersects single polygon
([pygeos.points(1, 1)], [[0], [1]]),
# box overlaps envelope of 2 polygons
([box(0, 0, 1, 1)], [[0, 0], [0, 1]]),
# first and last boxes overlap envelope of 2 polyons
(
[box(0, 0, 1, 1), box(100, 100, 110, 110), box(2, 2, 3, 3)],
[[0, 0, 2, 2], [0, 1, 2, 3]],
),
# larger box overlaps envelope of 3 polygons
([box(0, 0, 1.5, 1.5)], [[0, 0, 0], [0, 1, 2]]),
# envelope of buffer overlaps envelope of 3 polygons
([pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG)], [[0, 0, 0], [2, 3, 4]]),
# envelope of larger buffer overlaps envelope of 6 polygons
(
[pygeos.buffer(pygeos.points(3, 3), 3 * HALF_UNIT_DIAG)],
[[0, 0, 0, 0, 0], [1, 2, 3, 4, 5]],
),
# envelope of points overlaps 3 polygons
([pygeos.multipoints([[5, 7], [7, 5]])], [[0, 0, 0], [5, 6, 7]]),
],
)
def test_query_bulk_polygons(poly_tree, geometry, expected):
assert_array_equal(poly_tree.query_bulk(geometry), expected)


def test_query_invalid_predicate(tree):
with pytest.raises(ValueError):
tree.query_bulk(pygeos.points(1, 1), predicate="bad_predicate")


### predicate == 'intersects'

# TEMPORARY xfail: MultiPoint intersects with prepared geometries does not work
# properly on GEOS 3.5.x; it was fixed in 3.6+
@pytest.mark.parametrize(
"geometry,expected",
[
# points do not intersect
([pygeos.points(0.5, 0.5)], [[], []]),
# points intersect
([pygeos.points(1, 1)], [[0], [1]]),
# box contains points
([box(3, 3, 6, 6)], [[0, 0, 0, 0], [3, 4, 5, 6]]),
# first and last boxes contain points
(
[box(0, 0, 1, 1), box(100, 100, 110, 110), box(3, 3, 6, 6)],
[[0, 0, 2, 2, 2, 2], [0, 1, 3, 4, 5, 6]],
),
# envelope of buffer contains more points than intersect buffer
# due to diagonal distance
([pygeos.buffer(pygeos.points(3, 3), 1)], [[0], [3]]),
# envelope of buffer with 1/2 distance between points should intersect
# same points as envelope
(
[pygeos.buffer(pygeos.points(3, 3), 3 * HALF_UNIT_DIAG)],
[[0, 0, 0], [2, 3, 4]],
),
# multipoints intersect
pytest.param(
[pygeos.multipoints([[5, 5], [7, 7]])],
[[0, 0], [5, 7]],
marks=pytest.mark.xfail(reason="GEOS 3.5"),
),
# envelope of points contains points, but points do not intersect
([pygeos.multipoints([[5, 7], [7, 5]])], [[], []]),
# only one point of multipoint intersects
pytest.param(
[pygeos.multipoints([[5, 7], [7, 7]])],
[[0], [7]],
marks=pytest.mark.xfail(reason="GEOS 3.5"),
),
],
)
def test_query_bulk_intersects_points(tree, geometry, expected):
assert_array_equal(tree.query_bulk(geometry, predicate="intersects"), expected)


@pytest.mark.parametrize(
"geometry,expected",
[
# point intersects first line
([pygeos.points(0, 0)], [[0], [0]]),
([pygeos.points(0.5, 0.5)], [[0], [0]]),
# point within envelope of first line but does not intersect
([pygeos.points(0, 0.5)], [[], []]),
# point at shared vertex between 2 lines
([pygeos.points(1, 1)], [[0, 0], [0, 1]]),
# box overlaps envelope of first 2 lines (touches edge of 1)
([box(0, 0, 1, 1)], [[0, 0], [0, 1]]),
# first and last boxes overlap multiple lines each
(
[box(0, 0, 1, 1), box(100, 100, 110, 110), box(2, 2, 3, 3)],
[[0, 0, 2, 2, 2], [0, 1, 1, 2, 3]],
),
# buffer intersects 2 lines
([pygeos.buffer(pygeos.points(3, 3), 0.5)], [[0, 0], [2, 3]]),
# buffer intersects midpoint of line at tangent
([pygeos.buffer(pygeos.points(2, 1), HALF_UNIT_DIAG)], [[0], [1]]),
# envelope of points overlaps lines but intersects none
([pygeos.multipoints([[5, 7], [7, 5]])], [[], []]),
# only one point of multipoint intersects
([pygeos.multipoints([[5, 7], [7, 7]])], [[0, 0], [6, 7]]),
],
)
def test_query_bulk_intersects_lines(line_tree, geometry, expected):
assert_array_equal(line_tree.query_bulk(geometry, predicate="intersects"), expected)


@pytest.mark.parametrize(
"geometry,expected",
[
# point within first polygon
([pygeos.points(0, 0.5)], [[0], [0]]),
([pygeos.points(0.5, 0)], [[0], [0]]),
# midpoint between two polygons intersects both
([pygeos.points(0.5, 0.5)], [[0, 0], [0, 1]]),
# point intersects single polygon
([pygeos.points(1, 1)], [[0], [1]]),
# box overlaps envelope of 2 polygons
([box(0, 0, 1, 1)], [[0, 0], [0, 1]]),
# first and last boxes overlap
(
[box(0, 0, 1, 1), box(100, 100, 110, 110), box(2, 2, 3, 3)],
[[0, 0, 2, 2], [0, 1, 2, 3]],
),
# larger box intersects 3 polygons
([box(0, 0, 1.5, 1.5)], [[0, 0, 0], [0, 1, 2]]),
# buffer overlaps 3 polygons
([pygeos.buffer(pygeos.points(3, 3), HALF_UNIT_DIAG)], [[0, 0, 0], [2, 3, 4]]),
# larger buffer overlaps 6 polygons (touches midpoints)
(
[pygeos.buffer(pygeos.points(3, 3), 3 * HALF_UNIT_DIAG)],
[[0, 0, 0, 0, 0], [1, 2, 3, 4, 5]],
),
# envelope of points overlaps polygons, but points do not intersect
([pygeos.multipoints([[5, 7], [7, 5]])], [[], []]),
# only one point of multipoint within polygon
([pygeos.multipoints([[5, 7], [7, 7]])], [[0], [7]]),
],
)
def test_query_bulk_intersects_polygons(poly_tree, geometry, expected):
assert_array_equal(poly_tree.query_bulk(geometry, predicate="intersects"), expected)