Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STRtree with a reverse mapping and query_items/query_geoms methods #1112

Merged
merged 9 commits into from
Jul 12, 2021
250 changes: 173 additions & 77 deletions shapely/strtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,41 @@
"""

import ctypes
import logging
from warnings import warn

from shapely.errors import ShapelyDeprecationWarning
from shapely.geos import lgeos

log = logging.getLogger(__name__)


class STRtree:
"""
An STRtree is a spatial index; specifically, an R-tree created
using the Sort-Tile-Recursive algorithm.
"""An STR-packed R-tree spatial index.

Pass a list of geometry objects to the `STRtree` constructor to
create a spatial index. References to these indexed objects are
kept and stored in the R-tree. You can query them with another
geometric object.
An index is initialized from a sequence of geometry objects or
(geometry, item) pairs. The items must be integers and are stored in
nodes of the tree. Stored items and corresponding geometry objects
can be spatially queried using another geometric object.

The `STRtree` is query-only, meaning that once created
you cannot add or remove geometries.
The tree is immutable and query-only, meaning that once created
nodes cannot be added or removed.

*New in version 1.4.0*.

Parameters
----------
geoms : sequence of geometry objects
geometry objects to be indexed
initdata : sequence
A sequence of single geometry objects or (geometry, item) pairs.
The items must be integers and typically serve as identifiers in
an application.

Examples
--------

Creating an index of pologons:
Creating an index of polygons:

>>> from shapely.strtree import STRtree
>>> from shapely.geometry import Polygon, Point
>>> from shapely.geometry import Polygon
>>>
>>> polys = [Polygon(((0, 0), (1, 0), (1, 1))),
... Polygon(((0, 1), (0, 0), (1, 0))),
Expand All @@ -66,78 +68,94 @@ class STRtree:
>>> polys[2] in result
False

Behavior if an `STRtree` is created empty:
Notes
-----
The class maintains a reverse mapping of integer items to geometries
and for performance reasons stores only integers in the STRtree from
the GEOS C API. The tree is filled using the Sort-Tile-Recursive
[1]_ algorithm.

References
----------
.. [1] Leutenegger, Scott T.; Edgington, Jeffrey M.; Lopez, Mario A.
(February 1997). "STR: A Simple and Efficient Algorithm for R-Tree
Packing". https://ia600900.us.archive.org/27/items/nasa_techdoc_19970016975/19970016975.pdf

>>> tree = STRtree([])
>>> tree.query(Point(0, 0))
[]
>>> print(tree.nearest(Point(0, 0)))
None
"""

def __init__(self, geoms):
def __init__(self, initdata):
warn(
"STRtree will be completely changed in 2.0.0. The exact API is not yet decided, but will be documented before 1.8.0",
"STRtree will be changed in 2.0.0. The exact API is not yet decided, but will be documented before 1.8.0",
ShapelyDeprecationWarning,
stacklevel=2,
)
# filter empty geometries out of the input
geoms = [geom for geom in geoms if not geom.is_empty]
self._n_geoms = len(geoms)

self._init_tree_handle(geoms)

# Keep references to geoms.
self._geoms = list(geoms)

def _init_tree_handle(self, geoms):
node_capacity = 10
self._tree_handle = lgeos.GEOSSTRtree_create(node_capacity)
for geom in geoms:
lgeos.GEOSSTRtree_insert(self._tree_handle, geom._geom, ctypes.py_object(geom))
self._tree = None
self._rev = None
if initdata:
self._rev = {
idx: geom
for geom, idx in self._iterinitdata(initdata)
if not geom.is_empty
}
if self._rev:
self._init_tree(self._rev.items())

def _iterinitdata(self, initdata):
for enum_idx, item in enumerate(initdata):
if isinstance(item, tuple): # a geom, idx pair
yield item[:2]
else: # a single geometry
yield (item, enum_idx)
sgillies marked this conversation as resolved.
Show resolved Hide resolved

def _init_tree(self, rev_initdata):
if rev_initdata:
node_capacity = 10
self._tree = lgeos.GEOSSTRtree_create(node_capacity)
for idx, geom in rev_initdata:
lgeos.GEOSSTRtree_insert(self._tree, geom._geom, ctypes.py_object(idx))

def __getstate__(self):
state = self.__dict__.copy()
del state["_tree_handle"]
del state["_tree"]
return state

def __setstate__(self, state):
self.__dict__.update(state)
self._init_tree_handle(self._geoms)
if self._rev:
self._init_tree(self._rev.items())

def __del__(self):
if self._tree_handle is not None:
if self._tree is not None:
try:
lgeos.GEOSSTRtree_destroy(self._tree_handle)
lgeos.GEOSSTRtree_destroy(self._tree)
except AttributeError:
pass # lgeos might be empty on shutdown.

self._tree_handle = None
self._tree = None

def query(self, geom):
"""
Search the index for geometry objects whose extents
intersect the extent of the given object.
def query_item(self, geom):
sgillies marked this conversation as resolved.
Show resolved Hide resolved
"""Query for nodes which intersect the geom's envelope to get
stored items.

Items are integers serving as identifiers for an application.

Parameters
----------
geom : geometry object
The query geometry
The query geometry.

Returns
-------
list of geometry objects
All the geometry objects in the index whose extents
intersect the extent of `geom`.
Collection
A list or array of ints.

Note
----
A geometry object's "extent" is its the minimum xy bounding
A geometry object's "envelope" is its minimum xy bounding
rectangle.

Examples
--------

A buffer around a point can be used to control the extent
of the query.

Expand All @@ -160,45 +178,76 @@ def query(self, geom):
>>> [o.wkt for o in tree.query(query_geom) if o.intersects(query_geom)]
['POINT (2 2)']

To get the original indices of the returned objects, create an
auxiliary dictionary. But use the geometry *ids* as keys since
the shapely geometry objects themselves are not hashable.

>>> index_by_id = dict((id(pt), i) for i, pt in enumerate(points))
>>> [(index_by_id[id(pt)], pt.wkt) for pt in tree.query(Point(2,2).buffer(1.0))]
[(1, 'POINT (1 1)'), (2, 'POINT (2 2)'), (3, 'POINT (3 3)')]
"""
if self._n_geoms == 0:
if self._tree is None or not self._rev:
return []

result = []

def callback(item, userdata):
geom = ctypes.cast(item, ctypes.py_object).value
result.append(geom)

lgeos.GEOSSTRtree_query(self._tree_handle, geom._geom, lgeos.GEOSQueryCallback(callback), None)
idx = ctypes.cast(item, ctypes.py_object).value
result.append(idx)

lgeos.GEOSSTRtree_query(
self._tree, geom._geom, lgeos.GEOSQueryCallback(callback), None
)
return result

def nearest(self, geom):
def query_geom(self, geom):
sgillies marked this conversation as resolved.
Show resolved Hide resolved
"""Query for nodes which intersect the geom's envelope to get
geometries corresponding to the items stored in the nodes.

Parameters
----------
geom : geometry object
The query geometry.

Returns
-------
Collection
A list or array of geometry objects.

"""
Get the nearest object in the index to a geometry object.
items = self.query_item(geom)
return [self._rev[idx] for idx in items]

def query(self, geom):
"""Query for nodes which intersect the geom's envelope to get
geometries corresponding to the items stored in the nodes.

This method is an alias for query_geom. It may be removed in
version 2.0.

Parameters
----------
geom : geometry object
The query geometry
The query geometry.

Returns
-------
geometry object
The nearest geometry object in the index to `geom`.
Collection
A list or array of geometry objects.

"""
return self.query_geom(geom)

def nearest_item(self, geom):
"""Query the tree for the node nearest to geom and get the item
stored in the node.

Items are integers serving as identifiers for an application.

Parameters
----------
geom : geometry object
The query geometry.

Will always only return *one* object even if several
in the index are the minimum distance away.
Returns
-------
int or None

`None` if the index is empty.
None is returned if this index is empty. This may change in
version 2.0.

Examples
--------
Expand All @@ -213,24 +262,71 @@ def nearest(self, geom):
>>> tree = STRtree ([Point(0, 0), Point(0, 0)])
>>> tree.nearest(Point(0, 0)).wkt
'POINT (0 0)'

"""
if self._n_geoms == 0:
if self._tree is None or not self._rev:
return None
Copy link
Contributor Author

@sgillies sgillies Mar 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raising an exception here could be less ambiguous than returning None, which is easy to confuse with an empty sequence (a perfectly normal query result). But that's an API change to save for 2.0.


envelope = geom.envelope

def callback(item1, item2, distance, userdata):
try:
geom1 = ctypes.cast(item1, ctypes.py_object).value
idx = ctypes.cast(item1, ctypes.py_object).value
geom2 = ctypes.cast(item2, ctypes.py_object).value
dist = ctypes.cast(distance, ctypes.POINTER(ctypes.c_double))
lgeos.GEOSDistance(geom1._geom, geom2._geom, dist)
lgeos.GEOSDistance(self._rev[idx]._geom, geom2._geom, dist)
return 1
except Exception:
log.exception("Caught exception")
return 0

item = lgeos.GEOSSTRtree_nearest_generic(self._tree_handle, ctypes.py_object(geom), envelope._geom, \
lgeos.GEOSDistanceCallback(callback), None)
item = lgeos.GEOSSTRtree_nearest_generic(
self._tree,
ctypes.py_object(geom),
envelope._geom,
lgeos.GEOSDistanceCallback(callback),
None,
)
result = ctypes.cast(item, ctypes.py_object).value

return result

def nearest_geom(self, geom):
sgillies marked this conversation as resolved.
Show resolved Hide resolved
"""Query the tree for the node nearest to geom and get the
geometry corresponding to the item stored in the node.

Parameters
----------
geom : geometry object
The query geometry.

Returns
-------
int or None

None is returned if this index is empty. This may change in
version 2.0.

"""
return self._rev[self.nearest_item(geom)]

def nearest(self, geom):
"""Query the tree for the node nearest to geom and get the
geometry corresponding to the item stored in the node.

This method is an alias for nearest_geom. It may be removed in
version 2.0.

Parameters
----------
geom : geometry object
The query geometry.

Returns
-------
int or None

None is returned if this index is empty. This may change in
version 2.0.

"""
return self.nearest_geom(geom)