Skip to content

Commit

Permalink
Accept indices in collection constructors (#290)
Browse files Browse the repository at this point in the history
  • Loading branch information
caspervdw committed Mar 20, 2021
1 parent ce0c5ab commit d7ab2df
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 12 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ Version 0.10 (unreleased)

* Addition of ``nearest`` and ``nearest_all`` functions to ``STRtree`` for
GEOS >= 3.6 to find the nearest neighbors (#272).
* Enable bulk construction of collections with different number of geometries
by optionally taking an index arrays in the constructors ``multipoints``,
``multilinestrings``, ``multipolygons``, and ``geometrycollections`` (#290).
* Released GIL for ``points``, ``linestrings``, ``linearrings``, and
``polygons`` (without holes) (#310).
* Updated ``box`` ufunc to use internal C function for creating polygon
(about 2x faster) and added ``ccw`` parameter to create polygon in
counterclockwise (default) or clockwise direction (#308).


**API Changes**

* STRtree default leaf size is now 10 instead of 5, for somewhat better performance
Expand Down
114 changes: 113 additions & 1 deletion pygeos/_geometry.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from cpython cimport PyObject
cimport cython
from cython cimport view

import numpy as np
cimport numpy as np
Expand All @@ -11,7 +12,10 @@ from pygeos._geos cimport (
GEOSGeometry,
GEOSGeom_clone_r,
GEOSGetGeometryN_r,
get_geos_handle
get_geos_handle,
GEOSGeom_destroy_r,
GEOSGeom_createCollection_r,
GEOSGeomTypeId_r,
)
from pygeos._pygeos_api cimport (
import_pygeos_c_api,
Expand Down Expand Up @@ -75,3 +79,111 @@ def get_parts(object[:] array):
idx += 1

return parts, index


@cython.boundscheck(False)
@cython.wraparound(False)
def collections_1d(object geometries, object indices, int geometry_type = 7):
cdef Py_ssize_t geom_idx_1 = 0
cdef Py_ssize_t coll_idx = 0
cdef unsigned int coll_size = 0
cdef Py_ssize_t coll_geom_idx = 0
cdef GEOSGeometry *geom = NULL
cdef GEOSGeometry *coll = NULL
cdef int expected_type = -1
cdef int expected_type_alt = -1
cdef int curr_type = -1

if geometry_type == 4: # MULTIPOINT
expected_type = 0
elif geometry_type == 5: # MULTILINESTRING
expected_type = 1
expected_type_alt = 2
elif geometry_type == 6: # MULTIPOLYGON
expected_type = 3
elif geometry_type == 7:
pass
else:
raise ValueError(f"Invalid geometry_type: {geometry_type}.")

# Cast input arrays and define memoryviews for later usage
geometries = np.asarray(geometries, dtype=np.object)
if geometries.ndim != 1:
raise TypeError("geometries is not a one-dimensional array.")

indices = np.asarray(indices, dtype=np.int32)
if indices.ndim != 1:
raise TypeError("indices is not a one-dimensional array.")

if geometries.shape[0] != indices.shape[0]:
raise ValueError("geometries and indices do not have equal size.")

if geometries.shape[0] == 0:
# return immediately if there are no geometries to return
return np.empty(shape=(0, ), dtype=np.object_)

if np.any(indices[1:] < indices[:indices.shape[0] - 1]):
raise ValueError("The indices should be sorted.")

cdef object[:] geometries_view = geometries
cdef int[:] indices_view = indices

# get the geometry count per collection
cdef int[:] collection_size = np.bincount(indices).astype(np.int32)

# A temporary array for the geometries that will be given to CreateCollection.
# Its size equals max(collection_size) to accomodate the largest collection.
temp_geoms = np.empty(shape=(np.max(collection_size), ), dtype=np.intp)
cdef np.intp_t[:] temp_geoms_view = temp_geoms

# The final target array
cdef Py_ssize_t n_colls = collection_size.shape[0]
result = np.empty(shape=(n_colls, ), dtype=np.object_)
cdef object[:] result_view = result

with get_geos_handle() as geos_handle:
for coll_idx in range(n_colls):
coll_size = 0

# fill the temporary array with geometries belonging to this collection
for coll_geom_idx in range(collection_size[coll_idx]):
if PyGEOS_GetGEOSGeometry(<PyObject *>geometries_view[geom_idx_1 + coll_geom_idx], &geom) == 0:
# deallocate previous temp geometries (preventing memory leaks)
for coll_geom_idx in range(coll_size):
GEOSGeom_destroy_r(geos_handle, <GEOSGeometry *>temp_geoms_view[coll_geom_idx])
raise TypeError(
"One of the arguments is of incorrect type. Please provide only Geometry objects."
)

# ignore missing values
if geom == NULL:
continue

# Check geometry subtype for non-geometrycollections
if geometry_type != 7:
curr_type = GEOSGeomTypeId_r(geos_handle, geom)
if curr_type != expected_type and curr_type != expected_type_alt:
# deallocate previous temp geometries (preventing memory leaks)
for coll_geom_idx in range(coll_size):
GEOSGeom_destroy_r(geos_handle, <GEOSGeometry *>temp_geoms_view[coll_geom_idx])
raise TypeError(
f"One of the arguments has unexpected geometry type {curr_type}."
)

# assign to the temporary geometry array
temp_geoms_view[coll_size] = <np.intp_t>GEOSGeom_clone_r(geos_handle, geom)
coll_size += 1

# create the collection
coll = GEOSGeom_createCollection_r(
geos_handle,
geometry_type,
<GEOSGeometry**> &temp_geoms_view[0],
coll_size
)

result_view[coll_idx] = PyGEOS_CreateGeometry(coll, geos_handle)

geom_idx_1 += collection_size[coll_idx]

return result
10 changes: 10 additions & 0 deletions pygeos/_geos.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,19 @@ cdef extern from "geos_c.h":
const GEOSGeometry* GEOSGetGeometryN_r(GEOSContextHandle_t handle,
const GEOSGeometry* g,
int n) nogil except NULL
int GEOSGeomTypeId_r(GEOSContextHandle_t handle, GEOSGeometry* g) nogil except -1
void GEOSGeom_destroy_r(GEOSContextHandle_t handle,
GEOSGeometry* g) nogil
GEOSGeometry* GEOSGeom_clone_r(GEOSContextHandle_t handle,
const GEOSGeometry* g) nogil except NULL

GEOSGeometry* GEOSGeom_createCollection_r(
GEOSContextHandle_t handle,
int type,
GEOSGeometry** geoms,
unsigned int ngeoms
) nogil except NULL


cdef class get_geos_handle:
cdef GEOSContextHandle_t handle
Expand Down
2 changes: 0 additions & 2 deletions pygeos/_geos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,3 @@ cdef class get_geos_handle:

def __exit__(self, type, value, traceback):
GEOS_finish_r(self.handle)


51 changes: 43 additions & 8 deletions pygeos/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from . import lib
from . import Geometry, GeometryType
from .decorators import multithreading_enabled
from ._geometry import collections_1d

__all__ = [
"points",
Expand Down Expand Up @@ -80,6 +81,7 @@ def linearrings(coords, y=None, z=None, **kwargs):
return _wrap_construct_ufunc(lib.linearrings, coords, y, z, **kwargs)



@multithreading_enabled
def polygons(shells, holes=None):
"""Create an array of polygons.
Expand Down Expand Up @@ -131,63 +133,96 @@ def box(xmin, ymin, xmax, ymax, ccw=True, **kwargs):
return lib.box(xmin, ymin, xmax, ymax, ccw, **kwargs)


def multipoints(geometries):
def multipoints(geometries, indices=None):
"""Create multipoints from arrays of points
Parameters
----------
geometries : array_like
An array of points or coordinates (see points).
indices : array_like or None
Indices into the target array where input geometries belong. If
provided, both geometries and indices should be 1D and have matching
sizes.
"""
typ = GeometryType.MULTIPOINT
geometries = np.asarray(geometries)
if not isinstance(geometries, Geometry) and np.issubdtype(
geometries.dtype, np.number
):
geometries = points(geometries)
return lib.create_collection(geometries, GeometryType.MULTIPOINT)
if indices is None:
return lib.create_collection(geometries, typ)
else:
return collections_1d(geometries, indices, typ)


def multilinestrings(geometries):
def multilinestrings(geometries, indices=None):
"""Create multilinestrings from arrays of linestrings
Parameters
----------
geometries : array_like
An array of linestrings or coordinates (see linestrings).
indices : array_like or None
Indices into the target array where input geometries belong. If
provided, both geometries and indices should be 1D and have matching
sizes.
"""
typ = GeometryType.MULTILINESTRING
geometries = np.asarray(geometries)
if not isinstance(geometries, Geometry) and np.issubdtype(
geometries.dtype, np.number
):
geometries = linestrings(geometries)
return lib.create_collection(geometries, GeometryType.MULTILINESTRING)

if indices is None:
return lib.create_collection(geometries, typ)
else:
return collections_1d(geometries, indices, typ)


def multipolygons(geometries):
def multipolygons(geometries, indices=None):
"""Create multipolygons from arrays of polygons
Parameters
----------
geometries : array_like
An array of polygons or coordinates (see polygons).
indices : array_like or None
Indices into the target array where input geometries belong. If
provided, both geometries and indices should be 1D and have matching
sizes.
"""
typ = GeometryType.MULTIPOLYGON
geometries = np.asarray(geometries)
if not isinstance(geometries, Geometry) and np.issubdtype(
geometries.dtype, np.number
):
geometries = polygons(geometries)
return lib.create_collection(geometries, GeometryType.MULTIPOLYGON)
if indices is None:
return lib.create_collection(geometries, typ)
else:
return collections_1d(geometries, indices, typ)


def geometrycollections(geometries):
def geometrycollections(geometries, indices=None):
"""Create geometrycollections from arrays of geometries
Parameters
----------
geometries : array_like
An array of geometries
indices : array_like or None
Indices into the target array where input geometries belong. If
provided, both geometries and indices should be 1D and have matching
sizes.
"""
return lib.create_collection(geometries, GeometryType.GEOMETRYCOLLECTION)
typ = GeometryType.GEOMETRYCOLLECTION
if indices is None:
return lib.create_collection(geometries, typ)
else:
return collections_1d(geometries, indices, typ)


def prepare(geometry, **kwargs):
Expand Down
98 changes: 98 additions & 0 deletions pygeos/test/test_creation_indices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import pygeos
import pytest
import numpy as np
from .common import point, line_string, linear_ring, polygon, empty

geom_coll = pygeos.geometrycollections


@pytest.mark.parametrize(
"geometries",
[
np.array([1, 2], dtype=np.int32),
None,
np.array([[point]]),
"hello",
],
)
def test_invalid_geometries(geometries):
with pytest.raises(TypeError):
pygeos.geometrycollections(geometries, indices=[0, 1])


@pytest.mark.parametrize(
"indices",
[
np.array([point]),
" hello",
[0, 1], # wrong length
],
)
def test_invalid_indices(indices):
with pytest.raises((TypeError, ValueError)):
pygeos.geometrycollections([point], indices=indices)


@pytest.mark.parametrize(
"geometries,indices,expected",
[
([point, line_string], [0, 0], [geom_coll([point, line_string])]),
([point, line_string], [0, 1], [geom_coll([point]), geom_coll([line_string])]),
(
[point, line_string],
[1, 1],
[geom_coll([]), geom_coll([point, line_string])],
),
([point, None], [0, 0], [geom_coll([point])]),
([point, None], [0, 1], [geom_coll([point]), geom_coll([])]),
([point, None, line_string], [0, 0, 0], [geom_coll([point, line_string])]),
],
)
def test_geometrycollections(geometries, indices, expected):
actual = pygeos.geometrycollections(geometries, indices=indices)
assert pygeos.equals(actual, expected).all()


def test_multipoints():
actual = pygeos.multipoints(
[point],
indices=[0],
)
assert pygeos.equals(actual, pygeos.multipoints([point])).all()


def test_multilinestrings():
actual = pygeos.multilinestrings([line_string], indices=[0])
assert pygeos.equals(actual, pygeos.multilinestrings([line_string])).all()


def test_multilinearrings():
actual = pygeos.multilinestrings(
[linear_ring],
indices=[0],
)
assert pygeos.equals(actual, pygeos.multilinestrings([linear_ring])).all()


def test_multipolygons():
actual = pygeos.multipolygons(
[polygon],
indices=[0],
)
assert pygeos.equals(actual, pygeos.multipolygons([polygon])).all()


@pytest.mark.parametrize(
"geometries,func",
[
([line_string], pygeos.multipoints),
([polygon], pygeos.multipoints),
([point], pygeos.multilinestrings),
([polygon], pygeos.multilinestrings),
([point], pygeos.multipolygons),
([line_string], pygeos.multipolygons),
],
)
def test_incompatible_types(geometries, func):
with pytest.raises(TypeError):
func(geometries, indices=[0])

0 comments on commit d7ab2df

Please sign in to comment.