Skip to content

Commit

Permalink
Enable "return_index" in get_coordinates (#318)
Browse files Browse the repository at this point in the history
  • Loading branch information
caspervdw committed Mar 20, 2021
1 parent d7ab2df commit 9193765
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Version 0.10 (unreleased)
``multilinestrings``, ``multipolygons``, and ``geometrycollections`` (#290).
* Released GIL for ``points``, ``linestrings``, ``linearrings``, and
``polygons`` (without holes) (#310).
* Added the option to return the geometry index in ``get_coordinates`` (#318).
* Updated ``box`` ufunc to use internal C function for creating polygon
(about 2x faster) and added ``ccw`` parameter to create polygon in
counterclockwise (default) or clockwise direction (#308).
Expand Down
31 changes: 21 additions & 10 deletions pygeos/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def apply(geometry, transformation, include_z=False):
A function that transforms a (N, 2) or (N, 3) ndarray of float64 to
another (N, 2) or (N, 3) ndarray of float64.
include_z : bool, default False
Whether to include the third dimension in the coordinates array
that is passed to the `transformation` function. If True, and a
If True, include the third dimension in the coordinates array
that is passed to the `transformation` function. If a
geometry has no third dimension, the z-coordinates passed to the
function will be NaN.
Expand All @@ -44,7 +44,7 @@ def apply(geometry, transformation, include_z=False):
<pygeos.Geometry POINT Z (1 1 1)>
"""
geometry_arr = np.array(geometry, dtype=np.object_) # makes a copy
coordinates = lib.get_coordinates(geometry_arr, include_z)
coordinates = lib.get_coordinates(geometry_arr, include_z, False)
new_coordinates = transformation(coordinates)
# check the array to yield understandable error messages
if not isinstance(new_coordinates, np.ndarray):
Expand Down Expand Up @@ -87,7 +87,7 @@ def count_coordinates(geometry):
return lib.count_coordinates(np.asarray(geometry, dtype=np.object_))


def get_coordinates(geometry, include_z=False):
def get_coordinates(geometry, include_z=False, return_index=False):
"""Gets coordinates from a geometry array as an array of floats.
The shape of the returned array is (N, 2), with N being the number of
Expand All @@ -99,8 +99,12 @@ def get_coordinates(geometry, include_z=False):
----------
geometry : Geometry or array_like
include_z : bool, default False
Whether to include the third dimension in the output. If True, and a
geometry has no third dimension, the z-coordinates will be NaN.
If, True include the third dimension in the output. If a geometry
has no third dimension, the z-coordinates will be NaN.
return_index : bool, default False
If True, also return the index of each returned geometry as a separate
ndarray of integers. For multidimensional arrays, this indexes into the
flattened array (in C contiguous order).
Examples
--------
Expand All @@ -117,8 +121,16 @@ def get_coordinates(geometry, include_z=False):
[[0.0, 0.0]]
>>> get_coordinates(Geometry("POINT Z (0 0 0)"), include_z=True).tolist()
[[0.0, 0.0, 0.0]]
When return_index=True, indexes are returned also:
>>> geometries = [Geometry("LINESTRING (2 2, 4 4)"), Geometry("POINT (0 0)")]
>>> coordinates, index = get_coordinates(geometries, return_index=True)
>>> coordinates.tolist(), index.tolist()
([[2.0, 2.0], [4.0, 4.0], [0.0, 0.0]], [0, 0, 1])
"""
return lib.get_coordinates(np.asarray(geometry, dtype=np.object_), include_z)
return lib.get_coordinates(
np.asarray(geometry, dtype=np.object_), include_z, return_index
)


def set_coordinates(geometry, coordinates):
Expand Down Expand Up @@ -159,9 +171,8 @@ def set_coordinates(geometry, coordinates):
"The coordinate array should have dimension of 2 "
"(has {})".format(coordinates.ndim)
)
if (
(coordinates.shape[0] != lib.count_coordinates(geometry_arr))
or (coordinates.shape[1] not in {2, 3})
if (coordinates.shape[0] != lib.count_coordinates(geometry_arr)) or (
coordinates.shape[1] not in {2, 3}
):
raise ValueError(
"The coordinate array has an invalid shape {}".format(coordinates.shape)
Expand Down
4 changes: 2 additions & 2 deletions pygeos/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def to_wkt(
The rounding precision when writing the WKT string. Set to a value of
-1 to indicate the full precision.
trim : bool, default True
Whether to trim unnecessary decimals (trailing zeros).
If True, trim unnecessary decimals (trailing zeros).
output_dimension : int, default 3
The output dimension for the WKT string. Supported values are 2 and 3.
Specifying 3 means that up to 3 dimensions will be written but 2D
Expand Down Expand Up @@ -132,7 +132,7 @@ def to_wkb(
Defaults to native machine byte order (-1). Use 0 to force big endian
and 1 for little endian.
include_srid : bool, default False
Whether the SRID should be included in WKB (this is an extension
If True, the SRID is be included in WKB (this is an extension
to the OGC WKB specification).
Examples
Expand Down
4 changes: 2 additions & 2 deletions pygeos/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def line_interpolate_point(line, distance, normalized=False, **kwargs):
Negative values measure distance from the end of the line. Out-of-range
values will be clipped to the line endings.
normalized : bool
If normalized is set to True, the distance is a fraction of the total
If True, the distance is a fraction of the total
line length instead of the absolute distance.
Examples
Expand Down Expand Up @@ -59,7 +59,7 @@ def line_locate_point(line, other, normalized=False, **kwargs):
line : Geometry or array_like
point : Geometry or array_like
normalized : bool
If normalized is set to True, the distance is a fraction of the total
If True, the distance is a fraction of the total
line length instead of the absolute distance.
Examples
Expand Down
30 changes: 30 additions & 0 deletions pygeos/test/test_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,36 @@ def test_get_coords(geoms, x, y, include_z):
assert_equal(actual, expected)


# fmt: off
@pytest.mark.parametrize(
"geoms,index",
[
([], []),
([empty], []),
([point, empty], [0]),
([empty, point, empty], [1]),
([point, None], [0]),
([None, point, None], [1]),
([point, point], [0, 1]),
([point, line_string], [0, 1, 1, 1]),
([line_string, point], [0, 0, 0, 1]),
([line_string, linear_ring], [0, 0, 0, 1, 1, 1, 1, 1]),
],
) # fmt: on
def test_get_coords_index(geoms, index):
_, actual = get_coordinates(np.array(geoms, np.object_), return_index=True)
expected = np.array(index, dtype=np.intp)
assert_equal(actual, expected)


@pytest.mark.parametrize("order", ["C", "F"])
def test_get_coords_index_multidim(order):
geometry = np.array([[point, line_string], [empty, empty]], order=order)
expected = [0, 1, 1, 1] # would be [0, 2, 2, 2] with fortran order
_, actual = get_coordinates(geometry, return_index=True)
assert_equal(actual, expected)


# fmt: off
@pytest.mark.parametrize("include_z", [True, False])
@pytest.mark.parametrize(
Expand Down
54 changes: 45 additions & 9 deletions src/coords.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "geos.h"
#include "pygeom.h"


/* These function prototypes enables that these functions can call themselves */
static char get_coordinates(GEOSContextHandle_t, GEOSGeometry*, PyArrayObject*, npy_intp*,
int);
Expand Down Expand Up @@ -360,14 +359,15 @@ npy_intp CountCoords(PyArrayObject* arr) {
return result;
}

PyObject* GetCoords(PyArrayObject* arr, int include_z) {
PyObject* GetCoords(PyArrayObject* arr, int include_z, int return_index) {
npy_intp coord_dim;
NpyIter* iter;
NpyIter_IterNextFunc* iternext;
char** dataptr;
npy_intp cursor;
npy_intp cursor, i, geom_i;
GeometryObject* obj;
GEOSGeometry* geom;
PyArrayObject* index = NULL;

/* create a coordinate array with the appropriate dimensions */
npy_intp size = CountCoords(arr);
Expand All @@ -384,24 +384,42 @@ PyObject* GetCoords(PyArrayObject* arr, int include_z) {
if (result == NULL) {
return NULL;
}
if (return_index) {
npy_intp dims_ind[1] = {size};
index = (PyArrayObject*)PyArray_SimpleNew(1, dims_ind, NPY_INTP);
if (index == NULL) {
Py_DECREF(result);
return NULL;
}
}

/* Handle zero-sized arrays specially */
if (PyArray_SIZE(arr) == 0) {
return (PyObject*)result;
if (return_index) {
PyObject* result_tpl = PyTuple_New(2);
PyTuple_SET_ITEM(result_tpl, 0, (PyObject*)result);
PyTuple_SET_ITEM(result_tpl, 1, (PyObject*)index);
return result_tpl;
} else {
return (PyObject*)result;
}
}

/* We use the Numpy iterator C-API here.
The iterator exposes an "iternext" function which updates a "dataptr"
see also: https://docs.scipy.org/doc/numpy/reference/c-api.iterator.html */
iter = NpyIter_New(arr, NPY_ITER_READONLY | NPY_ITER_REFS_OK, NPY_KEEPORDER,
iter = NpyIter_New(arr, NPY_ITER_READONLY | NPY_ITER_REFS_OK, NPY_CORDER,
NPY_NO_CASTING, NULL);
if (iter == NULL) {
Py_DECREF(result);
Py_XDECREF(index);
return NULL;
}
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
NpyIter_Deallocate(iter);
Py_DECREF(result);
Py_XDECREF(index);
return NULL;
}
dataptr = NpyIter_GetDataPtrArray(iter);
Expand All @@ -411,9 +429,11 @@ PyObject* GetCoords(PyArrayObject* arr, int include_z) {
/* We work with a "cursor" that tells the get_coordinates function where
to write the coordinate data into the output array "result" */
cursor = 0;
geom_i = -1;
do {
/* get the geometry */
obj = *(GeometryObject**)dataptr[0];
geom_i++;
if (!get_geom(obj, &geom)) {
errstate = PGERR_NOT_A_GEOMETRY;
goto finish;
Expand All @@ -422,11 +442,20 @@ PyObject* GetCoords(PyArrayObject* arr, int include_z) {
if (geom == NULL) {
continue;
}
/* get the coordinates */
/* keep track of the current cursor in "i" to be able to loop from the
first to the last coordinate belonging to this geometry later */
i = cursor;
/* get the coordinates (updates "cursor") */
if (!get_coordinates(ctx, geom, result, &cursor, include_z)) {
errstate = PGERR_GEOS_EXCEPTION;
goto finish;
}
if (return_index) {
/* loop from "i" to "cursor" */
for (; i < cursor; i++) {
*(npy_intp*)PyArray_GETPTR1(index, i) = geom_i;
}
}
} while (iternext(iter));

finish:
Expand All @@ -435,7 +464,13 @@ PyObject* GetCoords(PyArrayObject* arr, int include_z) {

if (errstate != PGERR_SUCCESS) {
Py_DECREF(result);
Py_XDECREF(index);
return NULL;
} else if (return_index) {
PyObject* result_tpl = PyTuple_New(2);
PyTuple_SET_ITEM(result_tpl, 0, (PyObject*)result);
PyTuple_SET_ITEM(result_tpl, 1, (PyObject*)index);
return result_tpl;
} else {
return (PyObject*)result;
}
Expand Down Expand Up @@ -465,7 +500,7 @@ PyObject* SetCoords(PyArrayObject* geoms, PyArrayObject* coords) {
/* We use the Numpy iterator C-API here.
The iterator exposes an "iternext" function which updates a "dataptr"
see also: https://docs.scipy.org/doc/numpy/reference/c-api.iterator.html */
iter = NpyIter_New(geoms, NPY_ITER_READWRITE | NPY_ITER_REFS_OK, NPY_KEEPORDER,
iter = NpyIter_New(geoms, NPY_ITER_READWRITE | NPY_ITER_REFS_OK, NPY_CORDER,
NPY_NO_CASTING, NULL);
if (iter == NULL) {
return NULL;
Expand Down Expand Up @@ -542,8 +577,9 @@ PyObject* PyCountCoords(PyObject* self, PyObject* args) {
PyObject* PyGetCoords(PyObject* self, PyObject* args) {
PyObject* arr;
int include_z;
int return_index;

if (!PyArg_ParseTuple(args, "Op", &arr, &include_z)) {
if (!PyArg_ParseTuple(args, "Opp", &arr, &include_z, &return_index)) {
return NULL;
}
if (!PyArray_Check(arr)) {
Expand All @@ -554,7 +590,7 @@ PyObject* PyGetCoords(PyObject* self, PyObject* args) {
PyErr_SetString(PyExc_TypeError, "Array should be of object dtype");
return NULL;
}
return GetCoords((PyArrayObject*)arr, include_z);
return GetCoords((PyArrayObject*)arr, include_z, return_index);
}

PyObject* PySetCoords(PyObject* self, PyObject* args) {
Expand Down

0 comments on commit 9193765

Please sign in to comment.