From 5a045e702b9aad671b14c93e127afe61999f06ff Mon Sep 17 00:00:00 2001 From: Casper van der Wel Date: Wed, 23 Sep 2020 21:00:01 +0200 Subject: [PATCH] Support pickling (pygeos/pygeos#190) --- CHANGELOG.rst | 1 + docs/geometry.rst | 73 ++++++++++++++++++- pygeos/io.py | 7 ++ pygeos/test/test_io.py | 20 +++++- src/pygeom.c | 154 +++++++++++++++++++++++++++++++++++------ 5 files changed, 230 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index eb314fb85..dc8b57086 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ Version 0.9 (unreleased) **Highlights of this release** +* Added support for pickling to ``Geometry`` objects (#190) * Limited the length of geometry repr to 80 characters (#189) diff --git a/docs/geometry.rst b/docs/geometry.rst index 6a6600fbd..53f265f3a 100644 --- a/docs/geometry.rst +++ b/docs/geometry.rst @@ -1,5 +1,74 @@ -Geometry properties -=================== +Geometry +======== + +The `pygeos.Geometry` class is the central datatype in PyGEOS. +An instance of `pygeos.Geometry` is a container of the actual GEOSGeometry object. +The Geometry object keeps track of the underlying GEOSGeometry and +lets the python garbage collector free its memory when it is not +used anymore. + +Geometry objects are immutable. This means that after constructed, they cannot +be changed in place. Every PyGEOS operation will result in a new object being returned. + +Construction +~~~~~~~~~~~~ + +For convenience, the ``Geometry`` class can be constructed with a WKT (Well-Known Text) +or WKB (Well-Known Binary) representation of your geometry: + +.. code:: python + + >>> from pygeos import Geometry + >>> point_1 = Geometry("POINT (5.2 52.1)") + >>> point_2 = Geometry(b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?") + +A more efficient way of constructing geometries is by making use of the (vectorized) +functions described in :mod:`pygeos.creation`. + +Pickling +~~~~~~~~ + +Geometries can be serialized using pickle: + + >>> import pickle + >>> pickled = pickle.dumps(point_1) + >>> pickle.loads(point_1) + + +.. warning:: Pickling is not supported for linearrings and empty points. + See :func:`pygeos.io.to_wkb` for a complete list of limitations. + +Hashing +~~~~~~~ + +Geometries can be used as elements in sets or as keys in dictionaries. +Python uses a technique called *hashing* for lookups in these datastructures. +PyGEOS generates this hash from the WKB representation. +Therefore, geometries are equal if and only if their WKB representations are equal. + +.. code:: python + + >>> point_3 = Geometry("POINT (5.2 52.1)") + >>> {point_1, point_2, point_3} + {, } + +.. warning:: Due to limitations of WKB, linearrings will equal linearrings if they contain the exact same points. + Also, different kind of empty points will be considered equal. See :func:`pygeos.io.to_wkb`. + +Comparing two geometries directly is also supported. +This is the same as using :func:`pygeos.predicates.equals_exact` with a ``tolerance`` value of zero. + + >>> point_1 == point_2 + False + >>> point_1 != point_2 + True + + +Properties +~~~~~~~~~~ + +Geometry objects have no attributes or properties. +Instead, use functions listed below to obtain information about geometry objects. .. automodule:: pygeos.geometry :members: diff --git a/pygeos/io.py b/pygeos/io.py index ac29b00e4..45bcdf763 100644 --- a/pygeos/io.py +++ b/pygeos/io.py @@ -109,6 +109,13 @@ def to_wkb( The Well-Known Binary format is defined in the `OGC Simple Features Specification for SQL `__. + The following limitations apply to WKB serialization: + + - linearrings will be converted to linestrings + - a point with only NaN coordinates is converted to an empty point + - empty points are transformed to 3D in GEOS < 3.8 + - empty points are transformed to 2D in GEOS 3.8 + Parameters ---------- geometry : Geometry or array_like diff --git a/pygeos/test/test_io.py b/pygeos/test/test_io.py index 288450a72..b52a89fc6 100644 --- a/pygeos/test/test_io.py +++ b/pygeos/test/test_io.py @@ -1,10 +1,11 @@ import numpy as np import pygeos import pytest +import pickle import struct from unittest import mock -from .common import all_types, point, empty_point +from .common import all_types, point, empty_point, point_z POINT11_WKB = b'\x01\x01\x00\x00\x00' + struct.pack("<2d", 1., 1.) @@ -361,3 +362,20 @@ def test_from_shapely_error(geom): def test_from_shapely_incompatible_versions(): with pytest.raises(ImportError): pygeos.from_shapely(point) + + +@pytest.mark.parametrize("geom", all_types + (point_z, empty_point)) +def test_pickle(geom): + if pygeos.get_type_id(geom) == 2: + # Linearrings get converted to linestrings + expected = pygeos.linestrings(pygeos.get_coordinates(geom)) + else: + expected = geom + pickled = pickle.dumps(geom) + assert pygeos.equals_exact(pickle.loads(pickled), expected) + + +def test_pickle_with_srid(): + geom = pygeos.set_srid(point, 4326) + pickled = pickle.dumps(geom) + assert pygeos.get_srid(pickle.loads(pickled)) == 4326 diff --git a/src/pygeom.c b/src/pygeom.c index c0bff7079..53f3cc214 100644 --- a/src/pygeom.c +++ b/src/pygeom.c @@ -95,6 +95,63 @@ static PyObject *GeometryObject_ToWKT(GeometryObject *obj) } } + +static PyObject *GeometryObject_ToWKB(GeometryObject *obj) +{ + unsigned char *wkb = NULL; + char has_empty = 0; + size_t size; + PyObject *result = NULL; + GEOSGeometry *geom = NULL; + GEOSWKBWriter *writer = NULL; + if (obj->ptr == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + + GEOS_INIT; + + // WKB Does not allow empty points. + // We check for that and patch the POINT EMPTY if necessary + has_empty = has_point_empty(ctx, obj->ptr); + if (has_empty == 2) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } + if (has_empty == 1) { + geom = point_empty_to_nan_all_geoms(ctx, obj->ptr); + } else { + geom = obj->ptr; + } + + /* Create the WKB writer */ + writer = GEOSWKBWriter_create_r(ctx); + if (writer == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } + // Allow 3D output and include SRID + GEOSWKBWriter_setOutputDimension_r(ctx, writer, 3); + GEOSWKBWriter_setIncludeSRID_r(ctx, writer, 1); + // Check if the above functions caused a GEOS exception + if (last_error[0] != 0) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } + + wkb = GEOSWKBWriter_write_r(ctx, writer, geom, &size); + if (wkb == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } + + result = PyBytes_FromStringAndSize((char *) wkb, size); + + finish: + // Destroy the geom if it was patched (POINT EMPTY patch) + if (has_empty & (geom != NULL)) { + GEOSGeom_destroy_r(ctx, geom); + } + if (writer != NULL) { + GEOSWKBWriter_destroy_r(ctx, writer); + } + if (wkb != NULL) { + GEOSFree_r(ctx, wkb); + } + + GEOS_FINISH; + + return result; +} + static PyObject *GeometryObject_repr(GeometryObject *self) { PyObject *result, *wkt, *truncated; @@ -122,40 +179,50 @@ static PyObject *GeometryObject_str(GeometryObject *self) return GeometryObject_ToWKT(self); } + +/* For pickling. To be used in GeometryObject->tp_reduce. + * reduce should return a a tuple of (callable, args). + * On unpickling, callable(*args) is called */ +static PyObject *GeometryObject_reduce(PyObject *self) +{ + Py_INCREF(self->ob_type); + return PyTuple_Pack( + 2, + self->ob_type, + PyTuple_Pack( + 1, + GeometryObject_ToWKB((GeometryObject *)self) + ) + ); +} + + +/* For lookups in sets / dicts. + * Python should be told how to generate a hash from the Geometry object. */ static Py_hash_t GeometryObject_hash(GeometryObject *self) { - unsigned char *wkb; - size_t size; + PyObject *wkb = NULL; Py_hash_t x; if (self->ptr == NULL) { return -1; } - GEOS_INIT; - GEOSWKBWriter *writer = GEOSWKBWriter_create_r(ctx); - if (writer == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } + // Transform to a WKB (PyBytes object) + wkb = GeometryObject_ToWKB(self); + if (wkb == NULL) { return -1; } - GEOSWKBWriter_setOutputDimension_r(ctx, writer, 3); - GEOSWKBWriter_setIncludeSRID_r(ctx, writer, 1); - wkb = GEOSWKBWriter_write_r(ctx, writer, self->ptr, &size); - GEOSWKBWriter_destroy_r(ctx, writer); - if (wkb == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } - x = PyHash_GetFuncDef()->hash(wkb, size); + // Use the python built-in method to hash the PyBytes object + x = wkb->ob_type->tp_hash(wkb); if (x == -1) { x = -2; } else { x ^= 374761393UL; // to make the result distinct from the actual WKB hash // } - GEOSFree_r(ctx, wkb); - finish: - GEOS_FINISH; - if (errstate == PGERR_SUCCESS) { - return x; - } else { - return -1; - } + Py_DECREF(wkb); + + return x; } static PyObject *GeometryObject_richcompare(GeometryObject *self, PyObject *other, int op) { @@ -229,6 +296,47 @@ static PyObject *GeometryObject_FromWKT(PyObject *value) } } +static PyObject *GeometryObject_FromWKB(PyObject *value) +{ + PyObject *result = NULL; + unsigned char *wkb = NULL; + Py_ssize_t size; + GEOSGeometry *geom = NULL; + GEOSWKBReader *reader = NULL; + + /* Cast the PyObject bytes to char* */ + if (!PyBytes_Check(value)) { + PyErr_Format(PyExc_TypeError, "Expected bytes, found %s", value->ob_type->tp_name); + return NULL; + } + size = PyBytes_Size(value); + wkb = (unsigned char *)PyBytes_AsString(value); + if (wkb == NULL) { return NULL; } + + GEOS_INIT; + + reader = GEOSWKBReader_create_r(ctx); + if (reader == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } + geom = GEOSWKBReader_read_r(ctx, reader, wkb, size); + if (geom == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; } + + result = GeometryObject_FromGEOS(geom, ctx); + if (result == NULL) { + GEOSGeom_destroy_r(ctx, geom); + PyErr_Format(PyExc_RuntimeError, "Could not instantiate a new Geometry object"); + } + + finish: + + if (reader != NULL) { + GEOSWKBReader_destroy_r(ctx, reader); + } + + GEOS_FINISH; + + return result; +} + static PyObject *GeometryObject_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { @@ -239,20 +347,22 @@ static PyObject *GeometryObject_new(PyTypeObject *type, PyObject *args, } else if (PyUnicode_Check(value)) { return GeometryObject_FromWKT(value); - } - else { + } else if (PyBytes_Check(value)) { + return GeometryObject_FromWKB(value); + } else { PyErr_Format(PyExc_TypeError, "Expected string, got %s", value->ob_type->tp_name); return NULL; } } static PyMethodDef GeometryObject_methods[] = { + {"__reduce__", (PyCFunction) GeometryObject_reduce, METH_NOARGS, "For pickling."}, {NULL} /* Sentinel */ }; PyTypeObject GeometryType = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "pygeos.lib.GEOSGeometry", + .tp_name = "pygeos.lib.Geometry", .tp_doc = "Geometry type", .tp_basicsize = sizeof(GeometryObject), .tp_itemsize = 0,