Skip to content

Commit

Permalink
Support pickling (pygeos/pygeos#190)
Browse files Browse the repository at this point in the history
  • Loading branch information
caspervdw committed Sep 23, 2020
1 parent dbe2fe0 commit 5a045e7
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 25 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Expand Up @@ -6,6 +6,7 @@ Version 0.9 (unreleased)

**Highlights of this release**

* Added support for pickling to ``Geometry`` objects (#190)
* Limited the length of geometry repr to 80 characters (#189)


Expand Down
73 changes: 71 additions & 2 deletions docs/geometry.rst
@@ -1,5 +1,74 @@
Geometry properties
===================
Geometry
========

The `pygeos.Geometry` class is the central datatype in PyGEOS.
An instance of `pygeos.Geometry` is a container of the actual GEOSGeometry object.
The Geometry object keeps track of the underlying GEOSGeometry and
lets the python garbage collector free its memory when it is not
used anymore.

Geometry objects are immutable. This means that after constructed, they cannot
be changed in place. Every PyGEOS operation will result in a new object being returned.

Construction
~~~~~~~~~~~~

For convenience, the ``Geometry`` class can be constructed with a WKT (Well-Known Text)
or WKB (Well-Known Binary) representation of your geometry:

.. code:: python
>>> from pygeos import Geometry
>>> point_1 = Geometry("POINT (5.2 52.1)")
>>> point_2 = Geometry(b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?")
A more efficient way of constructing geometries is by making use of the (vectorized)
functions described in :mod:`pygeos.creation`.

Pickling
~~~~~~~~

Geometries can be serialized using pickle:

>>> import pickle
>>> pickled = pickle.dumps(point_1)
>>> pickle.loads(point_1)
<pygeos.Geometry POINT (5.2 52.1)>

.. warning:: Pickling is not supported for linearrings and empty points.
See :func:`pygeos.io.to_wkb` for a complete list of limitations.

Hashing
~~~~~~~

Geometries can be used as elements in sets or as keys in dictionaries.
Python uses a technique called *hashing* for lookups in these datastructures.
PyGEOS generates this hash from the WKB representation.
Therefore, geometries are equal if and only if their WKB representations are equal.

.. code:: python
>>> point_3 = Geometry("POINT (5.2 52.1)")
>>> {point_1, point_2, point_3}
{<pygeos.Geometry POINT (5.2 52.1)>, <pygeos.Geometry POINT (1 1)>}
.. warning:: Due to limitations of WKB, linearrings will equal linearrings if they contain the exact same points.
Also, different kind of empty points will be considered equal. See :func:`pygeos.io.to_wkb`.

Comparing two geometries directly is also supported.
This is the same as using :func:`pygeos.predicates.equals_exact` with a ``tolerance`` value of zero.

>>> point_1 == point_2
False
>>> point_1 != point_2
True


Properties
~~~~~~~~~~

Geometry objects have no attributes or properties.
Instead, use functions listed below to obtain information about geometry objects.

.. automodule:: pygeos.geometry
:members:
Expand Down
7 changes: 7 additions & 0 deletions pygeos/io.py
Expand Up @@ -109,6 +109,13 @@ def to_wkb(
The Well-Known Binary format is defined in the `OGC Simple Features
Specification for SQL <https://www.opengeospatial.org/standards/sfs>`__.
The following limitations apply to WKB serialization:
- linearrings will be converted to linestrings
- a point with only NaN coordinates is converted to an empty point
- empty points are transformed to 3D in GEOS < 3.8
- empty points are transformed to 2D in GEOS 3.8
Parameters
----------
geometry : Geometry or array_like
Expand Down
20 changes: 19 additions & 1 deletion pygeos/test/test_io.py
@@ -1,10 +1,11 @@
import numpy as np
import pygeos
import pytest
import pickle
import struct
from unittest import mock

from .common import all_types, point, empty_point
from .common import all_types, point, empty_point, point_z


POINT11_WKB = b'\x01\x01\x00\x00\x00' + struct.pack("<2d", 1., 1.)
Expand Down Expand Up @@ -361,3 +362,20 @@ def test_from_shapely_error(geom):
def test_from_shapely_incompatible_versions():
with pytest.raises(ImportError):
pygeos.from_shapely(point)


@pytest.mark.parametrize("geom", all_types + (point_z, empty_point))
def test_pickle(geom):
if pygeos.get_type_id(geom) == 2:
# Linearrings get converted to linestrings
expected = pygeos.linestrings(pygeos.get_coordinates(geom))
else:
expected = geom
pickled = pickle.dumps(geom)
assert pygeos.equals_exact(pickle.loads(pickled), expected)


def test_pickle_with_srid():
geom = pygeos.set_srid(point, 4326)
pickled = pickle.dumps(geom)
assert pygeos.get_srid(pickle.loads(pickled)) == 4326
154 changes: 132 additions & 22 deletions src/pygeom.c
Expand Up @@ -95,6 +95,63 @@ static PyObject *GeometryObject_ToWKT(GeometryObject *obj)
}
}


static PyObject *GeometryObject_ToWKB(GeometryObject *obj)
{
unsigned char *wkb = NULL;
char has_empty = 0;
size_t size;
PyObject *result = NULL;
GEOSGeometry *geom = NULL;
GEOSWKBWriter *writer = NULL;
if (obj->ptr == NULL) {
Py_INCREF(Py_None);
return Py_None;
}

GEOS_INIT;

// WKB Does not allow empty points.
// We check for that and patch the POINT EMPTY if necessary
has_empty = has_point_empty(ctx, obj->ptr);
if (has_empty == 2) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }
if (has_empty == 1) {
geom = point_empty_to_nan_all_geoms(ctx, obj->ptr);
} else {
geom = obj->ptr;
}

/* Create the WKB writer */
writer = GEOSWKBWriter_create_r(ctx);
if (writer == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }
// Allow 3D output and include SRID
GEOSWKBWriter_setOutputDimension_r(ctx, writer, 3);
GEOSWKBWriter_setIncludeSRID_r(ctx, writer, 1);
// Check if the above functions caused a GEOS exception
if (last_error[0] != 0) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }

wkb = GEOSWKBWriter_write_r(ctx, writer, geom, &size);
if (wkb == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }

result = PyBytes_FromStringAndSize((char *) wkb, size);

finish:
// Destroy the geom if it was patched (POINT EMPTY patch)
if (has_empty & (geom != NULL)) {
GEOSGeom_destroy_r(ctx, geom);
}
if (writer != NULL) {
GEOSWKBWriter_destroy_r(ctx, writer);
}
if (wkb != NULL) {
GEOSFree_r(ctx, wkb);
}

GEOS_FINISH;

return result;
}

static PyObject *GeometryObject_repr(GeometryObject *self)
{
PyObject *result, *wkt, *truncated;
Expand Down Expand Up @@ -122,40 +179,50 @@ static PyObject *GeometryObject_str(GeometryObject *self)
return GeometryObject_ToWKT(self);
}


/* For pickling. To be used in GeometryObject->tp_reduce.
* reduce should return a a tuple of (callable, args).
* On unpickling, callable(*args) is called */
static PyObject *GeometryObject_reduce(PyObject *self)
{
Py_INCREF(self->ob_type);
return PyTuple_Pack(
2,
self->ob_type,
PyTuple_Pack(
1,
GeometryObject_ToWKB((GeometryObject *)self)
)
);
}


/* For lookups in sets / dicts.
* Python should be told how to generate a hash from the Geometry object. */
static Py_hash_t GeometryObject_hash(GeometryObject *self)
{
unsigned char *wkb;
size_t size;
PyObject *wkb = NULL;
Py_hash_t x;

if (self->ptr == NULL) {
return -1;
}

GEOS_INIT;
GEOSWKBWriter *writer = GEOSWKBWriter_create_r(ctx);
if (writer == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }
// Transform to a WKB (PyBytes object)
wkb = GeometryObject_ToWKB(self);
if (wkb == NULL) { return -1; }

GEOSWKBWriter_setOutputDimension_r(ctx, writer, 3);
GEOSWKBWriter_setIncludeSRID_r(ctx, writer, 1);
wkb = GEOSWKBWriter_write_r(ctx, writer, self->ptr, &size);
GEOSWKBWriter_destroy_r(ctx, writer);
if (wkb == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }
x = PyHash_GetFuncDef()->hash(wkb, size);
// Use the python built-in method to hash the PyBytes object
x = wkb->ob_type->tp_hash(wkb);
if (x == -1) {
x = -2;
} else {
x ^= 374761393UL; // to make the result distinct from the actual WKB hash //
}
GEOSFree_r(ctx, wkb);

finish:
GEOS_FINISH;
if (errstate == PGERR_SUCCESS) {
return x;
} else {
return -1;
}
Py_DECREF(wkb);

return x;
}

static PyObject *GeometryObject_richcompare(GeometryObject *self, PyObject *other, int op) {
Expand Down Expand Up @@ -229,6 +296,47 @@ static PyObject *GeometryObject_FromWKT(PyObject *value)
}
}

static PyObject *GeometryObject_FromWKB(PyObject *value)
{
PyObject *result = NULL;
unsigned char *wkb = NULL;
Py_ssize_t size;
GEOSGeometry *geom = NULL;
GEOSWKBReader *reader = NULL;

/* Cast the PyObject bytes to char* */
if (!PyBytes_Check(value)) {
PyErr_Format(PyExc_TypeError, "Expected bytes, found %s", value->ob_type->tp_name);
return NULL;
}
size = PyBytes_Size(value);
wkb = (unsigned char *)PyBytes_AsString(value);
if (wkb == NULL) { return NULL; }

GEOS_INIT;

reader = GEOSWKBReader_create_r(ctx);
if (reader == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }
geom = GEOSWKBReader_read_r(ctx, reader, wkb, size);
if (geom == NULL) { errstate = PGERR_GEOS_EXCEPTION; goto finish; }

result = GeometryObject_FromGEOS(geom, ctx);
if (result == NULL) {
GEOSGeom_destroy_r(ctx, geom);
PyErr_Format(PyExc_RuntimeError, "Could not instantiate a new Geometry object");
}

finish:

if (reader != NULL) {
GEOSWKBReader_destroy_r(ctx, reader);
}

GEOS_FINISH;

return result;
}

static PyObject *GeometryObject_new(PyTypeObject *type, PyObject *args,
PyObject *kwds)
{
Expand All @@ -239,20 +347,22 @@ static PyObject *GeometryObject_new(PyTypeObject *type, PyObject *args,
}
else if (PyUnicode_Check(value)) {
return GeometryObject_FromWKT(value);
}
else {
} else if (PyBytes_Check(value)) {
return GeometryObject_FromWKB(value);
} else {
PyErr_Format(PyExc_TypeError, "Expected string, got %s", value->ob_type->tp_name);
return NULL;
}
}

static PyMethodDef GeometryObject_methods[] = {
{"__reduce__", (PyCFunction) GeometryObject_reduce, METH_NOARGS, "For pickling."},
{NULL} /* Sentinel */
};

PyTypeObject GeometryType = {
PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "pygeos.lib.GEOSGeometry",
.tp_name = "pygeos.lib.Geometry",
.tp_doc = "Geometry type",
.tp_basicsize = sizeof(GeometryObject),
.tp_itemsize = 0,
Expand Down

0 comments on commit 5a045e7

Please sign in to comment.