Skip to content

Commit

Permalink
Raise warning and return None on invalid WKB/WKT instead of raising e…
Browse files Browse the repository at this point in the history
…xception (#321)
  • Loading branch information
brendan-ward committed Apr 1, 2021
1 parent 942b3ce commit ff7d0d9
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 26 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Version 0.10 (unreleased)
* Enable bulk construction of collections with different number of geometries
by optionally taking index arrays in the constructors ``multipoints``,
``multilinestrings``, ``multipolygons``, and ``geometrycollections`` (#290).
* Released GIL for ``points``, ``linestrings``, ``linearrings``, and
* Released GIL for ``points``, ``linestrings``, ``linearrings``, and
``polygons`` (without holes) (#310).
* Added the option to return the geometry index in ``get_coordinates`` (#318).
* Updated ``box`` ufunc to use internal C function for creating polygon
Expand All @@ -30,6 +30,8 @@ Version 0.10 (unreleased)
This will be removed in a future release.
* ``points``, ``linestrings``, ``linearrings``, and ``polygons`` now return a ``GEOSException``
instead of a ``ValueError`` for invalid input (#310).
* Addition of ``on_invalid`` parameter to ``from_wkb`` and ``from_wkt`` to
optionally return invalid WKB geometries as ``None``.

**Added GEOS functions**

Expand All @@ -40,7 +42,7 @@ Version 0.10 (unreleased)
**Bug fixes**

* Fixed portability issue for ARM architecture (#293)
* Fixed segfault in ``linearrings`` and ``box`` when constructing a geometry with nan
* Fixed segfault in ``linearrings`` and ``box`` when constructing a geometry with nan
coordinates (#310).

**Acknowledgments**
Expand Down
39 changes: 35 additions & 4 deletions pygeos/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,21 @@
from . import Geometry # noqa
from . import lib
from . import geos_capi_version_string
from .enum import ParamEnum


# Allowed options for handling WKB/WKT decoding errors
# Note: cannot use standard constructor since "raise" is a keyword
DecodingErrorOptions = ParamEnum(
"DecodingErrorOptions", {"ignore": 0, "warn": 1, "raise": 2}
)


shapely_geos_version = None
ShapelyGeometry = None
_shapely_checked = False


def check_shapely_version():
global shapely_geos_version
global ShapelyGeometry
Expand Down Expand Up @@ -161,7 +170,7 @@ def to_wkb(
)


def from_wkt(geometry, **kwargs):
def from_wkt(geometry, on_invalid="raise", **kwargs):
"""
Creates geometries from the Well-Known Text (WKT) representation.
Expand All @@ -172,37 +181,59 @@ def from_wkt(geometry, **kwargs):
----------
geometry : str or array_like
The WKT string(s) to convert.
on_invalid : {"raise", "warn", "ignore"}
- raise: an exception will be raised if WKT input geometries are invalid.
- warn: a warning will be raised and invalid WKT geometries will be
returned as `None`.
- ignore: invalid WKT geometries will be returned as `None` without a warning.
Examples
--------
>>> from_wkt('POINT (0 0)')
<pygeos.Geometry POINT (0 0)>
"""
return lib.from_wkt(geometry, **kwargs)
if not np.isscalar(on_invalid):
raise TypeError("on_invalid only accepts scalar values")

invalid_handler = np.uint8(DecodingErrorOptions.get_value(on_invalid))

return lib.from_wkt(geometry, invalid_handler, **kwargs)


def from_wkb(geometry, **kwargs):
def from_wkb(geometry, on_invalid="raise", **kwargs):
r"""
Creates geometries from the Well-Known Binary (WKB) representation.
The Well-Known Binary format is defined in the `OGC Simple Features
Specification for SQL <https://www.opengeospatial.org/standards/sfs>`__.
Parameters
----------
geometry : str or array_like
The WKB byte object(s) to convert.
on_invalid : {"raise", "warn", "ignore"}
- raise: an exception will be raised if WKB input geometries are invalid.
- warn: a warning will be raised and invalid WKB geometries will be
returned as `None`.
- ignore: invalid WKB geometries will be returned as `None` without a warning.
Examples
--------
>>> from_wkb(b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?')
<pygeos.Geometry POINT (1 1)>
"""

if not np.isscalar(on_invalid):
raise TypeError("on_invalid only accepts scalar values")

invalid_handler = np.uint8(DecodingErrorOptions.get_value(on_invalid))

# ensure the input has object dtype, to avoid numpy inferring it as a
# fixed-length string dtype (which removes trailing null bytes upon access
# of array elements)
geometry = np.asarray(geometry, dtype=object)
return lib.from_wkb(geometry, **kwargs)
return lib.from_wkb(geometry, invalid_handler, **kwargs)


def from_shapely(geometry, **kwargs):
Expand Down
82 changes: 77 additions & 5 deletions pygeos/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,36 @@ def test_from_wkt_exceptions():
with pytest.raises(TypeError, match="Expected bytes, got int"):
pygeos.from_wkt(1)

with pytest.raises(pygeos.GEOSException):
with pytest.raises(
pygeos.GEOSException, match="Expected word but encountered end of stream"
):
pygeos.from_wkt("")

with pytest.raises(pygeos.GEOSException):
with pytest.raises(pygeos.GEOSException, match="Unknown type: 'NOT'"):
pygeos.from_wkt("NOT A WKT STRING")


def test_from_wkt_warn_on_invalid():
with pytest.warns(Warning, match="Invalid WKT"):
pygeos.from_wkt("", on_invalid="warn")

with pytest.warns(Warning, match="Invalid WKT"):
pygeos.from_wkt("NOT A WKT STRING", on_invalid="warn")


def test_from_wkb_ignore_on_invalid():
with pytest.warns(None):
pygeos.from_wkt("", on_invalid="ignore")

with pytest.warns(None):
pygeos.from_wkt("NOT A WKT STRING", on_invalid="ignore")


def test_from_wkt_on_invalid_unsupported_option():
with pytest.raises(ValueError, match="not a valid option"):
pygeos.from_wkt(b"\x01\x01\x00\x00\x00\x00", on_invalid="unsupported_option")


@pytest.mark.parametrize("geom", all_types)
def test_from_wkt_all_types(geom):
wkt = pygeos.to_wkt(geom)
Expand Down Expand Up @@ -99,8 +122,57 @@ def test_from_wkb_exceptions():
with pytest.raises(TypeError, match="Expected bytes, got int"):
pygeos.from_wkb(1)

with pytest.raises(pygeos.GEOSException):
pygeos.from_wkb(b"\x01\x01\x00\x00\x00\x00")
# invalid WKB
with pytest.raises(pygeos.GEOSException, match="Unexpected EOF parsing WKB"):
result = pygeos.from_wkb(b"\x01\x01\x00\x00\x00\x00")
assert result is None

# invalid ring in WKB
with pytest.raises(
pygeos.GEOSException,
match="Invalid number of points in LinearRing found 3 - must be 0 or >= 4",
):
result = pygeos.from_wkb(
b"\x01\x03\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00P}\xae\xc6\x00\xb15A\x00\xde\x02I\x8e^=A0n\xa3!\xfc\xb05A\xa0\x11\xa5=\x90^=AP}\xae\xc6\x00\xb15A\x00\xde\x02I\x8e^=A"
)
assert result is None


def test_from_wkb_warn_on_invalid():
# invalid WKB
with pytest.warns(Warning, match="Invalid WKB"):
result = pygeos.from_wkb(b"\x01\x01\x00\x00\x00\x00", on_invalid="warn")
assert result is None

# invalid ring in WKB
with pytest.warns(Warning, match="Invalid WKB"):
result = pygeos.from_wkb(
b"\x01\x03\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00P}\xae\xc6\x00\xb15A\x00\xde\x02I\x8e^=A0n\xa3!\xfc\xb05A\xa0\x11\xa5=\x90^=AP}\xae\xc6\x00\xb15A\x00\xde\x02I\x8e^=A",
on_invalid="warn"
)
assert result is None


def test_from_wkb_ignore_on_invalid():
# invalid WKB
with pytest.warns(None) as w:
result = pygeos.from_wkb(b"\x01\x01\x00\x00\x00\x00", on_invalid="ignore")
assert result is None
assert len(w) == 0 # no warning

# invalid ring in WKB
with pytest.warns(None) as w:
result = pygeos.from_wkb(
b"\x01\x03\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00P}\xae\xc6\x00\xb15A\x00\xde\x02I\x8e^=A0n\xa3!\xfc\xb05A\xa0\x11\xa5=\x90^=AP}\xae\xc6\x00\xb15A\x00\xde\x02I\x8e^=A",
on_invalid="ignore",
)
assert result is None
assert len(w) == 0 # no warning


def test_from_wkb_on_invalid_unsupported_option():
with pytest.raises(ValueError, match="not a valid option"):
pygeos.from_wkb(b"\x01\x01\x00\x00\x00\x00", on_invalid="unsupported_option")


@pytest.mark.parametrize("geom", all_types)
Expand Down Expand Up @@ -325,7 +397,7 @@ def test_to_wkb_point_empty_srid():
wkb = pygeos.to_wkb(expected, include_srid=True)
actual = pygeos.from_wkb(wkb)
assert pygeos.get_srid(actual) == 4236


@pytest.mark.parametrize("geom", all_types)
@mock.patch("pygeos.io.ShapelyGeometry", ShapelyGeometryMock)
Expand Down
12 changes: 11 additions & 1 deletion src/geos.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ enum {
PGERR_NO_MALLOC,
PGERR_GEOMETRY_TYPE,
PGERR_MULTIPOINT_WITH_POINT_EMPTY,
PGERR_EMPTY_GEOMETRY
PGERR_EMPTY_GEOMETRY,
PGWARN_INVALID_WKB, // raise the GEOS WKB error as a warning instead of exception
PGWARN_INVALID_WKT // raise the GEOS WKB error as a warning instead of exception
};

// Define how the states are handled by CPython
Expand Down Expand Up @@ -85,6 +87,14 @@ enum {
case PGERR_EMPTY_GEOMETRY: \
PyErr_SetString(PyExc_ValueError, "One of the Geometry inputs is empty."); \
break; \
case PGWARN_INVALID_WKB: \
PyErr_WarnFormat(PyExc_Warning, 0, \
"Invalid WKB: geometry is returned as None. %s", last_error); \
break; \
case PGWARN_INVALID_WKT: \
PyErr_WarnFormat(PyExc_Warning, 0, \
"Invalid WKT: geometry is returned as None. %s", last_error); \
break; \
default: \
PyErr_Format(PyExc_RuntimeError, \
"Pygeos ufunc returned with unknown error state code %d.", errstate); \
Expand Down
60 changes: 46 additions & 14 deletions src/ufuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2301,17 +2301,26 @@ static PyUFuncGenericFunction bounds_funcs[1] = {&bounds_func};

/* Define the object -> geom functions (O_Y) */

static char from_wkb_dtypes[2] = {NPY_OBJECT, NPY_OBJECT};
static char from_wkb_dtypes[3] = {NPY_OBJECT, NPY_UINT8, NPY_OBJECT};
static void from_wkb_func(char** args, npy_intp* dimensions, npy_intp* steps,
void* data) {
GEOSGeometry* ret_ptr;
char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
PyObject* in1;

npy_uint8 on_invalid = *(npy_uint8*)ip2;
npy_intp n = dimensions[0];
npy_intp i;
GEOSWKBReader* reader;
unsigned char* wkb;
GEOSGeometry* ret_ptr;
Py_ssize_t size;
char is_hex;

if ((is2 != 0)) {
PyErr_Format(PyExc_ValueError, "from_wkb function called with non-scalar parameters");
return;
}

GEOS_INIT;

/* Create the WKB reader */
Expand All @@ -2321,7 +2330,7 @@ static void from_wkb_func(char** args, npy_intp* dimensions, npy_intp* steps,
goto finish;
}

UNARY_LOOP {
for (i = 0; i < n; i++, ip1 += is1, op1 += os1) {
/* ip1 is pointer to array element PyObject* */
in1 = *(PyObject**)ip1;

Expand Down Expand Up @@ -2362,8 +2371,15 @@ static void from_wkb_func(char** args, npy_intp* dimensions, npy_intp* steps,
ret_ptr = GEOSWKBReader_read_r(ctx, reader, wkb, size);
}
if (ret_ptr == NULL) {
errstate = PGERR_GEOS_EXCEPTION;
goto finish;
if (on_invalid == 2) {
// raise exception
errstate = PGERR_GEOS_EXCEPTION;
goto finish;
} else if (on_invalid == 1) {
// raise warning, return None
errstate = PGWARN_INVALID_WKB;
}
// else: return None, no warning
}
}
OUTPUT_Y;
Expand All @@ -2375,15 +2391,24 @@ static void from_wkb_func(char** args, npy_intp* dimensions, npy_intp* steps,
}
static PyUFuncGenericFunction from_wkb_funcs[1] = {&from_wkb_func};

static char from_wkt_dtypes[2] = {NPY_OBJECT, NPY_OBJECT};
static char from_wkt_dtypes[3] = {NPY_OBJECT, NPY_UINT8, NPY_OBJECT};
static void from_wkt_func(char** args, npy_intp* dimensions, npy_intp* steps,
void* data) {
GEOSGeometry* ret_ptr;
char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
PyObject* in1;

npy_uint8 on_invalid = *(npy_uint8*)ip2;
npy_intp n = dimensions[0];
npy_intp i;
GEOSGeometry* ret_ptr;
GEOSWKTReader* reader;
const char* wkt;

if ((is2 != 0)) {
PyErr_Format(PyExc_ValueError, "from_wkt function called with non-scalar parameters");
return;
}

GEOS_INIT;

/* Create the WKT reader */
Expand All @@ -2393,7 +2418,7 @@ static void from_wkt_func(char** args, npy_intp* dimensions, npy_intp* steps,
goto finish;
}

UNARY_LOOP {
for (i = 0; i < n; i++, ip1 += is1, op1 += os1) {
/* ip1 is pointer to array element PyObject* */
in1 = *(PyObject**)ip1;

Expand Down Expand Up @@ -2422,8 +2447,15 @@ static void from_wkt_func(char** args, npy_intp* dimensions, npy_intp* steps,
/* Read the WKT */
ret_ptr = GEOSWKTReader_read_r(ctx, reader, wkt);
if (ret_ptr == NULL) {
errstate = PGERR_GEOS_EXCEPTION;
goto finish;
if (on_invalid == 2) {
// raise exception
errstate = PGERR_GEOS_EXCEPTION;
goto finish;
} else if (on_invalid == 1) {
// raise warning, return None
errstate = PGWARN_INVALID_WKT;
}
// else: return None, no warning
}
}
OUTPUT_Y;
Expand Down Expand Up @@ -2862,8 +2894,8 @@ int init_ufuncs(PyObject* m, PyObject* d) {
DEFINE_GENERALIZED(polygons_with_holes, 2, "(),(i)->()");
DEFINE_GENERALIZED(create_collection, 2, "(i),()->()");

DEFINE_CUSTOM(from_wkb, 1);
DEFINE_CUSTOM(from_wkt, 1);
DEFINE_CUSTOM(from_wkb, 2);
DEFINE_CUSTOM(from_wkt, 2);
DEFINE_CUSTOM(to_wkb, 5);
DEFINE_CUSTOM(to_wkt, 5);
DEFINE_CUSTOM(from_shapely, 1);
Expand Down

0 comments on commit ff7d0d9

Please sign in to comment.