From 68819f4627e2a49c3f5a4dbdedde82bba5f2ad19 Mon Sep 17 00:00:00 2001 From: Paul Norman Date: Mon, 3 Aug 2015 01:51:29 -0700 Subject: [PATCH 1/4] Don't add invalid geoms to the database Sometimes even after buffer(0), a geometry is invalid. In these cases, don't produce a geometry, but instead skip it. --- geometry-builder.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/geometry-builder.cpp b/geometry-builder.cpp index f5f661e29..5d55ac0c6 100644 --- a/geometry-builder.cpp +++ b/geometry-builder.cpp @@ -109,6 +109,9 @@ geometry_builder::maybe_wkt_t geometry_builder::get_wkt_simple(const nodelist_t throw std::runtime_error("Excluding broken polygon."); } else { geom = geom_ptr(geom->buffer(0)); + if (!geom->isValid()) { + throw std::runtime_error("Excluding unrecoverable broken polygon."); + } } } geom->normalize(); // Fix direction of ring @@ -163,6 +166,9 @@ geometry_builder::maybe_wkts_t geometry_builder::get_wkt_split(const nodelist_t throw std::runtime_error("Excluding broken polygon."); } else { geom = geom_ptr(geom->buffer(0)); + if (!geom->isValid()) { + throw std::runtime_error("Excluding unrecoverable broken polygon."); + } } } geom->normalize(); // Fix direction of ring @@ -439,8 +445,7 @@ geometry_builder::maybe_wkts_t geometry_builder::build_polygons(const multinodel } multipoly->normalize(); - if ((excludepoly == 0) || (multipoly->isValid())) - { + if (multipoly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); //then we set on the one we already have @@ -457,7 +462,7 @@ geometry_builder::maybe_wkts_t geometry_builder::build_polygons(const multinodel poly = dynamic_cast(poly->buffer(0)); poly->normalize(); } - if ((excludepoly == 0) || (poly->isValid())) + if (poly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); @@ -707,8 +712,7 @@ geometry_builder::maybe_wkts_t geometry_builder::build_both(const multinodelist_ } multipoly->normalize(); - if ((excludepoly == 0) || (multipoly->isValid())) - { + if (multipoly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); //then we set on the one we already have @@ -725,7 +729,7 @@ geometry_builder::maybe_wkts_t geometry_builder::build_both(const multinodelist_ poly = dynamic_cast(poly->buffer(0)); poly->normalize(); } - if (!excludepoly || (poly->isValid())) + if (poly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); From 7eb7b3f8cc899d8a067c40fcde2b302f7a7257cc Mon Sep 17 00:00:00 2001 From: Paul Norman Date: Mon, 3 Aug 2015 01:59:29 -0700 Subject: [PATCH 2/4] Improve invalid polygons text --- docs/osm2pgsql.1 | 9 +++++---- options.cpp | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/osm2pgsql.1 b/docs/osm2pgsql.1 index 3bc4a11e5..507c6e999 100644 --- a/docs/osm2pgsql.1 +++ b/docs/osm2pgsql.1 @@ -195,10 +195,11 @@ assumption that post-processed Coastline Checker shape files will be used. OpenStreetMap data is defined in terms of nodes, ways and relations and not in terms of actual geometric features. Osm2pgsql therefore tries to build postgis geometries out of this data representation. However not all ways and relations -correspond to valid postgis geometries (e.g. self intersecting polygons). By -default osm2pgsql tries to automatically fix these geometries using ST_Buffer(0) -around the invalid polygons. With this option, invalid polygons are instead -simply dropped from the database. +correspond to valid PostGIS geometries (e.g. self intersecting polygons). By +default osm2pgsql tries to fix these geometries using buffer(0) around the +invalid polygons. With this option, invalid polygons are instead simply dropped +from the database. Even without this option, all polygons in the database should +be valid. .TP \fB\ \fR\-\-unlogged Use postgresql's unlogged tables for storing data. This requires PostgreSQL 9.1 diff --git a/options.cpp b/options.cpp index b3a166ec3..25b74cff6 100644 --- a/options.cpp +++ b/options.cpp @@ -201,7 +201,7 @@ namespace -K|--keep-coastlines Keep coastline data rather than filtering it out.\n\ By default natural=coastline tagged data will be discarded\n\ because renderers usually have shape files for them.\n\ - --exclude-invalid-polygon do not import polygons with invalid geometries.\n\ + --exclude-invalid-polygon do not attempt to recover invalid geometries.\n\ -h|--help Help information.\n\ -v|--verbose Verbose output.\n"); } From 1a46474300888e961c2cb611a9f9e87cc27ddf83 Mon Sep 17 00:00:00 2001 From: Paul Norman Date: Mon, 3 Aug 2015 02:01:37 -0700 Subject: [PATCH 3/4] Add test for invalid geometry handling --- .gitignore | 1 + Makefile.am | 4 + tests/test-output-pgsql-validgeom.cpp | 101 +++++++++++++++++++++ tests/test_output_pgsql_validgeom.osm | 126 ++++++++++++++++++++++++++ 4 files changed, 232 insertions(+) create mode 100644 tests/test-output-pgsql-validgeom.cpp create mode 100644 tests/test_output_pgsql_validgeom.osm diff --git a/.gitignore b/.gitignore index 3c34b037a..f658bd3a7 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,7 @@ tests/test-output-pgsql tests/test-output-pgsql-tablespace tests/test-output-pgsql-z_order tests/test-output-pgsql-schema +tests/test-output-pgsql-validgeom tests/test-expire-tiles tests/test-hstore-match-only tests/*.log diff --git a/Makefile.am b/Makefile.am index e5eb4071c..c0fcaa0de 100644 --- a/Makefile.am +++ b/Makefile.am @@ -59,6 +59,7 @@ check_PROGRAMS = \ tests/test-output-pgsql-z_order \ tests/test-output-pgsql-tablespace \ tests/test-output-pgsql-schema \ + tests/test-output-pgsql-validgeom \ tests/test-pgsql-escape \ tests/test-parse-options \ tests/test-expire-tiles \ @@ -96,6 +97,8 @@ tests_test_output_pgsql_z_order_SOURCES = tests/test-output-pgsql-z_order.cpp te tests_test_output_pgsql_z_order_LDADD = libosm2pgsql.la tests_test_output_pgsql_schema_SOURCES = tests/test-output-pgsql-schema.cpp tests/common-pg.cpp tests_test_output_pgsql_schema_LDADD = libosm2pgsql.la +tests_test_output_pgsql_validgeom_LDADD = libosm2pgsql.la +tests_test_output_pgsql_validgeom_SOURCES = tests/test-output-pgsql-validgeom.cpp tests/common-pg.cpp tests_test_pgsql_escape_SOURCES = tests/test-pgsql-escape.cpp tests_test_pgsql_escape_LDADD = libosm2pgsql.la tests_test_parse_options_SOURCES = tests/test-parse-options.cpp @@ -156,6 +159,7 @@ tests_test_output_pgsql_LDADD += $(GLOBAL_LDFLAGS) tests_test_output_pgsql_tablespace_LDADD += $(GLOBAL_LDFLAGS) tests_test_output_pgsql_z_order_LDADD += $(GLOBAL_LDFLAGS) tests_test_output_pgsql_schema_LDADD += $(GLOBAL_LDFLAGS) +tests_test_output_pgsql_validgeom_LDADD += $(GLOBAL_LDFLAGS) tests_test_pgsql_escape_LDADD += $(GLOBAL_LDFLAGS) tests_test_parse_options_LDADD += $(GLOBAL_LDFLAGS) tests_test_expire_tiles_LDADD += $(GLOBAL_LDFLAGS) diff --git a/tests/test-output-pgsql-validgeom.cpp b/tests/test-output-pgsql-validgeom.cpp new file mode 100644 index 000000000..71a073d40 --- /dev/null +++ b/tests/test-output-pgsql-validgeom.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "osmtypes.hpp" +#include "osmdata.hpp" +#include "output-pgsql.hpp" +#include "options.hpp" +#include "middle-pgsql.hpp" +#include "middle-ram.hpp" +#include "taginfo_impl.hpp" +#include "parse.hpp" + +#include +#include + +#include + +#include "tests/common-pg.hpp" + +namespace { + +struct skip_test : public std::exception { + const char *what() const noexcept { return "Test skipped."; } +}; + +void run_test(const char* test_name, void (*testfunc)()) { + try { + fprintf(stderr, "%s\n", test_name); + testfunc(); + + } catch (const skip_test &) { + exit(77); // <-- code to skip this test. + + } catch (const std::exception& e) { + fprintf(stderr, "%s\n", e.what()); + fprintf(stderr, "FAIL\n"); + exit(EXIT_FAILURE); + } + + fprintf(stderr, "PASS\n"); +} +#define RUN_TEST(x) run_test(#x, &(x)) + +// "simple" test modeled on the basic regression test from +// the python script. this is just to check everything is +// working as expected before we start the complex stuff. +void test_z_order() { + std::unique_ptr db; + + try { + db.reset(new pg::tempdb); + } catch (const std::exception &e) { + std::cerr << "Unable to setup database: " << e.what() << "\n"; + throw skip_test(); + } + + std::string proc_name("test-output-pgsql-validgeom"), input_file("-"); + char *argv[] = { &proc_name[0], &input_file[0], nullptr }; + + std::shared_ptr mid_pgsql(new middle_pgsql_t()); + options_t options = options_t(2, argv); + options.database_options = db->database_options; + options.num_procs = 1; + options.prefix = "osm2pgsql_test"; + options.style = "default.style"; + + auto out_test = std::make_shared(mid_pgsql.get(), options); + + osmdata_t osmdata(mid_pgsql, out_test); + + std::unique_ptr parser(new parse_delegate_t(options.extra_attributes, options.bbox, options.projection, false)); + + osmdata.start(); + + parser->stream_file("libxml2", "tests/test_output_pgsql_validgeom.osm", &osmdata); + + parser.reset(nullptr); + + osmdata.stop(); + + db->assert_has_table("osm2pgsql_test_point"); + db->assert_has_table("osm2pgsql_test_line"); + db->assert_has_table("osm2pgsql_test_polygon"); + db->assert_has_table("osm2pgsql_test_roads"); + + db->check_count(0, "SELECT COUNT(*) FROM osm2pgsql_test_polygon WHERE NOT ST_IsValid(way)"); +} + +} // anonymous namespace + +int main(int argc, char *argv[]) { + RUN_TEST(test_z_order); + + return 0; +} diff --git a/tests/test_output_pgsql_validgeom.osm b/tests/test_output_pgsql_validgeom.osm new file mode 100644 index 000000000..654a6a52c --- /dev/null +++ b/tests/test_output_pgsql_validgeom.osm @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 3aae18a772a89669f8b7853abf1909751fa3522c Mon Sep 17 00:00:00 2001 From: Paul Norman Date: Mon, 21 Sep 2015 20:49:14 -0700 Subject: [PATCH 4/4] Reject empty geometries GEOS 3.4 will often return an empty geometry when trying to buffer an invalid one. This avoids letting those empty geometries into the database, essentiallly treating them the same as invalid geometries. --- geometry-builder.cpp | 18 ++++----- tests/test-output-pgsql-validgeom.cpp | 1 + tests/test_output_pgsql_validgeom.osm | 55 +++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 9 deletions(-) diff --git a/geometry-builder.cpp b/geometry-builder.cpp index 5d55ac0c6..18d5c9079 100644 --- a/geometry-builder.cpp +++ b/geometry-builder.cpp @@ -104,12 +104,12 @@ geometry_builder::maybe_wkt_t geometry_builder::get_wkt_simple(const nodelist_t if (polygon && (coords->getSize() >= 4) && (coords->getAt(coords->getSize() - 1).equals2D(coords->getAt(0)))) { std::unique_ptr shell(gf.createLinearRing(coords.release())); geom = geom_ptr(gf.createPolygon(shell.release(), new std::vector)); - if (!geom->isValid()) { + if (geom->isEmpty() || !geom->isValid()) { if (excludepoly) { throw std::runtime_error("Excluding broken polygon."); } else { geom = geom_ptr(geom->buffer(0)); - if (!geom->isValid()) { + if (geom->isEmpty() || !geom->isValid()) { throw std::runtime_error("Excluding unrecoverable broken polygon."); } } @@ -161,12 +161,12 @@ geometry_builder::maybe_wkts_t geometry_builder::get_wkt_split(const nodelist_t if (polygon && (coords->getSize() >= 4) && (coords->getAt(coords->getSize() - 1).equals2D(coords->getAt(0)))) { std::unique_ptr shell(gf.createLinearRing(coords.release())); geom = geom_ptr(gf.createPolygon(shell.release(), new std::vector)); - if (!geom->isValid()) { + if (geom->isEmpty() || !geom->isValid()) { if (excludepoly) { throw std::runtime_error("Excluding broken polygon."); } else { geom = geom_ptr(geom->buffer(0)); - if (!geom->isValid()) { + if (geom->isEmpty() || !geom->isValid()) { throw std::runtime_error("Excluding unrecoverable broken polygon."); } } @@ -445,7 +445,7 @@ geometry_builder::maybe_wkts_t geometry_builder::build_polygons(const multinodel } multipoly->normalize(); - if (multipoly->isValid()) { + if (!multipoly->isEmpty() && multipoly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); //then we set on the one we already have @@ -458,11 +458,11 @@ geometry_builder::maybe_wkts_t geometry_builder::build_polygons(const multinodel for(unsigned i=0; i(polygons->at(i)); - if (!poly->isValid() && !excludepoly) { + if ((poly->isEmpty() || !poly->isValid()) && !excludepoly) { poly = dynamic_cast(poly->buffer(0)); poly->normalize(); } - if (poly->isValid()) + if (!poly->isEmpty() && poly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); @@ -712,7 +712,7 @@ geometry_builder::maybe_wkts_t geometry_builder::build_both(const multinodelist_ } multipoly->normalize(); - if (multipoly->isValid()) { + if (!multipoly->isEmpty() && multipoly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); //then we set on the one we already have @@ -729,7 +729,7 @@ geometry_builder::maybe_wkts_t geometry_builder::build_both(const multinodelist_ poly = dynamic_cast(poly->buffer(0)); poly->normalize(); } - if (poly->isValid()) + if (!poly->isEmpty() && poly->isValid()) { //copy of an empty one should be cheapest wkts->push_back(geometry_builder::wkt_t()); diff --git a/tests/test-output-pgsql-validgeom.cpp b/tests/test-output-pgsql-validgeom.cpp index 71a073d40..0a511e117 100644 --- a/tests/test-output-pgsql-validgeom.cpp +++ b/tests/test-output-pgsql-validgeom.cpp @@ -90,6 +90,7 @@ void test_z_order() { db->assert_has_table("osm2pgsql_test_roads"); db->check_count(0, "SELECT COUNT(*) FROM osm2pgsql_test_polygon WHERE NOT ST_IsValid(way)"); + db->check_count(0, "SELECT COUNT(*) FROM osm2pgsql_test_polygon WHERE ST_IsEmpty(way)"); } } // anonymous namespace diff --git a/tests/test_output_pgsql_validgeom.osm b/tests/test_output_pgsql_validgeom.osm index 654a6a52c..b72a52795 100644 --- a/tests/test_output_pgsql_validgeom.osm +++ b/tests/test_output_pgsql_validgeom.osm @@ -1,5 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + @@ -43,6 +68,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +