From 8f9a0489088f538259bbb7ad6128be7e5a641ade Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Thu, 5 Nov 2020 10:54:26 +0100 Subject: [PATCH 1/2] Add --with-forward-dependencies option Add new command line option --with-forward-dependencies. Set this to "false" to suppress the normal propagation of changes in append mode from nodes to ways to relations. This is intended mainly for Nominatim which doesn't need this propagation. I have chosen the flag name as --with-forward-dependencies=VALUE to leave the possibility open to later allow values like "nodes-only" or something like that. But currently only "true" and "false" are supported. This replaces the hardcoded check for the gazeteer output in the middle. As the (now deleted) comment in the code shows an earlier intention was to let the output tell the middle whether it needs to handle the forward dependencies. But this commit is not going that route: Some outputs can not know whether the dependency management is needed (i.e. the flex output) without having an extra flag for that. But if we need a flag anyway, it makes more sense to have it as an overall option. Fixes #1304 --- docs/osm2pgsql.1 | 12 ++++++--- docs/osm2pgsql.md | 10 ++++--- src/middle-pgsql.cpp | 8 +----- src/options.cpp | 18 +++++++++++-- src/options.hpp | 6 +++++ src/osm2pgsql.cpp | 11 +++----- src/osmdata.cpp | 15 ++++------- src/osmdata.hpp | 1 + src/output-gazetteer.hpp | 2 -- src/output-null.hpp | 2 -- src/output.hpp | 2 -- tests/common-import.hpp | 11 +++----- tests/data/test_forward_dependencies.opl | 13 +++++++++ tests/data/test_forward_dependencies_diff.opl | 2 ++ tests/regression.py | 27 +++++++++++++++++++ 15 files changed, 93 insertions(+), 47 deletions(-) create mode 100644 tests/data/test_forward_dependencies.opl create mode 100644 tests/data/test_forward_dependencies_diff.opl diff --git a/docs/osm2pgsql.1 b/docs/osm2pgsql.1 index ff1c6d04d..9905930d0 100644 --- a/docs/osm2pgsql.1 +++ b/docs/osm2pgsql.1 @@ -219,14 +219,14 @@ The default is disabled. .RS .RE .TP -.B \[en]middle\-schema=SCHEMA +.B \-\-middle\-schema=SCHEMA Use PostgreSQL schema SCHEMA for all tables, indexes, and functions in the middle (default is no schema, i.e.\ the \f[C]public\f[] schema is used). .RS .RE .TP -.B \[en]middle\-way\-node\-index\-id\-shift=SHIFT +.B \-\-middle\-way\-node\-index\-id\-shift=SHIFT Set ID shift for way node bucket index in middle. Experts only. See documentation for details. @@ -369,7 +369,7 @@ Compute area column using spherical mercator coordinates. .RS .RE .TP -.B \[en]output\-pgsql\-schema=SCHEMA +.B \-\-output\-pgsql\-schema=SCHEMA Use PostgreSQL schema SCHEMA for all tables, indexes, and functions in the pgsql and multi outputs (default is no schema, i.e.\ the \f[C]public\f[] schema is used). @@ -404,6 +404,12 @@ index after the other. Specifies the number of parallel threads used for certain operations. .RS .RE +.TP +.B \-\-with\-forward\-dependencies=BOOL +Propagate changes from nodes to ways and node/way members to relations +(Default: \f[C]true\f[]). +.RS +.RE .SH SEE ALSO .IP \[bu] 2 osm2pgsql website (https://osm2pgsql.org) diff --git a/docs/osm2pgsql.md b/docs/osm2pgsql.md index e71035cc6..d62a80c79 100644 --- a/docs/osm2pgsql.md +++ b/docs/osm2pgsql.md @@ -151,11 +151,11 @@ Mandatory arguments to long options are mandatory for short options too. single large file. This mode is only recommended for full planet imports as it doesn't work well with small imports. The default is disabled. ---middle-schema=SCHEMA +\--middle-schema=SCHEMA : Use PostgreSQL schema SCHEMA for all tables, indexes, and functions in the middle (default is no schema, i.e. the `public` schema is used). ---middle-way-node-index-id-shift=SHIFT +\--middle-way-node-index-id-shift=SHIFT : Set ID shift for way node bucket index in middle. Experts only. See documentation for details. @@ -245,7 +245,7 @@ Mandatory arguments to long options are mandatory for short options too. \--reproject-area : Compute area column using spherical mercator coordinates. ---output-pgsql-schema=SCHEMA +\--output-pgsql-schema=SCHEMA : Use PostgreSQL schema SCHEMA for all tables, indexes, and functions in the pgsql and multi outputs (default is no schema, i.e. the `public` schema is used). @@ -270,6 +270,10 @@ Mandatory arguments to long options are mandatory for short options too. \--number-processes=THREADS : Specifies the number of parallel threads used for certain operations. +\--with-forward-dependencies=BOOL +: Propagate changes from nodes to ways and node/way members to relations + (Default: `true`). + # SEE ALSO * [osm2pgsql website](https://osm2pgsql.org) diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp index 76931478b..bc70364b4 100644 --- a/src/middle-pgsql.cpp +++ b/src/middle-pgsql.cpp @@ -72,13 +72,7 @@ middle_pgsql_t::table_desc::table_desc(options_t const &options, m_copy_target->schema = options.middle_dbschema; m_copy_target->id = "id"; // XXX hardcoded column name - // Gazetteer doesn't use mark-pending processing and consequently - // needs no way-node index. - // TODO Currently, set here to keep the impact on the code small. - // We actually should have the output plugins report their needs - // and pass that via the constructor to middle_t, so that middle_t - // itself doesn't need to know about details of the output. - if (options.output_backend != "gazetteer") { + if (options.with_forward_dependencies) { m_prepare_intarray = build_sql(options, ts.prepare_mark); m_array_indexes = build_sql(options, ts.create_index); } diff --git a/src/options.cpp b/src/options.cpp index 7808e5cca..9c74c0741 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -80,6 +80,7 @@ const struct option long_options[] = { {"username", required_argument, nullptr, 'U'}, {"verbose", no_argument, nullptr, 'v'}, {"version", no_argument, nullptr, 'V'}, + {"with-forward-dependencies", required_argument, nullptr, 217}, {nullptr, 0, nullptr, 0}}; void short_usage(char *arg0) @@ -126,6 +127,8 @@ void long_usage(char const *arg0, bool verbose) This file is a single > 40Gb large file. Only recommended\n\ for full planet imports. Default is disabled.\n\ --middle-schema Schema to use for middle tables (default: none)\n\ + --with-forward-dependencies true|false Propagate changes from nodes to ways\n\ + and node/way members to relations (Default: true).\n\ \n\ Database options:\n\ -d|--database The name of the PostgreSQL database to connect to or\n\ @@ -187,8 +190,8 @@ void long_usage(char const *arg0, bool verbose) printf("%s", "\ \n\ Middle options (experts only):\n\ - --middle-way-node-index-id-shift shift Set ID shift for bucket\ - index. See documentation for details.\ + --middle-way-node-index-id-shift shift Set ID shift for bucket\n\ + index. See documentation for details.\n\ \n\ Expiry options:\n\ -e|--expire-tiles [min_zoom-]max_zoom Create a tile expiry list.\n\ @@ -582,6 +585,17 @@ options_t::options_t(int argc, char *argv[]) : options_t() case 216: output_dbschema = optarg; break; + case 217: + if (std::strcmp(optarg, "false") == 0) { + with_forward_dependencies = false; + } else if (std::strcmp(optarg, "true") == 0) { + with_forward_dependencies = true; + } else { + throw std::runtime_error{ + "Unknown value for --with-forward-dependencies option: {}\n"_format( + optarg)}; + } + break; case 300: way_node_index_id_shift = atoi(optarg); break; diff --git a/src/options.hpp b/src/options.hpp index b40ced14b..203750c30 100644 --- a/src/options.hpp +++ b/src/options.hpp @@ -105,6 +105,12 @@ class options_t int num_procs; bool droptemp = false; ///< drop slim mode temp tables after act + /** + * Should changes of objects be propagated forwards (from nodes to ways and + * from node/way members to parent relations)? + */ + bool with_forward_dependencies = true; + /// only copy rows that match an explicitly listed key bool hstore_match_only = false; diff --git a/src/osm2pgsql.cpp b/src/osm2pgsql.cpp index 8fa47852c..bd903bf2f 100644 --- a/src/osm2pgsql.cpp +++ b/src/osm2pgsql.cpp @@ -66,15 +66,10 @@ int main(int argc, char *argv[]) auto const outputs = output_t::create_outputs(middle->get_query_instance(), options); - bool const need_dependencies = - std::any_of(outputs.cbegin(), outputs.cend(), - [](std::shared_ptr const &output) { - return output->need_forward_dependencies(); - }); - auto dependency_manager = std::unique_ptr( - need_dependencies ? new full_dependency_manager_t{middle} - : new dependency_manager_t{}); + options.with_forward_dependencies + ? new full_dependency_manager_t{middle} + : new dependency_manager_t{}); osmdata_t osmdata{std::move(dependency_manager), middle, outputs, options}; diff --git a/src/osmdata.cpp b/src/osmdata.cpp index bdd07444d..69f537d4e 100644 --- a/src/osmdata.cpp +++ b/src/osmdata.cpp @@ -30,7 +30,8 @@ osmdata_t::osmdata_t(std::unique_ptr dependency_manager, m_outs(std::move(outs)), m_conninfo(options.database_options.conninfo()), m_num_procs(options.num_procs), m_append(options.append), m_droptemp(options.droptemp), m_parallel_indexing(options.parallel_indexing), - m_with_extra_attrs(options.extra_attributes) + m_with_extra_attrs(options.extra_attributes), + m_with_forward_dependencies(options.with_forward_dependencies) { assert(m_dependency_manager); assert(m_mid); @@ -193,11 +194,7 @@ class multithreaded_processor auto copy_thread = std::make_shared(conninfo); for (auto const &out : m_outputs) { - if (out->need_forward_dependencies()) { - m_clones[i].push_back(out->clone(midq, copy_thread)); - } else { - m_clones[i].emplace_back(nullptr); - } + m_clones[i].push_back(out->clone(midq, copy_thread)); } } } @@ -246,9 +243,7 @@ class multithreaded_processor for (auto const &output : m_outputs) { for (auto const &clone : m_clones) { assert(n < clone.size()); - if (clone[n]) { - output->merge_expire_trees(clone[n].get()); - } + output->merge_expire_trees(clone[n].get()); } ++n; } @@ -459,7 +454,7 @@ void osmdata_t::stop() const out->sync(); } - if (m_append) { + if (m_append && m_with_forward_dependencies) { process_dependents(); } diff --git a/src/osmdata.hpp b/src/osmdata.hpp index 4b2af8b2e..fcdf648a9 100644 --- a/src/osmdata.hpp +++ b/src/osmdata.hpp @@ -95,6 +95,7 @@ class osmdata_t bool m_droptemp; bool m_parallel_indexing; bool m_with_extra_attrs; + bool m_with_forward_dependencies; }; diff --git a/src/output-gazetteer.hpp b/src/output-gazetteer.hpp index 12d033766..90cc14c57 100644 --- a/src/output-gazetteer.hpp +++ b/src/output-gazetteer.hpp @@ -44,8 +44,6 @@ class output_gazetteer_t : public output_t void stop(thread_pool_t *) noexcept override {} void sync() override; - bool need_forward_dependencies() const noexcept override { return false; } - void pending_way(osmid_t) noexcept override {} void pending_relation(osmid_t) noexcept override {} diff --git a/src/output-null.hpp b/src/output-null.hpp index 192095c84..916e82aa0 100644 --- a/src/output-null.hpp +++ b/src/output-null.hpp @@ -23,8 +23,6 @@ class output_null_t : public output_t void sync() override {} void cleanup() {} - bool need_forward_dependencies() const noexcept override { return false; } - void pending_way(osmid_t) override {} void pending_relation(osmid_t) override {} diff --git a/src/output.hpp b/src/output.hpp index b2fbd0c2b..b0dc082b2 100644 --- a/src/output.hpp +++ b/src/output.hpp @@ -45,8 +45,6 @@ class output_t virtual void reprocess_marked() {} - virtual bool need_forward_dependencies() const noexcept { return true; } - virtual void pending_way(osmid_t id) = 0; virtual void pending_relation(osmid_t id) = 0; virtual void pending_relation_stage1c(osmid_t) {} diff --git a/tests/common-import.hpp b/tests/common-import.hpp index e7bc24144..8db31cd1b 100644 --- a/tests/common-import.hpp +++ b/tests/common-import.hpp @@ -140,15 +140,10 @@ class import_t auto const outputs = output_t::create_outputs(middle->get_query_instance(), options); - bool const need_dependencies = - std::any_of(outputs.cbegin(), outputs.cend(), - [](std::shared_ptr const &output) { - return output->need_forward_dependencies(); - }); - auto dependency_manager = std::unique_ptr( - need_dependencies ? new full_dependency_manager_t{middle} - : new dependency_manager_t{}); + options.with_forward_dependencies + ? new full_dependency_manager_t{middle} + : new dependency_manager_t{}); osmdata_t osmdata{std::move(dependency_manager), middle, outputs, options}; diff --git a/tests/data/test_forward_dependencies.opl b/tests/data/test_forward_dependencies.opl new file mode 100644 index 000000000..25ee1e6b7 --- /dev/null +++ b/tests/data/test_forward_dependencies.opl @@ -0,0 +1,13 @@ +n10 v1 x1.0 y1.0 +n11 v1 x1.0 y2.0 +n12 v1 x2.0 y2.0 Tnatural=tree +n13 v1 x3.0 y3.0 +n14 v1 x3.1 y3.1 +n15 v1 x0.0 y0.0 +n16 v1 x0.0 y0.1 +n17 v1 x0.1 y0.1 +w20 v1 Nn10,n11,n12,n10 Tlanduse=forest +w21 v1 Nn13,n14 Thighway=primary +w22 v1 Nn15,n16 +w23 v1 Nn16,n17,n15 +r30 v1 Mw22@,w23@ Ttype=multipolygon,natural=water diff --git a/tests/data/test_forward_dependencies_diff.opl b/tests/data/test_forward_dependencies_diff.opl new file mode 100644 index 000000000..cd1ffd2da --- /dev/null +++ b/tests/data/test_forward_dependencies_diff.opl @@ -0,0 +1,2 @@ +n13 v2 x3.1 y3.0 +w23 v2 Nn16,n17 diff --git a/tests/regression.py b/tests/regression.py index 2bf07bf48..582e404eb 100644 --- a/tests/regression.py +++ b/tests/regression.py @@ -218,6 +218,11 @@ class BaseUpdateRunnerWithOutputSchema(BaseUpdateRunner): schema = 'osm' +class DependencyUpdateRunner(BaseRunner): + import_file = 'test_forward_dependencies.opl' + update_file = 'test_forward_dependencies_diff.opl' + + ######################################################################## # # Test groups @@ -769,6 +774,28 @@ def test_planet_osm_nodes(self): self.assert_count(0, 'pg_tables', where="tablename = 'planet_osm_nodes'") +class TestMPUpdateWithForwardDependencies(DependencyUpdateRunner, unittest.TestCase): + extra_params = ['--latlong', '--slim', '--with-forward-dependencies=true'] + + def test_count(self): + self.assert_count(1, 'planet_osm_point') + self.assert_count(1, 'planet_osm_line') + self.assert_count(0, 'planet_osm_line', 'abs(ST_X(ST_StartPoint(way)) - 3.0) < 0.0001') + self.assert_count(1, 'planet_osm_line', 'abs(ST_X(ST_StartPoint(way)) - 3.1) < 0.0001') + self.assert_count(1, 'planet_osm_roads') + self.assert_count(1, 'planet_osm_polygon') + +class TestMPUpdateWithoutForwardDependencies(DependencyUpdateRunner, unittest.TestCase): + extra_params = ['--latlong', '--slim', '--with-forward-dependencies=false'] + + def test_count(self): + self.assert_count(1, 'planet_osm_point') + self.assert_count(1, 'planet_osm_line') + self.assert_count(1, 'planet_osm_line', 'abs(ST_X(ST_StartPoint(way)) - 3.0) < 0.0001') + self.assert_count(0, 'planet_osm_line', 'abs(ST_X(ST_StartPoint(way)) - 3.1) < 0.0001') + self.assert_count(1, 'planet_osm_roads') + self.assert_count(2, 'planet_osm_polygon') + # Database access tests class TestDBAccessNorm(BaseUpdateRunner, unittest.TestCase, From 59b57debff494c3e74a568ad9c746e32a24c63d4 Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Tue, 10 Nov 2020 11:06:46 +0100 Subject: [PATCH 2/2] Better and more consistent variable names for SQL commands --- src/middle-pgsql.cpp | 72 +++++++++++++++++++++++--------------------- src/middle-pgsql.hpp | 10 +++--- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp index bc70364b4..bcace8b90 100644 --- a/src/middle-pgsql.cpp +++ b/src/middle-pgsql.cpp @@ -64,7 +64,7 @@ static std::string build_sql(options_t const &options, char const *templ) middle_pgsql_t::table_desc::table_desc(options_t const &options, table_sql const &ts) -: m_create(build_sql(options, ts.create_table)), +: m_create_table(build_sql(options, ts.create_table)), m_prepare_query(build_sql(options, ts.prepare_query)), m_copy_target(std::make_shared()) { @@ -73,8 +73,9 @@ middle_pgsql_t::table_desc::table_desc(options_t const &options, m_copy_target->id = "id"; // XXX hardcoded column name if (options.with_forward_dependencies) { - m_prepare_intarray = build_sql(options, ts.prepare_mark); - m_array_indexes = build_sql(options, ts.create_index); + m_prepare_fw_dep_lookups = + build_sql(options, ts.prepare_fw_dep_lookups); + m_create_fw_dep_indexes = build_sql(options, ts.create_fw_dep_indexes); } } @@ -97,9 +98,9 @@ void middle_pgsql_t::table_desc::stop(std::string const &conninfo, auto const qual_name = qualified_name( m_copy_target->schema, m_copy_target->name); sql_conn.exec("DROP TABLE IF EXISTS {}"_format(qual_name)); - } else if (build_indexes && !m_array_indexes.empty()) { + } else if (build_indexes && !m_create_fw_dep_indexes.empty()) { fmt::print(stderr, "Building index on table: {}\n", name()); - sql_conn.exec(m_array_indexes); + sql_conn.exec(m_create_fw_dep_indexes); } fmt::print(stderr, "Stopped table: {} in {}s\n", name(), timer.stop()); @@ -564,8 +565,8 @@ void middle_pgsql_t::start() // Prepare queries for updating dependent objects for (auto &table : m_tables) { - if (!table.m_prepare_intarray.empty()) { - m_db_connection.exec(table.m_prepare_intarray); + if (!table.m_prepare_fw_dep_lookups.empty()) { + m_db_connection.exec(table.m_prepare_fw_dep_lookups); } } } else { @@ -577,7 +578,7 @@ void middle_pgsql_t::start() table.m_copy_target->schema, table.m_copy_target->name); m_db_connection.exec( "DROP TABLE IF EXISTS {} CASCADE"_format(qual_name)); - m_db_connection.exec(table.m_create); + m_db_connection.exec(table.m_create_table); } // The extra query connection is only needed in append mode, so close. @@ -660,33 +661,34 @@ static table_sql sql_for_ways(bool has_bucket_index, " WHERE id = ANY($1::int8[]);\n"; if (has_bucket_index) { - sql.prepare_mark = + sql.prepare_fw_dep_lookups = "PREPARE mark_ways_by_node(int8) AS" " SELECT id FROM {schema}{prefix}_ways w" " WHERE $1 = ANY(nodes)" " AND {schema}{prefix}_index_bucket(w.nodes)" " && {schema}{prefix}_index_bucket(ARRAY[$1]);\n"; } else { - sql.prepare_mark = "PREPARE mark_ways_by_node(int8) AS" - " SELECT id FROM {schema}{prefix}_ways" - " WHERE nodes && ARRAY[$1];\n"; + sql.prepare_fw_dep_lookups = "PREPARE mark_ways_by_node(int8) AS" + " SELECT id FROM {schema}{prefix}_ways" + " WHERE nodes && ARRAY[$1];\n"; } if (way_node_index_id_shift == 0) { - sql.create_index = + sql.create_fw_dep_indexes = "CREATE INDEX ON {schema}{prefix}_ways USING GIN (nodes)" " WITH (fastupdate = off) {index_tablespace};\n"; } else { - sql.create_index = "CREATE OR REPLACE FUNCTION" - " {schema}{prefix}_index_bucket(int8[])" - " RETURNS int8[] AS $$\n" - " SELECT ARRAY(SELECT DISTINCT" - " unnest($1) >> {way_node_index_id_shift})\n" - "$$ LANGUAGE SQL IMMUTABLE;\n" - "CREATE INDEX {schema}{prefix}_ways_nodes_bucket_idx" - " ON {schema}{prefix}_ways" - " USING GIN ({schema}{prefix}_index_bucket(nodes))" - " WITH (fastupdate = off) {index_tablespace};\n"; + sql.create_fw_dep_indexes = + "CREATE OR REPLACE FUNCTION" + " {schema}{prefix}_index_bucket(int8[])" + " RETURNS int8[] AS $$\n" + " SELECT ARRAY(SELECT DISTINCT" + " unnest($1) >> {way_node_index_id_shift})\n" + "$$ LANGUAGE SQL IMMUTABLE;\n" + "CREATE INDEX {schema}{prefix}_ways_nodes_bucket_idx" + " ON {schema}{prefix}_ways" + " USING GIN ({schema}{prefix}_index_bucket(nodes))" + " WITH (fastupdate = off) {index_tablespace};\n"; } return sql; @@ -711,17 +713,19 @@ static table_sql sql_for_relations() noexcept " SELECT members, tags" " FROM {schema}{prefix}_rels WHERE id = $1;\n"; - sql.prepare_mark = "PREPARE mark_rels_by_node(int8) AS" - " SELECT id FROM {schema}{prefix}_rels" - " WHERE parts && ARRAY[$1]" - " AND parts[1:way_off] && ARRAY[$1];\n" - "PREPARE mark_rels_by_way(int8) AS" - " SELECT id FROM {schema}{prefix}_rels" - " WHERE parts && ARRAY[$1]" - " AND parts[way_off+1:rel_off] && ARRAY[$1];\n"; - - sql.create_index = "CREATE INDEX ON {schema}{prefix}_rels USING GIN (parts)" - " WITH (fastupdate = off) {index_tablespace};\n"; + sql.prepare_fw_dep_lookups = + "PREPARE mark_rels_by_node(int8) AS" + " SELECT id FROM {schema}{prefix}_rels" + " WHERE parts && ARRAY[$1]" + " AND parts[1:way_off] && ARRAY[$1];\n" + "PREPARE mark_rels_by_way(int8) AS" + " SELECT id FROM {schema}{prefix}_rels" + " WHERE parts && ARRAY[$1]" + " AND parts[way_off+1:rel_off] && ARRAY[$1];\n"; + + sql.create_fw_dep_indexes = + "CREATE INDEX ON {schema}{prefix}_rels USING GIN (parts)" + " WITH (fastupdate = off) {index_tablespace};\n"; return sql; } diff --git a/src/middle-pgsql.hpp b/src/middle-pgsql.hpp index 20662517c..dc281ec44 100644 --- a/src/middle-pgsql.hpp +++ b/src/middle-pgsql.hpp @@ -48,8 +48,8 @@ struct table_sql { char const *name = ""; char const *create_table = ""; char const *prepare_query = ""; - char const *prepare_mark = ""; - char const *create_index = ""; + char const *prepare_fw_dep_lookups = ""; + char const *create_fw_dep_indexes = ""; }; struct middle_pgsql_t : public slim_middle_t @@ -87,10 +87,10 @@ struct middle_pgsql_t : public slim_middle_t void stop(std::string const &conninfo, bool droptemp, bool build_indexes); - std::string m_create; + std::string m_create_table; std::string m_prepare_query; - std::string m_prepare_intarray; - std::string m_array_indexes; + std::string m_prepare_fw_dep_lookups; + std::string m_create_fw_dep_indexes; std::shared_ptr m_copy_target; };