From 29c52d6208d5e353cbb4871f547ded604e659f9f Mon Sep 17 00:00:00 2001 From: Michael Reichert Date: Wed, 30 Jan 2019 16:52:32 +0100 Subject: [PATCH 1/3] add support for --extra-attributes to Gazetteer output --- gazetteer-style.cpp | 50 +++++++++++++++++++++++++++++++++++++++++++- gazetteer-style.hpp | 14 +++++++++++++ output-gazetteer.hpp | 2 ++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/gazetteer-style.cpp b/gazetteer-style.cpp index c5d6a2b0d..8e3036052 100644 --- a/gazetteer-style.cpp +++ b/gazetteer-style.cpp @@ -42,6 +42,7 @@ void gazetteer_style_t::clear() m_main.clear(); m_names.clear(); m_extra.clear(); + m_metadata.clear(); m_address.clear(); m_operator = nullptr; m_admin_level = MAX_ADMINLEVEL; @@ -72,6 +73,11 @@ void gazetteer_style_t::load_style(std::string const &filename) } } +void gazetteer_style_t::set_metadata(const bool enabled) +{ + m_metadata_enabled = enabled; +} + gazetteer_style_t::flag_t gazetteer_style_t::parse_flags(std::string const &str) { flag_t out = 0; @@ -349,6 +355,45 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o) } else if (postcode_fallback && postcode) { m_main.emplace_back("place", "postcode", SF_MAIN | SF_MAIN_FALLBACK); } + + // add metadata fields as tags if enabled + if (m_metadata_enabled) { + if (o.version()) { + add_metadata_field_num("osm_version", o.version()); + } + if (o.uid()) { + add_metadata_field_num("osm_uid", o.uid()); + } + + if (o.user() && *(o.user()) != '\0') { + std::string username = o.user(); + add_metadata_field("osm_user", std::move(username)); + } + + if (o.changeset()) { + add_metadata_field_num("osm_changeset", o.changeset()); + } + + if (o.timestamp()) { + add_metadata_field("osm_timestamp", std::move(o.timestamp().to_iso())); + } + } +} + +void gazetteer_style_t::add_metadata_field(const std::string&& field, const std::string&& value) { + // We have to work with std::string, not char* because metadata fields converted to char* + // would require an allocation on heap and a cleanup at the end. + flag_t flag = find_flag(field.c_str(), value.c_str()); + if (flag & SF_EXTRA) { + m_metadata.emplace_back(std::move(field), std::move(value)); + } +} + +template +void gazetteer_style_t::add_metadata_field_num(const std::string&& field, const T value) { + // This method is not linked to from outside this class. Therefore, it can stay in the source file. + std::string value_str = std::to_string(value); + add_metadata_field(std::move(field), std::move(value_str)); } void gazetteer_style_t::copy_out(osmium::OSMObject const &o, @@ -456,13 +501,16 @@ bool gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag, buffer.finish_hash(); } // extra tags - if (m_extra.empty()) { + if (m_extra.empty() && m_metadata.empty()) { buffer.add_null_column(); } else { buffer.new_hash(); for (auto const &entry : m_extra) { buffer.add_hash_elem(entry.first, entry.second); } + for (auto const &entry : m_metadata) { + buffer.add_hash_elem(entry.first, entry.second); + } buffer.finish_hash(); } // add the geometry - encoding it to hex along the way diff --git a/gazetteer-style.hpp b/gazetteer-style.hpp index fb287475e..364cccfb7 100644 --- a/gazetteer-style.hpp +++ b/gazetteer-style.hpp @@ -14,6 +14,7 @@ class gazetteer_style_t { using flag_t = uint16_t; using ptag_t = std::pair; + using ptag_str_t = std::pair; using pmaintag_t = std::tuple; enum style_flags @@ -66,11 +67,17 @@ class gazetteer_style_t bool has_data() const { return !m_main.empty(); } + void set_metadata(const bool enabled); + private: void add_style_entry(std::string const &key, std::string const &value, flag_t flags); flag_t parse_flags(std::string const &str); flag_t find_flag(char const *k, char const *v) const; + void add_metadata_field(const std::string&& field, const std::string&& value); + + template + void add_metadata_field_num(const std::string&& field, const T value); bool copy_out_maintag(pmaintag_t const &tag, osmium::OSMObject const &o, std::string const &geom, db_copy_mgr_t &buffer); void clear(); @@ -88,6 +95,8 @@ class gazetteer_style_t std::vector m_names; /// extratags to include std::vector m_extra; + /// metadata fields to include + std::vector m_metadata; /// addresstags to include std::vector m_address; /// value of operator tag @@ -96,6 +105,11 @@ class gazetteer_style_t int m_admin_level; /// True if there is an actual name to the object (not a ref). bool m_is_named; + + /// enable writing of metadata as tags (osm_version, osm_timestamp, osm_uid, osm_user, osm_changeset) + bool m_metadata_enabled {false}; + + boost::format m_single_fmt{"%1%\t"}; }; #endif diff --git a/output-gazetteer.hpp b/output-gazetteer.hpp index ccf776f2a..cddf691b6 100644 --- a/output-gazetteer.hpp +++ b/output-gazetteer.hpp @@ -15,6 +15,7 @@ #include "pgsql.hpp" #include "util.hpp" + class output_gazetteer_t : public output_t { output_gazetteer_t(output_gazetteer_t const *other, @@ -37,6 +38,7 @@ class output_gazetteer_t : public output_t osmium_buffer(PLACE_BUFFER_SIZE, osmium::memory::Buffer::auto_grow::yes) { m_style.load_style(options.style); + m_style.set_metadata(options.extra_attributes); } virtual ~output_gazetteer_t(); From b280c9ef9c7000cebddc153d501bfbc2d86e91e7 Mon Sep 17 00:00:00 2001 From: Michael Reichert Date: Tue, 5 Feb 2019 18:00:35 +0100 Subject: [PATCH 2/3] remove necessity of --extra-attributes, use style only --- db-copy.hpp | 35 +++++++++++++++++ gazetteer-style.cpp | 92 +++++++++++++++++++++----------------------- gazetteer-style.hpp | 15 +++----- output-gazetteer.hpp | 1 - 4 files changed, 84 insertions(+), 59 deletions(-) diff --git a/db-copy.hpp b/db-copy.hpp index 7e5692eef..e14d62ca8 100644 --- a/db-copy.hpp +++ b/db-copy.hpp @@ -290,6 +290,41 @@ class db_copy_mgr_t m_current->buffer += "\","; } + /** + * Add a key/value pair to a hash column without escaping. + * + * Key and value must be strings and will NOT be appropriately escaped. + * A separator for the next pair is added at the end. + */ + void add_hash_elem_noescape(char const *k, char const *v) + { + m_current->buffer += '"'; + m_current->buffer += k; + m_current->buffer += "\"=>\""; + m_current->buffer += v; + m_current->buffer += "\","; + } + + /** + * Add a key (unescaped) and a numeric value to a hash column. + * + * Key must be string and come from a safe source because it will NOT be + * escaped! The value should be convertible using std::to_string. + * A separator for the next pair is added at the end. + * + * This method is suitable to insert safe input, e.g. numeric OSM metadata + * (eg. uid) but not unsafe input like user names. + */ + template + void add_hstore_num_noescape(char const *k, T const value) + { + m_current->buffer += '"'; + m_current->buffer += k; + m_current->buffer += "\"=>\""; + m_current->buffer += std::to_string(value); + m_current->buffer += "\","; + } + /** * Close a hash previously started with new_hash(). * diff --git a/gazetteer-style.cpp b/gazetteer-style.cpp index 8e3036052..b19075454 100644 --- a/gazetteer-style.cpp +++ b/gazetteer-style.cpp @@ -42,7 +42,6 @@ void gazetteer_style_t::clear() m_main.clear(); m_names.clear(); m_extra.clear(); - m_metadata.clear(); m_address.clear(); m_operator = nullptr; m_admin_level = MAX_ADMINLEVEL; @@ -73,11 +72,6 @@ void gazetteer_style_t::load_style(std::string const &filename) } } -void gazetteer_style_t::set_metadata(const bool enabled) -{ - m_metadata_enabled = enabled; -} - gazetteer_style_t::flag_t gazetteer_style_t::parse_flags(std::string const &str) { flag_t out = 0; @@ -134,6 +128,25 @@ gazetteer_style_t::flag_t gazetteer_style_t::parse_flags(std::string const &str) return out; } +bool gazetteer_style_t::add_metadata_style_entry(std::string const &key, + std::string const &value) +{ + if (key == "osm_version") { + m_metadata_fields.set_version(true); + } else if (key == "osm_timestamp") { + m_metadata_fields.set_timestamp(true); + } else if (key == "osm_changeset") { + m_metadata_fields.set_changeset(true); + } else if (key == "osm_uid") { + m_metadata_fields.set_uid(true); + } else if (key == "osm_user") { + m_metadata_fields.set_user(true); + } else { + return false; + } + return true; +} + void gazetteer_style_t::add_style_entry(std::string const &key, std::string const &value, gazetteer_style_t::flag_t flags) @@ -177,6 +190,13 @@ void gazetteer_style_t::add_style_entry(std::string const &key, } } + if (add_metadata_style_entry(key, value)) { + if (!value.empty()) { + throw std::runtime_error("Style error. Rules for OSM metadata " + "attributes must have an empty value.\n"); + } + return; + } if (value.empty()) { m_matcher.emplace_back(key, flags, matcher_t::MT_KEY); } else { @@ -355,45 +375,6 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o) } else if (postcode_fallback && postcode) { m_main.emplace_back("place", "postcode", SF_MAIN | SF_MAIN_FALLBACK); } - - // add metadata fields as tags if enabled - if (m_metadata_enabled) { - if (o.version()) { - add_metadata_field_num("osm_version", o.version()); - } - if (o.uid()) { - add_metadata_field_num("osm_uid", o.uid()); - } - - if (o.user() && *(o.user()) != '\0') { - std::string username = o.user(); - add_metadata_field("osm_user", std::move(username)); - } - - if (o.changeset()) { - add_metadata_field_num("osm_changeset", o.changeset()); - } - - if (o.timestamp()) { - add_metadata_field("osm_timestamp", std::move(o.timestamp().to_iso())); - } - } -} - -void gazetteer_style_t::add_metadata_field(const std::string&& field, const std::string&& value) { - // We have to work with std::string, not char* because metadata fields converted to char* - // would require an allocation on heap and a cleanup at the end. - flag_t flag = find_flag(field.c_str(), value.c_str()); - if (flag & SF_EXTRA) { - m_metadata.emplace_back(std::move(field), std::move(value)); - } -} - -template -void gazetteer_style_t::add_metadata_field_num(const std::string&& field, const T value) { - // This method is not linked to from outside this class. Therefore, it can stay in the source file. - std::string value_str = std::to_string(value); - add_metadata_field(std::move(field), std::move(value_str)); } void gazetteer_style_t::copy_out(osmium::OSMObject const &o, @@ -501,15 +482,30 @@ bool gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag, buffer.finish_hash(); } // extra tags - if (m_extra.empty() && m_metadata.empty()) { + if (m_extra.empty() && m_metadata_fields.none()) { buffer.add_null_column(); } else { buffer.new_hash(); for (auto const &entry : m_extra) { buffer.add_hash_elem(entry.first, entry.second); } - for (auto const &entry : m_metadata) { - buffer.add_hash_elem(entry.first, entry.second); + if (m_metadata_fields.version() && o.version()) { + buffer.add_hstore_num_noescape( + "osm_version", o.version()); + } + if (m_metadata_fields.uid() && o.uid()) { + buffer.add_hstore_num_noescape("osm_uid", o.uid()); + } + if (m_metadata_fields.user() && o.user() && *(o.user()) != '\0') { + buffer.add_hash_elem("osm_user", o.user()); + } + if (m_metadata_fields.changeset() && o.changeset()) { + buffer.add_hstore_num_noescape( + "osm_changeset", o.changeset()); + } + if (m_metadata_fields.timestamp() && o.timestamp()) { + std::string timestamp = o.timestamp().to_iso(); + buffer.add_hash_elem_noescape("osm_timestamp", timestamp.c_str()); } buffer.finish_hash(); } diff --git a/gazetteer-style.hpp b/gazetteer-style.hpp index 364cccfb7..5d609df98 100644 --- a/gazetteer-style.hpp +++ b/gazetteer-style.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "db-copy.hpp" @@ -14,7 +15,6 @@ class gazetteer_style_t { using flag_t = uint16_t; using ptag_t = std::pair; - using ptag_str_t = std::pair; using pmaintag_t = std::tuple; enum style_flags @@ -67,17 +67,14 @@ class gazetteer_style_t bool has_data() const { return !m_main.empty(); } - void set_metadata(const bool enabled); - private: + bool add_metadata_style_entry(std::string const &key, + std::string const &value); void add_style_entry(std::string const &key, std::string const &value, flag_t flags); flag_t parse_flags(std::string const &str); flag_t find_flag(char const *k, char const *v) const; - void add_metadata_field(const std::string&& field, const std::string&& value); - template - void add_metadata_field_num(const std::string&& field, const T value); bool copy_out_maintag(pmaintag_t const &tag, osmium::OSMObject const &o, std::string const &geom, db_copy_mgr_t &buffer); void clear(); @@ -95,8 +92,6 @@ class gazetteer_style_t std::vector m_names; /// extratags to include std::vector m_extra; - /// metadata fields to include - std::vector m_metadata; /// addresstags to include std::vector m_address; /// value of operator tag @@ -106,8 +101,8 @@ class gazetteer_style_t /// True if there is an actual name to the object (not a ref). bool m_is_named; - /// enable writing of metadata as tags (osm_version, osm_timestamp, osm_uid, osm_user, osm_changeset) - bool m_metadata_enabled {false}; + /// which metadata fields of the OSM objects should be written to the output + osmium::metadata_options m_metadata_fields{"none"}; boost::format m_single_fmt{"%1%\t"}; }; diff --git a/output-gazetteer.hpp b/output-gazetteer.hpp index cddf691b6..057e5bd0b 100644 --- a/output-gazetteer.hpp +++ b/output-gazetteer.hpp @@ -38,7 +38,6 @@ class output_gazetteer_t : public output_t osmium_buffer(PLACE_BUFFER_SIZE, osmium::memory::Buffer::auto_grow::yes) { m_style.load_style(options.style); - m_style.set_metadata(options.extra_attributes); } virtual ~output_gazetteer_t(); From 9e535f8ede7034cbe9c392d4d27f9ebe706be267 Mon Sep 17 00:00:00 2001 From: Michael Reichert Date: Tue, 21 May 2019 15:11:18 +0200 Subject: [PATCH 3/3] gazetteer OSM metadata: add another validation check, code cleanup --- gazetteer-style.cpp | 10 +++++++--- gazetteer-style.hpp | 3 +-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/gazetteer-style.cpp b/gazetteer-style.cpp index b19075454..3382f83f6 100644 --- a/gazetteer-style.cpp +++ b/gazetteer-style.cpp @@ -128,8 +128,7 @@ gazetteer_style_t::flag_t gazetteer_style_t::parse_flags(std::string const &str) return out; } -bool gazetteer_style_t::add_metadata_style_entry(std::string const &key, - std::string const &value) +bool gazetteer_style_t::add_metadata_style_entry(std::string const &key) { if (key == "osm_version") { m_metadata_fields.set_version(true); @@ -190,11 +189,16 @@ void gazetteer_style_t::add_style_entry(std::string const &key, } } - if (add_metadata_style_entry(key, value)) { + if (add_metadata_style_entry(key)) { if (!value.empty()) { throw std::runtime_error("Style error. Rules for OSM metadata " "attributes must have an empty value.\n"); } + if (flags != SF_EXTRA) { + throw std::runtime_error("Style error. Rules for OSM metadata " + "attributes must have the style flag " + "\"extra\" and no other flag.\n"); + } return; } if (value.empty()) { diff --git a/gazetteer-style.hpp b/gazetteer-style.hpp index 5d609df98..ff22fb8ff 100644 --- a/gazetteer-style.hpp +++ b/gazetteer-style.hpp @@ -68,8 +68,7 @@ class gazetteer_style_t bool has_data() const { return !m_main.empty(); } private: - bool add_metadata_style_entry(std::string const &key, - std::string const &value); + bool add_metadata_style_entry(std::string const &key); void add_style_entry(std::string const &key, std::string const &value, flag_t flags); flag_t parse_flags(std::string const &str);