From 58ab1fd687bd7edc96d35c6669a025f16f5aeaae Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 8 Dec 2019 10:05:35 +0100 Subject: [PATCH] Limit the number of scheduled deletes As we have to issue a delete for each and every untagged node, the number of scheduled deletes can become quite large even for a small change file. It is better to send off the copy buffer a bit earlier in this case to ensure that the deletes can already run in parallel with the parsing of the file. The change also ensures that the memory usage of the deleter is bounded. --- src/db-copy-mgr.hpp | 2 +- src/db-copy.hpp | 15 +++++++++++++++ src/gazetteer-style.hpp | 10 ++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/db-copy-mgr.hpp b/src/db-copy-mgr.hpp index 8ebab192e..7ec894926 100644 --- a/src/db-copy-mgr.hpp +++ b/src/db-copy-mgr.hpp @@ -53,7 +53,7 @@ class db_copy_mgr_t assert(buf[sz - 1] == '\t'); buf[sz - 1] = '\n'; - if (sz > db_cmd_copy_t::Max_buf_size - 100) { + if (m_current->is_full()) { m_processor->add_buffer(std::move(m_current)); } } diff --git a/src/db-copy.hpp b/src/db-copy.hpp index 04a5b4036..b6492dbc3 100644 --- a/src/db-copy.hpp +++ b/src/db-copy.hpp @@ -45,6 +45,13 @@ struct db_target_descr_t */ class db_deleter_by_id_t { + enum + { + // There is a trade-off here between sending as few DELETE SQL as + // possible and keeping the size of the deletable vector manageable. + Max_entries = 1000000 + }; + public: bool has_data() const noexcept { return !m_deletables.empty(); } @@ -53,6 +60,8 @@ class db_deleter_by_id_t void delete_rows(std::string const &table, std::string const &column, pg_conn_t *conn); + bool is_full() const noexcept { return m_deletables.size() > Max_entries; } + private: /// Vector with object to delete before copying std::vector m_deletables; @@ -120,6 +129,12 @@ class db_cmd_copy_delete_t : public db_cmd_copy_t public: using db_cmd_copy_t::db_cmd_copy_t; + /// Return true if the buffer is filled up.
+ bool is_full() const noexcept + { + return (buffer.size() > Max_buf_size - 100) || m_deleter.is_full(); + } + bool has_deletables() const noexcept override { return m_deleter.has_data(); diff --git a/src/gazetteer-style.hpp b/src/gazetteer-style.hpp index 4a8f275ed..79db2853d 100644 --- a/src/gazetteer-style.hpp +++ b/src/gazetteer-style.hpp @@ -19,6 +19,14 @@ */ class db_deleter_place_t { + enum + { + // Deletion in the place table is fairly complex because of the + // compound primary key. It is better to start earlier with the + // deletion, so it can run in parallel with the file import. + Max_entries = 100000 + }; + struct item_t { std::string classes; @@ -48,6 +56,8 @@ class db_deleter_place_t void delete_rows(std::string const &table, std::string const &column, pg_conn_t *conn); + bool is_full() const noexcept { return m_deletables.size() > Max_entries; } + private: /// Vector with object to delete before copying std::vector m_deletables;