Merge 'test/topology_custom: test_read_repair.py: reduce run-time' from Botond Dénes

This test needed a lot of data to ensure the read repair spans multiple pages. This change adjusts two key configuration items, allowing for a drastic reduction of the data size and, consequently, a large reduction in run-time.
* Changes query-tombstone-page-limit: 1000 -> 10. Before f068d1a, reducing this to too small a value would start killing internal queries; after said commit this is no longer a concern, as the limit no longer affects unpaged queries.
* Sets the new query-page-size-in-bytes: 1MB (default) -> 1KB.

The latter configuration item is new, added by the first patches of this series. It makes configurable the page size in bytes after which a page is cut. Previously this was a hard-coded constant: 1MB. This forced any test that wanted to exercise size-based page cutting to work with a large dataset. The problem was especially pronounced in the test fixed by this PR, because it works with tombstones, which are tiny, so a great many of them were needed to trigger size-based paging.
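
For reference, the test below enables both settings via server command-line flags; per the db/config.cc hunk further down, the equivalent scylla.yaml keys would presumably be:

    query_tombstone_page_limit: 10
    query_page_size_in_bytes: 1024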

With these two changes, we can reduce the data size:
* total_rows: 20000 -> 100
* max_live_rows: 32 -> 8
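
A back-of-envelope calculation makes these reductions plausible. The ~50 bytes per row tombstone below is an assumed figure for illustration only; the real footprint depends on schema and serialization:

    #include <cstdio>

    int main() {
        // Assumed per-row tombstone footprint in an accumulating page;
        // not a measured figure.
        const unsigned tombstone_bytes = 50;
        std::printf("rows per 1MB page: ~%u\n", (1u << 20) / tombstone_bytes); // ~20971
        std::printf("rows per 1KB page: ~%u\n", 1024u / tombstone_bytes);      // ~20
    }

Under this assumption, 1MB pages need tens of thousands of tiny tombstones before a page is cut on size, while with 1KB pages a hundred rows already yield multiple pages.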

The runtime of the test consequently drops from 62 seconds to 13.5 seconds (dev mode, on my build machine).

Fixes: #15425
Fixes: #16899

Closes #17529

* github.com:scylladb/scylladb:
  test/topology_custom: test_read_repair.py: reduce run-time
  replica/database: get_query_max_result_size(): use query_page_size_in_bytes
  replica/database: include page-size in max-result-size
  query-request: max_result_size: add without_page_limit()
  db/config: introduce query_page_size_in_bytes

(cherry picked from commit 616eec2)
avikivity authored and denesb committed Feb 28, 2024
1 parent 6a6450a commit 58a1be9
Showing 10 changed files with 37 additions and 24 deletions.
3 changes: 3 additions & 0 deletions db/config.cc
@@ -678,6 +678,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
         "The maximum number of tombstones a query can scan before aborting.")
     , query_tombstone_page_limit(this, "query_tombstone_page_limit", liveness::LiveUpdate, value_status::Used, 10000,
         "The number of tombstones after which a query cuts a page, even if not full or even empty.")
+    , query_page_size_in_bytes(this, "query_page_size_in_bytes", liveness::LiveUpdate, value_status::Used, 1 << 20,
+        "The size of pages in bytes, after a page accumulates this much data, the page is cut and sent to the client."
+        " Setting a too large value increases the risk of OOM.")
     /**
      * @Group Network timeout settings
      */
1 change: 1 addition & 0 deletions db/config.hh
@@ -254,6 +254,7 @@ public:
     named_value<uint32_t> tombstone_warn_threshold;
     named_value<uint32_t> tombstone_failure_threshold;
     named_value<uint64_t> query_tombstone_page_limit;
+    named_value<uint64_t> query_page_size_in_bytes;
     named_value<uint32_t> range_request_timeout_in_ms;
     named_value<uint32_t> read_request_timeout_in_ms;
     named_value<uint32_t> counter_write_request_timeout_in_ms;
2 changes: 1 addition & 1 deletion multishard_mutation_query.cc
@@ -251,7 +251,7 @@ class read_context : public reader_lifecycle_policy_v2 {
     }
 
     query::max_result_size get_max_result_size() {
-        return _cmd.max_result_size ? *_cmd.max_result_size : _db.local().get_unlimited_query_max_result_size();
+        return _cmd.max_result_size ? *_cmd.max_result_size : _db.local().get_query_max_result_size();
     }
 
     virtual flat_mutation_reader_v2 create_reader(
3 changes: 3 additions & 0 deletions query-request.hh
@@ -363,6 +363,9 @@ public:
     uint64_t get_page_size() const {
         return page_size == 0 ? hard_limit : page_size;
     }
+    max_result_size without_page_limit() const {
+        return max_result_size(soft_limit, hard_limit, 0);
+    }
     friend bool operator==(const max_result_size&, const max_result_size&);
     friend class ser::serializer<query::max_result_size>;
 };
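
To make the new API concrete, here is a minimal stand-alone sketch of the max_result_size semantics visible in this hunk. It is an illustration only, not the actual Scylla class: the member names and the two methods mirror the diff; everything else (the constructor, the default argument) is assumed.

    #include <cassert>
    #include <cstdint>

    struct max_result_size {
        uint64_t soft_limit;
        uint64_t hard_limit;
        uint64_t page_size; // 0 means: no separate size-based page limit

        max_result_size(uint64_t soft, uint64_t hard, uint64_t page = 0)
            : soft_limit(soft), hard_limit(hard), page_size(page) {}

        // Mirrors get_page_size() from the hunk above: with no explicit
        // page size, pages are cut only by the hard safety limit.
        uint64_t get_page_size() const {
            return page_size == 0 ? hard_limit : page_size;
        }

        // Mirrors the new without_page_limit(): same safety limits, but
        // size-based page cutting disabled.
        max_result_size without_page_limit() const {
            return max_result_size(soft_limit, hard_limit, 0);
        }
    };

    int main() {
        max_result_size s(1u << 20, 10u << 20, 1024);
        assert(s.get_page_size() == 1024);
        assert(s.without_page_limit().get_page_size() == (10u << 20));
    }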
18 changes: 9 additions & 9 deletions replica/database.cc
@@ -1692,7 +1692,7 @@ database::query(schema_ptr s, const query::read_command& cmd, query::result_opti
     }
 
     auto& semaphore = get_reader_concurrency_semaphore();
-    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_unlimited_query_max_result_size();
+    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_query_max_result_size();
 
     std::optional<query::querier> querier_opt;
     lw_shared_ptr<query::result> result;
@@ -1757,7 +1757,7 @@ database::query_mutations(schema_ptr s, const query::read_command& cmd, const dh
 
     const auto short_read_allwoed = query::short_read(cmd.slice.options.contains<query::partition_slice::option::allow_short_read>());
     auto& semaphore = get_reader_concurrency_semaphore();
-    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_unlimited_query_max_result_size();
+    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_query_max_result_size();
     auto accounter = co_await get_result_memory_limiter().new_mutation_read(max_result_size, short_read_allwoed);
     column_family& cf = find_column_family(cmd.cf_id);
 
@@ -1815,13 +1815,15 @@ database::query_mutations(schema_ptr s, const query::read_command& cmd, const dh
     co_return std::tuple(std::move(result), hit_rate);
 }
 
-query::max_result_size database::get_unlimited_query_max_result_size() const {
+query::max_result_size database::get_query_max_result_size() const {
     switch (classify_request(_dbcfg)) {
     case request_class::user:
-        return query::max_result_size(_cfg.max_memory_for_unlimited_query_soft_limit(), _cfg.max_memory_for_unlimited_query_hard_limit());
+        return query::max_result_size(_cfg.max_memory_for_unlimited_query_soft_limit(), _cfg.max_memory_for_unlimited_query_hard_limit(),
+                _cfg.query_page_size_in_bytes());
     case request_class::system: [[fallthrough]];
     case request_class::maintenance:
-        return query::max_result_size(query::result_memory_limiter::unlimited_result_size);
+        return query::max_result_size(query::result_memory_limiter::unlimited_result_size, query::result_memory_limiter::unlimited_result_size,
+                query::result_memory_limiter::maximum_result_size);
     }
     std::abort();
 }
@@ -3141,8 +3143,7 @@ future<foreign_ptr<lw_shared_ptr<reconcilable_result>>> query_mutations(
         const dht::partition_range& pr,
         const query::partition_slice& ps,
         db::timeout_clock::time_point timeout) {
-    auto max_size = db.local().get_unlimited_query_max_result_size();
-    auto max_res_size = query::max_result_size(max_size.soft_limit, max_size.hard_limit, query::result_memory_limiter::maximum_result_size);
+    auto max_res_size = db.local().get_query_max_result_size();
     auto cmd = query::read_command(s->id(), s->version(), ps, max_res_size, query::tombstone_limit::max);
     auto erm = s->table().get_effective_replication_map();
     if (auto shard_opt = dht::is_single_shard(erm->get_sharder(*s), *s, pr)) {
@@ -3165,8 +3166,7 @@ future<foreign_ptr<lw_shared_ptr<query::result>>> query_data(
         const dht::partition_range& pr,
         const query::partition_slice& ps,
         db::timeout_clock::time_point timeout) {
-    auto max_size = db.local().get_unlimited_query_max_result_size();
-    auto max_res_size = query::max_result_size(max_size.soft_limit, max_size.hard_limit, query::result_memory_limiter::maximum_result_size);
+    auto max_res_size = db.local().get_query_max_result_size();
     auto cmd = query::read_command(s->id(), s->version(), ps, max_res_size, query::tombstone_limit::max);
     auto prs = dht::partition_range_vector{pr};
     auto opts = query::result_options::only_result();
4 changes: 2 additions & 2 deletions replica/database.hh
@@ -1795,9 +1795,9 @@ public:
         return *_data_listeners;
     }
 
-    // Get the maximum result size for an unlimited query, appropriate for the
+    // Get the maximum result size for a query, appropriate for the
     // query class, which is deduced from the current scheduling group.
-    query::max_result_size get_unlimited_query_max_result_size() const;
+    query::max_result_size get_query_max_result_size() const;
 
     // Get the reader concurrency semaphore, appropriate for the query class,
     // which is deduced from the current scheduling group.
2 changes: 1 addition & 1 deletion replica/mutation_dump.cc
@@ -576,7 +576,7 @@ future<foreign_ptr<lw_shared_ptr<query::result>>> dump_mutations(
     }
 
     auto permit = co_await db.local().obtain_reader_permit(underlying_schema, "mutation-dump", timeout, ts);
-    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : db.local().get_unlimited_query_max_result_size();
+    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : db.local().get_query_max_result_size();
    permit.set_max_result_size(max_result_size);
 
     const auto opts = query::result_options::only_result();
5 changes: 2 additions & 3 deletions service/storage_proxy.cc
@@ -2304,14 +2304,13 @@ endpoints_to_replica_ids(const locator::token_metadata& tm, const inet_address_v
 
 query::max_result_size storage_proxy::get_max_result_size(const query::partition_slice& slice) const {
     if (_features.separate_page_size_and_safety_limit) {
-        auto max_size = _db.local().get_unlimited_query_max_result_size();
-        return query::max_result_size(max_size.soft_limit, max_size.hard_limit, query::result_memory_limiter::maximum_result_size);
+        return _db.local().get_query_max_result_size();
     }
     // FIXME: Remove the code below once SEPARATE_PAGE_SIZE_AND_SAFETY_LIMIT
     // cluster feature is released for more than 2 years and can be
     // retired.
     if (!slice.options.contains<query::partition_slice::option::allow_short_read>() || slice.options.contains<query::partition_slice::option::reversed>()) {
-        return _db.local().get_unlimited_query_max_result_size();
+        return _db.local().get_query_max_result_size().without_page_limit();
     } else {
         return query::max_result_size(query::result_memory_limiter::maximum_result_size);
     }
15 changes: 10 additions & 5 deletions test/boost/database_test.cc
@@ -1092,7 +1092,7 @@ SEASTAR_THREAD_TEST_CASE(reader_concurrency_semaphore_selection_test) {
     }, std::move(cfg)).get();
 }
 
-SEASTAR_THREAD_TEST_CASE(max_result_size_for_unlimited_query_selection_test) {
+SEASTAR_THREAD_TEST_CASE(max_result_size_for_query_selection_test) {
     cql_test_config cfg;
 
     cfg.db_config->max_memory_for_unlimited_query_soft_limit(1 * 1024 * 1024, utils::config_file::config_source::CommandLine);
@@ -1103,9 +1103,14 @@ SEASTAR_THREAD_TEST_CASE(max_result_size_for_unlimited_query_selection_test) {
         destroy_scheduling_group(unknown_scheduling_group).get();
     });
 
-    const auto user_max_result_size = query::max_result_size(cfg.db_config->max_memory_for_unlimited_query_soft_limit(),
-            cfg.db_config->max_memory_for_unlimited_query_hard_limit());
-    const auto system_max_result_size = query::max_result_size(query::result_memory_limiter::unlimited_result_size);
+    const auto user_max_result_size = query::max_result_size(
+            cfg.db_config->max_memory_for_unlimited_query_soft_limit(),
+            cfg.db_config->max_memory_for_unlimited_query_hard_limit(),
+            query::result_memory_limiter::maximum_result_size);
+    const auto system_max_result_size = query::max_result_size(
+            query::result_memory_limiter::unlimited_result_size,
+            query::result_memory_limiter::unlimited_result_size,
+            query::result_memory_limiter::maximum_result_size);
     const auto maintenance_max_result_size = system_max_result_size;
 
     std::vector<std::pair<scheduling_group, query::max_result_size>> scheduling_group_and_expected_max_result_size{
@@ -1128,7 +1133,7 @@ SEASTAR_THREAD_TEST_CASE(max_result_size_for_unlimited_query_selection_test) {
     database_test tdb(db);
     for (const auto& [sched_group, expected_max_size] : scheduling_group_and_expected_max_result_size) {
         with_scheduling_group(sched_group, [&db, sched_group = sched_group, expected_max_size = expected_max_size] {
-            const auto max_size = db.get_unlimited_query_max_result_size();
+            const auto max_size = db.get_query_max_result_size();
             if (max_size != expected_max_size) {
                 BOOST_FAIL(fmt::format("Unexpected max_size for scheduling group {}, expected {{{}, {}}}, got {{{}, {}}}",
                     sched_group.name(),
8 changes: 5 additions & 3 deletions test/topology_custom/test_read_repair.py
@@ -212,7 +212,9 @@ async def test_incremental_read_repair(data_class, workdir, manager):
     seed = int(time.time())
     logger.info(f"random-seed: {seed}")
     random.seed(seed)
-    cmdline = ["--hinted-handoff-enabled", "0", "--query-tombstone-page-limit", "1000"]
+    cmdline = ["--hinted-handoff-enabled", "0",
+               "--query-tombstone-page-limit", "10",
+               "--query-page-size-in-bytes", "1024"]
     node1 = await manager.server_add(cmdline=cmdline)
     node2 = await manager.server_add(cmdline=cmdline)
 
@@ -231,8 +233,8 @@ async def test_incremental_read_repair(data_class, workdir, manager):
     dead_timestamp = int(time.time() * 1000)
     live_timestamp = dead_timestamp + 1
 
-    total_rows = 20000
-    max_live_rows = 32
+    total_rows = 100
+    max_live_rows = 8
     deletion_time = datetime.datetime.now()
 
     row_set: TypeAlias = set[int]
