Merge 'test/topology_custom: test_read_repair.py: reduce run-time' from Botond Dénes

This test needed a lot of data to ensure the read repair spans multiple pages. This change adjusts two key configuration items, allowing for a drastic reduction of the data size and, consequently, a large reduction in run-time.
* Changes query-tombstone-page-limit: 1000 -> 10. Before f068d1a, reducing this to too small a value would start killing internal queries; after said commit this is no longer a concern, as the limit no longer affects unpaged queries.
* Sets the new query-page-size-in-bytes: 1MB (default) -> 1KB.

The latter configuration item is new, added by the first patches of this series. It makes configurable the page size in bytes after which a page is cut. Previously this was a hard-coded constant: 1MB. This forced any test that wanted to exercise size-based page cutting to work with a large dataset. The problem was especially pronounced in the test fixed by this PR, because it works with tombstones, which are tiny, so a great many of them were needed to trigger size-based paging.
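
For reference, the test below enables both settings via server command-line flags; per the db/config.cc hunk further down, the equivalent scylla.yaml keys would presumably be:

    query_tombstone_page_limit: 10
    query_page_size_in_bytes: 1024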

With these two changes, we can reduce the data size:
* total_rows: 20000 -> 100
* max_live_rows: 32 -> 8
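
A back-of-envelope calculation makes these reductions plausible. The ~50 bytes per row tombstone below is an assumed figure for illustration only; the real footprint depends on schema and serialization:

    #include <cstdio>

    int main() {
        // Assumed per-row tombstone footprint in an accumulating page;
        // not a measured figure.
        const unsigned tombstone_bytes = 50;
        std::printf("rows per 1MB page: ~%u\n", (1u << 20) / tombstone_bytes); // ~20971
        std::printf("rows per 1KB page: ~%u\n", 1024u / tombstone_bytes);      // ~20
    }

Under this assumption, 1MB pages need tens of thousands of tiny tombstones before a page is cut on size, while with 1KB pages a hundred rows already yield multiple pages.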

The runtime of the test consequently drops from 62 seconds to 13.5 seconds (dev mode, on my build machine).

Fixes: #15425
Fixes: #16899

Closes #17529

* github.com:scylladb/scylladb:
  test/topology_custom: test_read_repair.py: reduce run-time
  replica/database: get_query_max_result_size(): use query_page_size_in_bytes
  replica/database: include page-size in max-result-size
  query-request: max_result_size: add without_page_limit()
  db/config: introduce query_page_size_in_bytes

(cherry picked from commit 616eec2)
avikivity authored and denesb committed Feb 28, 2024
1 parent 6a6450a commit 58a1be9
Showing 10 changed files with 37 additions and 24 deletions.
3 changes: 3 additions & 0 deletions db/config.cc
@@ -678,6 +678,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
         "The maximum number of tombstones a query can scan before aborting.")
     , query_tombstone_page_limit(this, "query_tombstone_page_limit", liveness::LiveUpdate, value_status::Used, 10000,
         "The number of tombstones after which a query cuts a page, even if not full or even empty.")
+    , query_page_size_in_bytes(this, "query_page_size_in_bytes", liveness::LiveUpdate, value_status::Used, 1 << 20,
+        "The size of pages in bytes, after a page accumulates this much data, the page is cut and sent to the client."
+        " Setting a too large value increases the risk of OOM.")
     /**
      * @Group Network timeout settings
      */
1 change: 1 addition & 0 deletions db/config.hh
@@ -254,6 +254,7 @@ public:
     named_value<uint32_t> tombstone_warn_threshold;
     named_value<uint32_t> tombstone_failure_threshold;
     named_value<uint64_t> query_tombstone_page_limit;
+    named_value<uint64_t> query_page_size_in_bytes;
     named_value<uint32_t> range_request_timeout_in_ms;
     named_value<uint32_t> read_request_timeout_in_ms;
     named_value<uint32_t> counter_write_request_timeout_in_ms;
2 changes: 1 addition & 1 deletion multishard_mutation_query.cc
@@ -251,7 +251,7 @@ class read_context : public reader_lifecycle_policy_v2 {
     }
 
     query::max_result_size get_max_result_size() {
-        return _cmd.max_result_size ? *_cmd.max_result_size : _db.local().get_unlimited_query_max_result_size();
+        return _cmd.max_result_size ? *_cmd.max_result_size : _db.local().get_query_max_result_size();
     }
 
     virtual flat_mutation_reader_v2 create_reader(
3 changes: 3 additions & 0 deletions query-request.hh
@@ -363,6 +363,9 @@ public:
     uint64_t get_page_size() const {
         return page_size == 0 ? hard_limit : page_size;
     }
+    max_result_size without_page_limit() const {
+        return max_result_size(soft_limit, hard_limit, 0);
+    }
     friend bool operator==(const max_result_size&, const max_result_size&);
     friend class ser::serializer<query::max_result_size>;
 };
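
To make the new API concrete, here is a minimal stand-alone sketch of the max_result_size semantics visible in this hunk. It is an illustration only, not the actual Scylla class: the member names and the two methods mirror the diff; everything else (the constructor, the default argument) is assumed.

    #include <cassert>
    #include <cstdint>

    struct max_result_size {
        uint64_t soft_limit;
        uint64_t hard_limit;
        uint64_t page_size; // 0 means: no separate size-based page limit

        max_result_size(uint64_t soft, uint64_t hard, uint64_t page = 0)
            : soft_limit(soft), hard_limit(hard), page_size(page) {}

        // Mirrors get_page_size() from the hunk above: with no explicit
        // page size, pages are cut only by the hard safety limit.
        uint64_t get_page_size() const {
            return page_size == 0 ? hard_limit : page_size;
        }

        // Mirrors the new without_page_limit(): same safety limits, but
        // size-based page cutting disabled.
        max_result_size without_page_limit() const {
            return max_result_size(soft_limit, hard_limit, 0);
        }
    };

    int main() {
        max_result_size s(1u << 20, 10u << 20, 1024);
        assert(s.get_page_size() == 1024);
        assert(s.without_page_limit().get_page_size() == (10u << 20));
    }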
18 changes: 9 additions & 9 deletions replica/database.cc
@@ -1692,7 +1692,7 @@ database::query(schema_ptr s, const query::read_command& cmd, query::result_opti
     }
 
     auto& semaphore = get_reader_concurrency_semaphore();
-    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_unlimited_query_max_result_size();
+    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_query_max_result_size();
 
     std::optional<query::querier> querier_opt;
     lw_shared_ptr<query::result> result;
@@ -1757,7 +1757,7 @@ database::query_mutations(schema_ptr s, const query::read_command& cmd, const dh
 
     const auto short_read_allwoed = query::short_read(cmd.slice.options.contains<query::partition_slice::option::allow_short_read>());
     auto& semaphore = get_reader_concurrency_semaphore();
-    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_unlimited_query_max_result_size();
+    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : get_query_max_result_size();
     auto accounter = co_await get_result_memory_limiter().new_mutation_read(max_result_size, short_read_allwoed);
     column_family& cf = find_column_family(cmd.cf_id);
 
@@ -1815,13 +1815,15 @@ database::query_mutations(schema_ptr s, const query::read_command& cmd, const dh
     co_return std::tuple(std::move(result), hit_rate);
 }
 
-query::max_result_size database::get_unlimited_query_max_result_size() const {
+query::max_result_size database::get_query_max_result_size() const {
     switch (classify_request(_dbcfg)) {
     case request_class::user:
-        return query::max_result_size(_cfg.max_memory_for_unlimited_query_soft_limit(), _cfg.max_memory_for_unlimited_query_hard_limit());
+        return query::max_result_size(_cfg.max_memory_for_unlimited_query_soft_limit(), _cfg.max_memory_for_unlimited_query_hard_limit(),
+                _cfg.query_page_size_in_bytes());
     case request_class::system: [[fallthrough]];
     case request_class::maintenance:
-        return query::max_result_size(query::result_memory_limiter::unlimited_result_size);
+        return query::max_result_size(query::result_memory_limiter::unlimited_result_size, query::result_memory_limiter::unlimited_result_size,
+                query::result_memory_limiter::maximum_result_size);
     }
     std::abort();
 }
@@ -3141,8 +3143,7 @@ future<foreign_ptr<lw_shared_ptr<reconcilable_result>>> query_mutations(
         const dht::partition_range& pr,
         const query::partition_slice& ps,
         db::timeout_clock::time_point timeout) {
-    auto max_size = db.local().get_unlimited_query_max_result_size();
-    auto max_res_size = query::max_result_size(max_size.soft_limit, max_size.hard_limit, query::result_memory_limiter::maximum_result_size);
+    auto max_res_size = db.local().get_query_max_result_size();
     auto cmd = query::read_command(s->id(), s->version(), ps, max_res_size, query::tombstone_limit::max);
     auto erm = s->table().get_effective_replication_map();
     if (auto shard_opt = dht::is_single_shard(erm->get_sharder(*s), *s, pr)) {
@@ -3165,8 +3166,7 @@ future<foreign_ptr<lw_shared_ptr<query::result>>> query_data(
         const dht::partition_range& pr,
         const query::partition_slice& ps,
         db::timeout_clock::time_point timeout) {
-    auto max_size = db.local().get_unlimited_query_max_result_size();
-    auto max_res_size = query::max_result_size(max_size.soft_limit, max_size.hard_limit, query::result_memory_limiter::maximum_result_size);
+    auto max_res_size = db.local().get_query_max_result_size();
     auto cmd = query::read_command(s->id(), s->version(), ps, max_res_size, query::tombstone_limit::max);
     auto prs = dht::partition_range_vector{pr};
     auto opts = query::result_options::only_result();
4 changes: 2 additions & 2 deletions replica/database.hh
@@ -1795,9 +1795,9 @@ public:
         return *_data_listeners;
     }
 
-    // Get the maximum result size for an unlimited query, appropriate for the
+    // Get the maximum result size for a query, appropriate for the
     // query class, which is deduced from the current scheduling group.
-    query::max_result_size get_unlimited_query_max_result_size() const;
+    query::max_result_size get_query_max_result_size() const;
 
     // Get the reader concurrency semaphore, appropriate for the query class,
     // which is deduced from the current scheduling group.
2 changes: 1 addition & 1 deletion replica/mutation_dump.cc
@@ -576,7 +576,7 @@ future<foreign_ptr<lw_shared_ptr<query::result>>> dump_mutations(
     }
 
     auto permit = co_await db.local().obtain_reader_permit(underlying_schema, "mutation-dump", timeout, ts);
-    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : db.local().get_unlimited_query_max_result_size();
+    auto max_result_size = cmd.max_result_size ? *cmd.max_result_size : db.local().get_query_max_result_size();
    permit.set_max_result_size(max_result_size);
 
     const auto opts = query::result_options::only_result();
5 changes: 2 additions & 3 deletions service/storage_proxy.cc
@@ -2304,14 +2304,13 @@ endpoints_to_replica_ids(const locator::token_metadata& tm, const inet_address_v
 
 query::max_result_size storage_proxy::get_max_result_size(const query::partition_slice& slice) const {
     if (_features.separate_page_size_and_safety_limit) {
-        auto max_size = _db.local().get_unlimited_query_max_result_size();
-        return query::max_result_size(max_size.soft_limit, max_size.hard_limit, query::result_memory_limiter::maximum_result_size);
+        return _db.local().get_query_max_result_size();
     }
     // FIXME: Remove the code below once SEPARATE_PAGE_SIZE_AND_SAFETY_LIMIT
     // cluster feature is released for more than 2 years and can be
     // retired.
     if (!slice.options.contains<query::partition_slice::option::allow_short_read>() || slice.options.contains<query::partition_slice::option::reversed>()) {
-        return _db.local().get_unlimited_query_max_result_size();
+        return _db.local().get_query_max_result_size().without_page_limit();
     } else {
         return query::max_result_size(query::result_memory_limiter::maximum_result_size);
     }
15 changes: 10 additions & 5 deletions test/boost/database_test.cc
@@ -1092,7 +1092,7 @@ SEASTAR_THREAD_TEST_CASE(reader_concurrency_semaphore_selection_test) {
     }, std::move(cfg)).get();
 }
 
-SEASTAR_THREAD_TEST_CASE(max_result_size_for_unlimited_query_selection_test) {
+SEASTAR_THREAD_TEST_CASE(max_result_size_for_query_selection_test) {
     cql_test_config cfg;
 
     cfg.db_config->max_memory_for_unlimited_query_soft_limit(1 * 1024 * 1024, utils::config_file::config_source::CommandLine);
@@ -1103,9 +1103,14 @@ SEASTAR_THREAD_TEST_CASE(max_result_size_for_unlimited_query_selection_test) {
         destroy_scheduling_group(unknown_scheduling_group).get();
     });
 
-    const auto user_max_result_size = query::max_result_size(cfg.db_config->max_memory_for_unlimited_query_soft_limit(),
-            cfg.db_config->max_memory_for_unlimited_query_hard_limit());
-    const auto system_max_result_size = query::max_result_size(query::result_memory_limiter::unlimited_result_size);
+    const auto user_max_result_size = query::max_result_size(
+            cfg.db_config->max_memory_for_unlimited_query_soft_limit(),
+            cfg.db_config->max_memory_for_unlimited_query_hard_limit(),
+            query::result_memory_limiter::maximum_result_size);
+    const auto system_max_result_size = query::max_result_size(
+            query::result_memory_limiter::unlimited_result_size,
+            query::result_memory_limiter::unlimited_result_size,
+            query::result_memory_limiter::maximum_result_size);
     const auto maintenance_max_result_size = system_max_result_size;
 
     std::vector<std::pair<scheduling_group, query::max_result_size>> scheduling_group_and_expected_max_result_size{
@@ -1128,7 +1133,7 @@ SEASTAR_THREAD_TEST_CASE(max_result_size_for_unlimited_query_selection_test) {
     database_test tdb(db);
     for (const auto& [sched_group, expected_max_size] : scheduling_group_and_expected_max_result_size) {
         with_scheduling_group(sched_group, [&db, sched_group = sched_group, expected_max_size = expected_max_size] {
-            const auto max_size = db.get_unlimited_query_max_result_size();
+            const auto max_size = db.get_query_max_result_size();
             if (max_size != expected_max_size) {
                 BOOST_FAIL(fmt::format("Unexpected max_size for scheduling group {}, expected {{{}, {}}}, got {{{}, {}}}",
                     sched_group.name(),
8 changes: 5 additions & 3 deletions test/topology_custom/test_read_repair.py
@@ -212,7 +212,9 @@ async def test_incremental_read_repair(data_class, workdir, manager):
     seed = int(time.time())
     logger.info(f"random-seed: {seed}")
     random.seed(seed)
-    cmdline = ["--hinted-handoff-enabled", "0", "--query-tombstone-page-limit", "1000"]
+    cmdline = ["--hinted-handoff-enabled", "0",
+               "--query-tombstone-page-limit", "10",
+               "--query-page-size-in-bytes", "1024"]
     node1 = await manager.server_add(cmdline=cmdline)
     node2 = await manager.server_add(cmdline=cmdline)
 
@@ -231,8 +233,8 @@ async def test_incremental_read_repair(data_class, workdir, manager):
     dead_timestamp = int(time.time() * 1000)
     live_timestamp = dead_timestamp + 1
 
-    total_rows = 20000
-    max_live_rows = 32
+    total_rows = 100
+    max_live_rows = 8
     deletion_time = datetime.datetime.now()
 
     row_set: TypeAlias = set[int]
