Skip to content

Commit

Permalink
compaction: Fix key estimation per sstable to produce efficient filters
Browse files Browse the repository at this point in the history
The estimation assumes that size of other components are irrelevant,
when estimating the number of partitions for each output sstable.
The sstables are split according to the data file size, therefore
size of other files are irrelevant for the estimation.

With certain data models, like single-row partitions containing small
values, the index could be even larger than data.
For example, assume index is as large as data, then the estimation
would say that 2x more sstables will be generated, and as a result,
each sstable are underestimated to have 2x less keys.

Fix it by only accounting size of data file.

Fixes #15726.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>

Closes #15727
  • Loading branch information
raphaelsc authored and denesb committed Oct 17, 2023
1 parent 0ce9db2 commit da04fea
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
5 changes: 4 additions & 1 deletion compaction/compaction.cc
Expand Up @@ -439,6 +439,8 @@ class compaction {
const uint32_t _sstable_level;
uint64_t _start_size = 0;
uint64_t _end_size = 0;
// fully expired files, which are skipped, aren't taken into account.
uint64_t _compacting_data_file_size = 0;
uint64_t _estimated_partitions = 0;
uint64_t _bloom_filter_checks = 0;
db::replay_position _rp;
Expand Down Expand Up @@ -501,7 +503,7 @@ class compaction {
virtual uint64_t partitions_per_sstable() const {
// some tests use _max_sstable_size == 0 for force many one partition per sstable
auto max_sstable_size = std::max<uint64_t>(_max_sstable_size, 1);
uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_start_size) / max_sstable_size)));
uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_compacting_data_file_size) / max_sstable_size)));
return std::min(uint64_t(ceil(double(_estimated_partitions) / estimated_sstables)),
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions));
}
Expand Down Expand Up @@ -710,6 +712,7 @@ class compaction {
// for a better estimate for the number of partitions in the merged
// sstable than just adding up the lengths of individual sstables.
_estimated_partitions += sst->get_estimated_key_count();
_compacting_data_file_size += sst->ondisk_data_size();
// TODO:
// Note that this is not fully correct. Since we might be merging sstables that originated on
// another shard (#cpu changed), we might be comparing RP:s with differing shard ids,
Expand Down
51 changes: 51 additions & 0 deletions test/boost/sstable_compaction_test.cc
Expand Up @@ -5257,3 +5257,54 @@ SEASTAR_TEST_CASE(test_sstables_excluding_staging_correctness) {
}
});
}

// Reproducer for https://github.com/scylladb/scylladb/issues/15726.
SEASTAR_TEST_CASE(produces_optimal_filter_by_estimating_correctly_partitions_per_sstable) {
return test_env::do_with_async([] (test_env& env) {
auto builder = schema_builder("tests", "test")
.with_column("id", utf8_type, column_kind::partition_key)
.with_column("value", int32_type);
builder.set_compressor_params(compression_parameters::no_compression());
auto s = builder.build();
auto sst_gen = env.make_sst_factory(s);

auto compact = [&, s] (std::vector<shared_sstable> c, uint64_t max_size) -> compaction_result {
auto t = env.make_table_for_tests(s);
auto stop = deferred_stop(t);
t->disable_auto_compaction().get();
auto desc = sstables::compaction_descriptor(std::move(c));
desc.max_sstable_bytes = max_size;
return compact_sstables(std::move(desc), t, sst_gen).get0();
};

auto make_insert = [&] (partition_key key) {
mutation m(s, key);
m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::new_timestamp());
return m;
};

const sstring shared_key_prefix = "832193982198319823hsdjahdashjdsa81923189381931829sdajidjkas812938219jdsalljdadsajk319820";

std::vector<mutation> muts;
constexpr int keys = 200;
muts.reserve(keys);
for (auto i = 0; i < keys; i++) {
muts.push_back(make_insert(partition_key::from_exploded(*s, {to_bytes(shared_key_prefix + to_sstring(i))})));
}
auto sst = make_sstable_containing(sst_gen, std::move(muts));

testlog.info("index size: {}, data_size: {}", sst->index_size(), sst->ondisk_data_size());

uint64_t max_sstable_size = std::ceil(double(sst->ondisk_data_size()) / 10);
auto ret = compact({sst}, max_sstable_size);

uint64_t partitions_per_sstable = keys / ret.new_sstables.size();
auto filter = utils::i_filter::get_filter(partitions_per_sstable, s->bloom_filter_fp_chance(), utils::filter_format::m_format);

auto comp = ret.new_sstables.front()->get_open_info().get0();

// Filter for SSTable generated cannot be lower than the one expected
testlog.info("filter size: actual={}, expected>={}", comp.components->filter->memory_size(), filter->memory_size());
BOOST_REQUIRE(comp.components->filter->memory_size() >= filter->memory_size());
});
}

0 comments on commit da04fea

Please sign in to comment.