Skip to content

Commit

Permalink
Add sm.consolidation.total_buffer_size setting (TileDB-Inc#4550)
Browse files Browse the repository at this point in the history
This adds a new configuration parameter for directly setting the total
buffer size to use during fragment consolidation. This deprecates the
old sm.consolidation.buffer_size value. We will continue to use the old
value if set by users for backwards compatibility. If not set, the new
setting is used.

---
TYPE: FEATURE
DESC: Add sm.consolidation.total_buffer_size setting
  • Loading branch information
davisp committed Dec 5, 2023
1 parent 278c427 commit fe03460
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 2 deletions.
2 changes: 2 additions & 0 deletions test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ void check_save_to_file() {
ss << "sm.consolidation.steps 4294967295\n";
ss << "sm.consolidation.timestamp_end " << std::to_string(UINT64_MAX) << "\n";
ss << "sm.consolidation.timestamp_start 0\n";
ss << "sm.consolidation.total_buffer_size 2147483648\n";
ss << "sm.dedup_coords false\n";
ss << "sm.enable_signal_handlers true\n";
ss << "sm.encryption_type NO_ENCRYPTION\n";
Expand Down Expand Up @@ -661,6 +662,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
all_param_values["sm.consolidation.step_min_frags"] = "4294967295";
all_param_values["sm.consolidation.step_max_frags"] = "4294967295";
all_param_values["sm.consolidation.buffer_size"] = "50000000";
all_param_values["sm.consolidation.total_buffer_size"] = "2147483648";
all_param_values["sm.consolidation.max_fragment_size"] =
std::to_string(UINT64_MAX);
all_param_values["sm.consolidation.step_size_ratio"] = "0.0";
Expand Down
5 changes: 5 additions & 0 deletions tiledb/api/c_api/config/config_api_external.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,14 @@ TILEDB_EXPORT void tiledb_config_free(tiledb_config_t** config) TILEDB_NOEXCEPT;
* (since the resulting fragment is dense). <br>
* **Default**: 1.0
* - `sm.consolidation.buffer_size` <br>
* Deprecated. Prefer `sm.consolidation.total_buffer_size` instead.
* The size (in bytes) of the attribute buffers used during
* consolidation. <br>
* **Default**: 50,000,000
* - `sm.consolidation.total_buffer_size` <br>
* The combined size (in bytes) of all attribute buffers used during
* consolidation. <br>
* **Default**: 2,147,483,648
* - `sm.consolidation.max_fragment_size` <br>
* **Experimental** <br>
* The size (in bytes) of the maximum on-disk fragment size that will be
Expand Down
6 changes: 6 additions & 0 deletions tiledb/sm/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ const std::string Config::SM_IO_CONCURRENCY_LEVEL =
const std::string Config::SM_SKIP_CHECKSUM_VALIDATION = "false";
const std::string Config::SM_CONSOLIDATION_AMPLIFICATION = "1.0";
const std::string Config::SM_CONSOLIDATION_BUFFER_SIZE = "50000000";
const std::string Config::SM_CONSOLIDATION_TOTAL_BUFFER_SIZE = "2147483648";
const std::string Config::SM_CONSOLIDATION_MAX_FRAGMENT_SIZE =
std::to_string(UINT64_MAX);
const std::string Config::SM_CONSOLIDATION_PURGE_DELETED_CELLS = "false";
Expand Down Expand Up @@ -332,6 +333,9 @@ const std::map<std::string, std::string> default_config_values = {
Config::SM_CONSOLIDATION_AMPLIFICATION),
std::make_pair(
"sm.consolidation.buffer_size", Config::SM_CONSOLIDATION_BUFFER_SIZE),
std::make_pair(
"sm.consolidation.total_buffer_size",
Config::SM_CONSOLIDATION_TOTAL_BUFFER_SIZE),
std::make_pair(
"sm.consolidation.max_fragment_size",
Config::SM_CONSOLIDATION_MAX_FRAGMENT_SIZE),
Expand Down Expand Up @@ -741,6 +745,8 @@ Status Config::sanity_check(
RETURN_NOT_OK(utils::parse::convert(value, &vf));
} else if (param == "sm.consolidation.buffer_size") {
RETURN_NOT_OK(utils::parse::convert(value, &vuint64));
} else if (param == "sm.consolidation.total_buffer_size") {
RETURN_NOT_OK(utils::parse::convert(value, &vuint64));
} else if (param == "sm.consolidation.max_fragment_size") {
RETURN_NOT_OK(utils::parse::convert(value, &vuint64));
} else if (param == "sm.consolidation.purge_deleted_cells") {
Expand Down
3 changes: 3 additions & 0 deletions tiledb/sm/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ class Config {
/** The buffer size for each attribute used in consolidation. */
static const std::string SM_CONSOLIDATION_BUFFER_SIZE;

/** The total buffer size for all attributes during consolidation. */
static const std::string SM_CONSOLIDATION_TOTAL_BUFFER_SIZE;

/** The maximum fragment size used in consolidation. */
static const std::string SM_CONSOLIDATION_MAX_FRAGMENT_SIZE;

Expand Down
25 changes: 23 additions & 2 deletions tiledb/sm/consolidator/fragment_consolidator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,14 @@ FragmentConsolidator::create_buffers(

// Use the old buffer size setting to see how much memory we would use.
auto buffer_num = buffer_weights.size();
uint64_t total_budget = config.buffer_size_ * buffer_num;
uint64_t total_budget = config.total_buffer_size_;

// If a user set the per-attribute buffer size configuration, we override
// the use of the total_buffer_size config setting for backwards
// compatible behavior.
if (config.buffer_size_ != 0) {
total_budget = config.buffer_size_ * buffer_num;
}

// Create buffers.
std::vector<ByteVec> buffers(buffer_num);
Expand Down Expand Up @@ -896,8 +903,22 @@ Status FragmentConsolidator::set_config(const Config& config) {
"sm.consolidation.steps", &config_.steps_, &found));
assert(found);
config_.buffer_size_ = 0;
// Only set the buffer_size_ if the user specified a value. Otherwise, we use
// the new sm.consolidation.total_buffer_size instead.
if (merged_config.set_params().count("sm.consolidation.buffer_size") > 0) {
logger_->warn(
"The `sm.consolidation.buffer_size configuration setting has been "
"deprecated. Set consolidation buffer sizes using the newer "
"`sm.consolidation.total_buffer_size` setting.");
RETURN_NOT_OK(merged_config.get<uint64_t>(
"sm.consolidation.buffer_size", &config_.buffer_size_, &found));
assert(found);
}
config_.total_buffer_size_ = 0;
RETURN_NOT_OK(merged_config.get<uint64_t>(
"sm.consolidation.buffer_size", &config_.buffer_size_, &found));
"sm.consolidation.total_buffer_size",
&config_.total_buffer_size_,
&found));
assert(found);
config_.max_fragment_size_ = 0;
RETURN_NOT_OK(merged_config.get<uint64_t>(
Expand Down
2 changes: 2 additions & 0 deletions tiledb/sm/consolidator/fragment_consolidator.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ class FragmentConsolidator : public Consolidator {
float amplification_;
/** Attribute buffer size. */
uint64_t buffer_size_;
/** Total buffer size for all attributes. */
uint64_t total_buffer_size_;
/** Max fragment size. */
uint64_t max_fragment_size_;
/**
Expand Down
5 changes: 5 additions & 0 deletions tiledb/sm/cpp_api/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,14 @@ class Config {
* (since the resulting fragment is dense). <br>
* **Default**: 1.0
* - `sm.consolidation.buffer_size` <br>
* Deprecated. Prefer `sm.consolidation.total_buffer_size` instead.
* The size (in bytes) of the attribute buffers used during
* consolidation. <br>
* **Default**: 50,000,000
* - `sm.consolidation.total_buffer_size` <br>
* The combined size (in bytes) of all attribute buffers used during
* consolidation. <br>
* **Default**: 2,147,483,648
* - `sm.consolidation.max_fragment_size` <br>
* **Experimental** <br>
* The size (in bytes) of the maximum on-disk fragment size that will be
Expand Down

0 comments on commit fe03460

Please sign in to comment.