From 5b167a4ad7bf29aa06e30650f3d93698d37695d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Fri, 14 Jul 2023 13:37:51 +0200 Subject: [PATCH 1/3] config: add schema_commitlog_segment_size_in_mb variable In #14668, we have decided to introduce a new scylla.yaml variable for the schema commitlog segment size. The segment size puts a limit on the mutation size that can be written at once, and some schema mutation writes are much larger than average, as shown in #13864. Therefore, increasing the schema commitlog segment size is sometimes necessary. --- conf/scylla.yaml | 7 +++++++ db/config.cc | 3 +++ db/config.hh | 1 + replica/database.cc | 2 +- 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/conf/scylla.yaml b/conf/scylla.yaml index 1c3afc7cc35f..c1a376e21b71 100644 --- a/conf/scylla.yaml +++ b/conf/scylla.yaml @@ -71,6 +71,13 @@ commitlog_sync_period_in_ms: 10000 # is reasonable. commitlog_segment_size_in_mb: 32 +# The size of the individual schema commitlog file segments. + +# The segment size puts a limit on the mutation size that can be +# written at once, and some schema mutation writes are much larger +# than average. +schema_commitlog_segment_size_in_mb: 32 + # seed_provider class_name is saved for future use. # A seed address is mandatory. seed_provider: diff --git a/db/config.cc b/db/config.cc index 9783722d2747..1feae88ac2ae 100644 --- a/db/config.cc +++ b/db/config.cc @@ -469,6 +469,9 @@ db::config::config(std::shared_ptr exts) , commitlog_segment_size_in_mb(this, "commitlog_segment_size_in_mb", value_status::Used, 64, "Sets the size of the individual commitlog file segments. A commitlog segment may be archived, deleted, or recycled after all its data has been flushed to SSTables. This amount of data can potentially include commitlog segments from every table in the system. The default size is usually suitable for most commitlog archiving, but if you want a finer granularity, 8 or 16 MB is reasonable. See Commit log archive configuration.\n" "Related information: Commit log archive configuration") + , schema_commitlog_segment_size_in_mb(this, "schema_commitlog_segment_size_in_mb", value_status::Used, 32, + "Sets the size of the individual schema commitlog file segments. The segment size puts a limit on the mutation size that can be written at once, and some schema mutation writes are much larger than average.\n" + "Related information: Commit log archive configuration") /* Note: does not exist on the listing page other than in above comment, wtf? */ , commitlog_sync_period_in_ms(this, "commitlog_sync_period_in_ms", value_status::Used, 10000, "Controls how long the system waits for other writes before performing a sync in \"periodic\" mode.") diff --git a/db/config.hh b/db/config.hh index ba07a34a5341..aac75a71d6f8 100644 --- a/db/config.hh +++ b/db/config.hh @@ -199,6 +199,7 @@ public: named_value failure_detector_timeout_in_ms; named_value commitlog_sync; named_value commitlog_segment_size_in_mb; + named_value schema_commitlog_segment_size_in_mb; named_value commitlog_sync_period_in_ms; named_value commitlog_sync_batch_window_in_ms; named_value commitlog_total_space_in_mb; diff --git a/replica/database.cc b/replica/database.cc index db8e3295045b..046e4570f47a 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -951,7 +951,7 @@ void database::maybe_init_schema_commitlog() { c.fname_prefix = db::schema_tables::COMMITLOG_FILENAME_PREFIX; c.metrics_category_name = "schema-commitlog"; c.commitlog_total_space_in_mb = 10 << 20; - c.commitlog_segment_size_in_mb = _cfg.commitlog_segment_size_in_mb(); + c.commitlog_segment_size_in_mb = _cfg.schema_commitlog_segment_size_in_mb(); c.commitlog_sync_period_in_ms = _cfg.commitlog_sync_period_in_ms(); c.mode = db::commitlog::sync_mode::BATCH; c.extensions = &_cfg.extensions(); From b3be9617dc31221ceb47d4ea9ecbcd7577f9f13b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Mon, 17 Jul 2023 14:13:03 +0200 Subject: [PATCH 2/3] config: set schema_commitlog_segment_size_in_mb to 128 We increase the default schema commitlog segment size so that the large mutations do not fail. We have agreed that 128 MB is sufficient. --- conf/scylla.yaml | 11 ++++++----- db/config.cc | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/conf/scylla.yaml b/conf/scylla.yaml index c1a376e21b71..4b16b211488d 100644 --- a/conf/scylla.yaml +++ b/conf/scylla.yaml @@ -72,11 +72,12 @@ commitlog_sync_period_in_ms: 10000 commitlog_segment_size_in_mb: 32 # The size of the individual schema commitlog file segments. - -# The segment size puts a limit on the mutation size that can be -# written at once, and some schema mutation writes are much larger -# than average. -schema_commitlog_segment_size_in_mb: 32 +# +# The default size is 128, which is 4 times larger than the default +# size of the data commitlog. It's because the segment size puts +# a limit on the mutation size that can be written at once, and some +# schema mutation writes are much larger than average. +schema_commitlog_segment_size_in_mb: 128 # seed_provider class_name is saved for future use. # A seed address is mandatory. diff --git a/db/config.cc b/db/config.cc index 1feae88ac2ae..e19056c9ecd5 100644 --- a/db/config.cc +++ b/db/config.cc @@ -469,8 +469,8 @@ db::config::config(std::shared_ptr exts) , commitlog_segment_size_in_mb(this, "commitlog_segment_size_in_mb", value_status::Used, 64, "Sets the size of the individual commitlog file segments. A commitlog segment may be archived, deleted, or recycled after all its data has been flushed to SSTables. This amount of data can potentially include commitlog segments from every table in the system. The default size is usually suitable for most commitlog archiving, but if you want a finer granularity, 8 or 16 MB is reasonable. See Commit log archive configuration.\n" "Related information: Commit log archive configuration") - , schema_commitlog_segment_size_in_mb(this, "schema_commitlog_segment_size_in_mb", value_status::Used, 32, - "Sets the size of the individual schema commitlog file segments. The segment size puts a limit on the mutation size that can be written at once, and some schema mutation writes are much larger than average.\n" + , schema_commitlog_segment_size_in_mb(this, "schema_commitlog_segment_size_in_mb", value_status::Used, 128, + "Sets the size of the individual schema commitlog file segments. The default size is larger than the default size of the data commitlog because the segment size puts a limit on the mutation size that can be written at once, and some schema mutation writes are much larger than average.\n" "Related information: Commit log archive configuration") /* Note: does not exist on the listing page other than in above comment, wtf? */ , commitlog_sync_period_in_ms(this, "commitlog_sync_period_in_ms", value_status::Used, 10000, From ee1c240f2afa57dfa17cbad7be8a5b47a8425155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Fri, 14 Jul 2023 15:35:17 +0200 Subject: [PATCH 3/3] replica: do not derive the commitlog sync period for schema commitlog We don't want to apply the value of the commitlog_sync_period_in_ms variable to schema commitlog. Schema commitlog runs in batch mode, so it doesn't need this parameter. --- replica/database.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/replica/database.cc b/replica/database.cc index 046e4570f47a..eb4356347b91 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -952,7 +952,6 @@ void database::maybe_init_schema_commitlog() { c.metrics_category_name = "schema-commitlog"; c.commitlog_total_space_in_mb = 10 << 20; c.commitlog_segment_size_in_mb = _cfg.schema_commitlog_segment_size_in_mb(); - c.commitlog_sync_period_in_ms = _cfg.commitlog_sync_period_in_ms(); c.mode = db::commitlog::sync_mode::BATCH; c.extensions = &_cfg.extensions(); c.use_o_dsync = _cfg.commitlog_use_o_dsync();