Skip to content

Commit

Permalink
wal: fix wal_queue_max_size assignment during initial box.cfg
Browse files Browse the repository at this point in the history
wal_queue_max_size took effect only after the initial box.cfg call,
meaning that users with non-zero `replication_sync_timeout` still synced
using the default 16 MB queue size. In some cases the default was too
big, and the same issues as described in #5536 arose.

Fix this.

Closes #10013

NO_DOC=bugfix

(cherry picked from commit ab0f791)
  • Loading branch information
sergepetrenko committed May 21, 2024
1 parent 77fb489 commit 359df4f
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 0 deletions.
4 changes: 4 additions & 0 deletions changelogs/unreleased/gh-10013-wal-queue-max-size.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## bugfix/box

* Fixed the `wal_queue_max_size` configuration option not being applied during
the initial configuration (gh-10013).
2 changes: 2 additions & 0 deletions src/box/box.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4962,6 +4962,8 @@ box_cfg_xc(void)
box_set_replication_sync_timeout();
box_set_replication_skip_conflict();
box_set_replication_anon();
if (box_set_wal_queue_max_size() != 0)
diag_raise();
/*
* Must be set before opening the server port, because it may be
* requested by a client before the configuration is completed.
Expand Down
1 change: 1 addition & 0 deletions src/box/lua/load_cfg.lua
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,7 @@ local dynamic_cfg_skip_at_load = {
replication_anon = true,
bootstrap_strategy = true,
wal_dir_rescan_delay = true,
wal_queue_max_size = true,
custom_proc_title = true,
force_recovery = true,
instance_uuid = true,
Expand Down
75 changes: 75 additions & 0 deletions test/replication-luatest/gh_10013_wal_queue_max_size_test.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
local t = require('luatest')
local server = require('luatest.server')
local replica_set = require('luatest.replica_set')

-- Test group on which the hooks and the test case in this file are registered.
local g = t.group()

-- Per-test fixture: a master plus a replica that follows it. The replica is
-- configured with wal_queue_max_size = 1 and a very long sync timeout.
g.before_each(function(cg)
    -- The test drives box.error.injection, so skip it on non-debug builds.
    t.tarantool.skip_if_not_debug()

    local rs = replica_set:new({})
    cg.replica_set = rs

    cg.master = rs:build_and_add_server({
        alias = 'master',
        box_cfg = {replication_timeout = 0.1},
    })

    local replica_box_cfg = {
        replication = {server.build_listen_uri('master', rs.id)},
        replication_timeout = 0.1,
        wal_queue_max_size = 1,
        -- The point of the test is to observe which wal_queue_max_size is
        -- active *while* the replica is syncing. A huge sync timeout keeps
        -- the sync from finishing early, which would otherwise exercise only
        -- the old code path (size applied after sync).
        replication_sync_timeout = 300,
    }
    cg.replica = rs:build_and_add_server({
        alias = 'replica',
        box_cfg = replica_box_cfg,
    })

    rs:start()

    -- Create the space the test writes to and let the replica catch up.
    cg.master:exec(function()
        box.schema.space.create('test')
        box.space.test:create_index('pk')
    end)
    cg.replica:wait_for_vclock_of(cg.master)
end)

-- Per-test teardown: drop the replica set created by the before_each hook.
g.after_each(function(cg)
    cg.replica_set:drop()
end)

-- Lua snippet handed to the replica through the TARANTOOL_RUN_BEFORE_BOX_CFG
-- environment variable (presumably executed before the replica's first
-- box.cfg() call -- confirm against the luatest server helper). It turns on
-- the ERRINJ_WAL_DELAY injection and stores the current value of
-- ERRINJ_WAL_WRITE_COUNT in the global `wal_write_count`, which the test later
-- uses as a baseline.
local run_before_cfg = [[
rawset(_G, 'wal_write_count', 0)
box.error.injection.set('ERRINJ_WAL_DELAY' , true)
wal_write_count = box.error.injection.get('ERRINJ_WAL_WRITE_COUNT')
]]

-- gh-10013: the initial box.cfg() call used to ignore wal_queue_max_size, so a
-- replica synced with the default limit (16 MB) instead of the configured one.
-- Scenario: stop the replica, write rows on the master, then restart the
-- replica with a WAL delay installed before its first box.cfg(). While the WAL
-- is stalled the write counter is expected to advance by exactly one; once the
-- delay is lifted all ten rows must be written.
g.test_wal_queue_max_size_apply_on_initial_sync = function(cg)
    local master, replica = cg.master, cg.replica

    replica:stop()
    master:exec(function()
        for key = 1, 10 do
            box.space.test:insert({key})
        end
    end)

    -- Restart the replica without waiting for readiness: its WAL is delayed,
    -- so only wait until a net.box connection can be established.
    replica.env['TARANTOOL_RUN_BEFORE_BOX_CFG'] = run_before_cfg
    replica:start({wait_until_ready = false})
    t.helpers.retrying({}, replica.connect_net_box, replica)

    replica:exec(function()
        -- Retry until the WAL write counter is exactly `expected_delta` above
        -- the baseline saved before box.cfg().
        local function wait_for_wal_writes(expected_delta)
            t.helpers.retrying({}, function()
                t.assert_equals(
                    box.error.injection.get('ERRINJ_WAL_WRITE_COUNT'),
                    _G.wal_write_count + expected_delta)
            end)
        end

        wait_for_wal_writes(1)
        box.error.injection.set('ERRINJ_WAL_DELAY', false)
        wait_for_wal_writes(10)
    end)

    replica:wait_for_vclock_of(master)
end

0 comments on commit 359df4f

Please sign in to comment.