# Patch summary (gh-8497): box.ctl.promote() should finish within one term.
#
# Files touched by this patch:
#   * changelogs/unreleased/gh-8497-promote-atomic.md (new file):
#     bugfix/replication changelog entry for the fix.
#   * src/lib/raft/raft.c: raft_promote() now also calls
#     raft_sm_schedule_new_vote(raft, raft->self, raft->vclock) right after
#     raft_sm_schedule_new_term() and raft_start_candidate().
#   * test/replication-luatest/gh_6036_qsync_order_test.lua: the
#     ERRINJ_WAL_DELAY_COUNTDOWN value set before box.ctl.promote() in
#     test_promote_order is lowered from 2 to 1.
#   * test/replication-luatest/gh_8497_atomic_promote_test.lua (new file):
#     starts server1 with election_mode 'candidate' and server2 with 'voter',
#     switches server2 to 'candidate', runs box.ctl.promote() on it in a
#     fiber with ERRINJ_WAL_DELAY_COUNTDOWN = 1, turns ERRINJ_WAL_DELAY off
#     after 2 * replication_timeout, and asserts that both servers end up at
#     exactly term + 1.
#   * test/replication/election_basic.test.lua and .result: after
#     box.ctl.promote() the test waits for 8 entries in election_tbl instead
#     of 9; entry 7 is expected in state 'candidate' (was 'follower') and
#     entry 8 is expected in state 'leader' (was entry 9).
#   * test/unit/raft.c: the expected vclock strings in raft_test_resign()
#     change from "{0: 2}" to "{0: 1}".
#
# NOTE(review): the lowered countdown, the dropped 'follower' event and the
# smaller expected vclock presumably all follow from the term bump and the
# self-vote now being scheduled together -- confirm against raft.c.
# NOTE(review): the line breaks of this patch look collapsed (for example the
# hunk header "+1,56" does not match the physical lines below); restore them
# before feeding the file to `git apply` or `patch`.
diff --git a/changelogs/unreleased/gh-8497-promote-atomic.md b/changelogs/unreleased/gh-8497-promote-atomic.md new file mode 100644 index 000000000000..79d6a057b4df --- /dev/null +++ b/changelogs/unreleased/gh-8497-promote-atomic.md @@ -0,0 +1,4 @@ +## bugfix/replication + +* Fixed possible failure to promote the desired node by `box.ctl.promote()` on + a cluster with nodes configured with `election_mode = "candidate"` (gh-8497). diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c index 72d914971b01..f31cee5b10ee 100644 --- a/src/lib/raft/raft.c +++ b/src/lib/raft/raft.c @@ -1211,6 +1211,7 @@ raft_promote(struct raft *raft) return; raft_sm_schedule_new_term(raft, raft->volatile_term + 1); raft_start_candidate(raft); + raft_sm_schedule_new_vote(raft, raft->self, raft->vclock); } void diff --git a/test/replication-luatest/gh_6036_qsync_order_test.lua b/test/replication-luatest/gh_6036_qsync_order_test.lua index 729812d3e67d..4911e5680619 100644 --- a/test/replication-luatest/gh_6036_qsync_order_test.lua +++ b/test/replication-luatest/gh_6036_qsync_order_test.lua @@ -187,7 +187,7 @@ g.test_promote_order = function(cg) cg.r1:exec(function() box.cfg{replication=""} end) cg.r2:exec(function() box.cfg{replication = {}} - box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 2) + box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1) require('fiber').create(function() box.ctl.promote() end) end) t.helpers.retrying({}, function() diff --git a/test/replication-luatest/gh_8497_atomic_promote_test.lua b/test/replication-luatest/gh_8497_atomic_promote_test.lua new file mode 100644 index 000000000000..1b5580595989 --- /dev/null +++ b/test/replication-luatest/gh_8497_atomic_promote_test.lua @@ -0,0 +1,56 @@ +local t = require('luatest') +local server = require('luatest.server') +local replica_set = require('luatest.replica_set') + +local g = t.group('gh-8497-atomic-promote') + +g.before_each(function(cg) + t.tarantool.skip_if_not_debug() + cg.replica_set = 
replica_set:new({}) + cg.box_cfg = { + replication_timeout = 0.1, + replication = { + server.build_listen_uri('server1', cg.replica_set.id), + server.build_listen_uri('server2', cg.replica_set.id), + }, + } + cg.box_cfg.election_mode = 'candidate' + cg.server1 = cg.replica_set:build_and_add_server{ + alias = 'server1', + box_cfg = cg.box_cfg, + } + cg.box_cfg.election_mode = 'voter' + cg.server2 =cg.replica_set:build_and_add_server{ + alias = 'server2', + box_cfg = cg.box_cfg, + } + cg.replica_set:start() + cg.replica_set:wait_for_fullmesh() + cg.server1:wait_for_election_leader() +end) + +g.after_each(function(cg) + cg.replica_set:drop() +end) + +g.test_election_promote_finishes_in_one_term = function(cg) + cg.server2:update_box_cfg{election_mode = 'candidate'} + local term = cg.server1:get_election_term() + t.assert_equals(term, cg.server2:get_election_term(), + 'The cluster is stable') + local ok, err = cg.server2:exec(function() + local fiber = require('fiber') + box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1) + local fib = fiber.new(box.ctl.promote) + fib:set_joinable(true) + fiber.sleep(2 * box.cfg.replication_timeout) + box.error.injection.set('ERRINJ_WAL_DELAY', false) + return fib:join() + end) + t.assert_equals({ok, err}, {true, nil}, 'No error in promote') + cg.server2:wait_for_election_leader() + t.assert_equals(term + 1, cg.server1:get_election_term(), + 'The term is bumped once') + t.assert_equals(term + 1, cg.server2:get_election_term(), + 'The term is bumped once') +end diff --git a/test/replication/election_basic.result b/test/replication/election_basic.result index 92ab6914b6f5..1fd69caf1c1e 100644 --- a/test/replication/election_basic.result +++ b/test/replication/election_basic.result @@ -403,11 +403,11 @@ box.ctl.promote() | --- | ... -test_run:wait_cond(function() return #election_tbl == 9 end) +test_run:wait_cond(function() return #election_tbl == 8 end) | --- | - true | ... 
-assert(election_tbl[7].state == 'follower') +assert(election_tbl[7].state == 'candidate') | --- | - true | ... @@ -415,20 +415,11 @@ assert(election_tbl[7].term == election_tbl[6].term + 1) | --- | - true | ... --- Vote is visible here already, but it is volatile. assert(election_tbl[7].vote == 1) | --- | - true | ... -assert(election_tbl[8].state == 'candidate') - | --- - | - true - | ... -assert(election_tbl[8].vote == 1) - | --- - | - true - | ... -assert(election_tbl[9].state == 'leader') +assert(election_tbl[8].state == 'leader') | --- | - true | ... diff --git a/test/replication/election_basic.test.lua b/test/replication/election_basic.test.lua index 1ae4d7de2eb3..8fb0f6fe5a7b 100644 --- a/test/replication/election_basic.test.lua +++ b/test/replication/election_basic.test.lua @@ -165,14 +165,11 @@ assert(election_tbl[6].state == 'follower') box.ctl.promote() -test_run:wait_cond(function() return #election_tbl == 9 end) -assert(election_tbl[7].state == 'follower') +test_run:wait_cond(function() return #election_tbl == 8 end) +assert(election_tbl[7].state == 'candidate') assert(election_tbl[7].term == election_tbl[6].term + 1) --- Vote is visible here already, but it is volatile. assert(election_tbl[7].vote == 1) -assert(election_tbl[8].state == 'candidate') -assert(election_tbl[8].vote == 1) -assert(election_tbl[9].state == 'leader') +assert(election_tbl[8].state == 'leader') test_run:cmd('stop server replica') test_run:cmd('delete server replica') diff --git a/test/unit/raft.c b/test/unit/raft.c index 78d6a2e2a3ec..77f56369afa6 100644 --- a/test/unit/raft.c +++ b/test/unit/raft.c @@ -2348,7 +2348,7 @@ raft_test_resign(void) 1 /* Vote. */, 2 /* Volatile term. */, 1 /* Volatile vote. */, - "{0: 2}" /* Vclock. */ + "{0: 1}" /* Vclock. */ ), "became leader"); raft_node_resign(&node); @@ -2361,7 +2361,7 @@ raft_test_resign(void) 1 /* Vote. */, 2 /* Volatile term. */, 1 /* Volatile vote. */, - "{0: 2}" /* Vclock. */ + "{0: 1}" /* Vclock. 
*/ ), "resigned from leader state"); raft_node_destroy(&node);