Skip to content

Commit

Permalink
raft: make promote bump term and vote at once
Browse files Browse the repository at this point in the history
box.ctl.promote() was impplemented as follows: an instance bumps the
term and marks itself a candidate, but doesn't vote for self
immediately. Instead it relies on the machinery which makes a candidate
vote for self as soon as it persists a new term.

This differs from a normal election start due to leader timeout: there
term and vote are bumped at once.

Besides, this increases probability of box.ctl.promote() resulting in
other node getting elected: if a node first broadcasts a term without a
vote, it is not considered a candidate, so other candidates might start
elections and vote for themselves.

Let's bring promote into line with automatic elections.

Closes tarantool#8497

NO_DOC=bugfix
  • Loading branch information
sergepetrenko committed May 24, 2023
1 parent 5b75f80 commit 88cfa25
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 21 deletions.
4 changes: 4 additions & 0 deletions changelogs/unreleased/gh-8497-promote-atomic.md
@@ -0,0 +1,4 @@
## bugfix/replication

* Fixed possible failure to promote the desired node by `box.ctl.promote()` on
a cluster with nodes configured with `election_mode = "candidate"` (gh-8497).
1 change: 1 addition & 0 deletions src/lib/raft/raft.c
Expand Up @@ -1211,6 +1211,7 @@ raft_promote(struct raft *raft)
return;
raft_sm_schedule_new_term(raft, raft->volatile_term + 1);
raft_start_candidate(raft);
raft_sm_schedule_new_vote(raft, raft->self, raft->vclock);
}

void
Expand Down
2 changes: 1 addition & 1 deletion test/replication-luatest/gh_6036_qsync_order_test.lua
Expand Up @@ -187,7 +187,7 @@ g.test_promote_order = function(cg)
cg.r1:exec(function() box.cfg{replication=""} end)
cg.r2:exec(function()
box.cfg{replication = {}}
box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 2)
box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1)
require('fiber').create(function() box.ctl.promote() end)
end)
t.helpers.retrying({}, function()
Expand Down
56 changes: 56 additions & 0 deletions test/replication-luatest/gh_8497_atomic_promote_test.lua
@@ -0,0 +1,56 @@
local t = require('luatest')
local server = require('luatest.server')
local replica_set = require('luatest.replica_set')

local g = t.group('gh-8497-atomic-promote')

g.before_each(function(cg)
t.tarantool.skip_if_not_debug()
cg.replica_set = replica_set:new({})
cg.box_cfg = {
replication_timeout = 0.1,
replication = {
server.build_listen_uri('server1', cg.replica_set.id),
server.build_listen_uri('server2', cg.replica_set.id),
},
}
cg.box_cfg.election_mode = 'candidate'
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
box_cfg = cg.box_cfg,
}
cg.box_cfg.election_mode = 'voter'
cg.server2 =cg.replica_set:build_and_add_server{
alias = 'server2',
box_cfg = cg.box_cfg,
}
cg.replica_set:start()
cg.replica_set:wait_for_fullmesh()
cg.server1:wait_for_election_leader()
end)

g.after_each(function(cg)
cg.replica_set:drop()
end)

g.test_election_promote_finishes_in_one_term = function(cg)
cg.server2:update_box_cfg{election_mode = 'candidate'}
local term = cg.server1:get_election_term()
t.assert_equals(term, cg.server2:get_election_term(),
'The cluster is stable')
local ok, err = cg.server2:exec(function()
local fiber = require('fiber')
box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 1)
local fib = fiber.new(box.ctl.promote)
fib:set_joinable(true)
fiber.sleep(2 * box.cfg.replication_timeout)
box.error.injection.set('ERRINJ_WAL_DELAY', false)
return fib:join()
end)
t.assert_equals({ok, err}, {true, nil}, 'No error in promote')
cg.server2:wait_for_election_leader()
t.assert_equals(term + 1, cg.server1:get_election_term(),
'The term is bumped once')
t.assert_equals(term + 1, cg.server2:get_election_term(),
'The term is bumped once')
end
15 changes: 3 additions & 12 deletions test/replication/election_basic.result
Expand Up @@ -403,32 +403,23 @@ box.ctl.promote()
| ---
| ...

test_run:wait_cond(function() return #election_tbl == 9 end)
test_run:wait_cond(function() return #election_tbl == 8 end)
| ---
| - true
| ...
assert(election_tbl[7].state == 'follower')
assert(election_tbl[7].state == 'candidate')
| ---
| - true
| ...
assert(election_tbl[7].term == election_tbl[6].term + 1)
| ---
| - true
| ...
-- Vote is visible here already, but it is volatile.
assert(election_tbl[7].vote == 1)
| ---
| - true
| ...
assert(election_tbl[8].state == 'candidate')
| ---
| - true
| ...
assert(election_tbl[8].vote == 1)
| ---
| - true
| ...
assert(election_tbl[9].state == 'leader')
assert(election_tbl[8].state == 'leader')
| ---
| - true
| ...
Expand Down
9 changes: 3 additions & 6 deletions test/replication/election_basic.test.lua
Expand Up @@ -165,14 +165,11 @@ assert(election_tbl[6].state == 'follower')

box.ctl.promote()

test_run:wait_cond(function() return #election_tbl == 9 end)
assert(election_tbl[7].state == 'follower')
test_run:wait_cond(function() return #election_tbl == 8 end)
assert(election_tbl[7].state == 'candidate')
assert(election_tbl[7].term == election_tbl[6].term + 1)
-- Vote is visible here already, but it is volatile.
assert(election_tbl[7].vote == 1)
assert(election_tbl[8].state == 'candidate')
assert(election_tbl[8].vote == 1)
assert(election_tbl[9].state == 'leader')
assert(election_tbl[8].state == 'leader')

test_run:cmd('stop server replica')
test_run:cmd('delete server replica')
Expand Down
4 changes: 2 additions & 2 deletions test/unit/raft.c
Expand Up @@ -2348,7 +2348,7 @@ raft_test_resign(void)
1 /* Vote. */,
2 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 2}" /* Vclock. */
"{0: 1}" /* Vclock. */
), "became leader");

raft_node_resign(&node);
Expand All @@ -2361,7 +2361,7 @@ raft_test_resign(void)
1 /* Vote. */,
2 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 2}" /* Vclock. */
"{0: 1}" /* Vclock. */
), "resigned from leader state");

raft_node_destroy(&node);
Expand Down

0 comments on commit 88cfa25

Please sign in to comment.