Skip to content

Commit

Permalink
box: prevent demoted leader from being a candidate in the next elections
Browse files Browse the repository at this point in the history
Currently, the demoted leader sees that nobody has requested a vote in the
newly persisted term (because it has just written it without voting, and
nobody had time to see the new term yet), and hence votes for itself,
becoming the most probable winner of the next elections.

To prevent this from happening, let's forbid the demoted leader to be a
candidate in the next elections using `box_raft_leader_step_off`.

Closes #9855

NO_DOC=<bugfix>

Co-authored-by: Serge Petrenko <sergepetrenko@tarantool.org>
(cherry picked from commit 05d03a1)
  • Loading branch information
CuriousGeorgiy authored and sergepetrenko committed Jun 13, 2024
1 parent 49747a4 commit 22a9cfd
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## bugfix/replication

* Fixed a bug that allowed the old leader in
`box.cfg{election_mode = 'candidate'` mode to get re-elected after resigning
himself through `box.ctl.demote` (gh-9855).
5 changes: 2 additions & 3 deletions src/box/box.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2754,9 +2754,8 @@ box_demote(void)
return 0;
if (txn_limbo.owner_id != instance_id)
return 0;
if (raft->state != RAFT_STATE_LEADER)
return 0;
return box_trigger_elections();
box_raft_leader_step_off();
return 0;
}

assert(raft->state == RAFT_STATE_FOLLOWER);
Expand Down
13 changes: 13 additions & 0 deletions src/box/raft.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,19 @@ box_raft_fence(void)
raft_resign(raft);
}

void
box_raft_leader_step_off(void)
{
struct raft *raft = box_raft();
if (!raft->is_enabled || raft->state != RAFT_STATE_LEADER)
return;

/* It will be unfenced the next time new term is written. */
txn_limbo_fence(&txn_limbo);
raft_resign(raft);
raft_restore(raft);
}

/**
* Configure the raft node according to whether it has a quorum of connected
* peers or not. It can't start elections, when it doesn't.
Expand Down
9 changes: 9 additions & 0 deletions src/box/raft.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ box_raft_set_election_fencing_mode(enum election_fencing_mode mode);
void
box_raft_election_fencing_pause(void);

/**
* Resign RAFT leadership and freeze limbo regardless of
* box_election_fencing_mode. It waits until the elections
* begin. After the death-timeout expires, it starts a new
* round of elections.
*/
void
box_raft_leader_step_off(void);

void
box_raft_init(void);

Expand Down
5 changes: 5 additions & 0 deletions test/replication-luatest/gh_6033_box_promote_demote_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ end
local function cluster_reinit(g)
box_cfg_update(g.cluster.servers, g.box_cfg)
wait_sync(g.cluster.servers)
for _, server in ipairs(g.cluster.servers) do
server:exec(function()
box.ctl.demote()
end)
end
end

local function cluster_stop(g)
Expand Down
13 changes: 13 additions & 0 deletions test/replication-luatest/gh_6860_election_off_demote_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ local function replicaset_create(g)
server.build_listen_uri('server2', g.replicaset.id),
},
}
g.replication = box_cfg.replication
g.server1 = g.replicaset:build_and_add_server({
alias = 'server1', box_cfg = box_cfg
})
Expand Down Expand Up @@ -111,10 +112,22 @@ g.test_election_off_demote_other_no_leader = function(g)
end)
-- Server-2 sees that the queue is owned by server-1.
g.server2:wait_for_synchro_queue_term(g.server1:get_election_term())
g.server2:update_box_cfg{replication = ''}
g.server1:exec(function()
box.ctl.demote()
-- Bump the election term while leaving the synchro queue term intact.
local msg = "Not enough peers connected to start elections: 1 out " ..
"of minimal required 2"
t.assert_error_msg_content_equals(msg, box.ctl.promote)
t.assert_equals(box.info.election.term, box.info.synchro.queue.term + 1)
box.cfg{election_mode = 'off'}
end)
g.server2:update_box_cfg({replication = g.replication})
g.server2:exec(function()
t.helpers.retrying({timeout = 10, delay = 0.5}, function()
t.assert_not_equals(box.info.status, "orphan")
end)
end)
-- Server-2 sees that server-1 is no longer a leader. But the queue still
-- belongs to the latter.
g.server2:wait_for_election_term(g.server1:get_election_term())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
local t = require('luatest')
local replica_set = require('luatest.replica_set')
local server = require('luatest.server')

local g = t.group()

g.before_each(function(cg)
cg.replica_set = replica_set:new{}
local box_cfg = {
replication = {
server.build_listen_uri('server1', cg.replica_set.id),
server.build_listen_uri('server2', cg.replica_set.id),
},
election_mode = 'candidate',
replication_timeout = 0.1,
election_timeout = 0.5,
}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
box_cfg = box_cfg,
}
box_cfg.election_mode = 'voter'
cg.server2 = cg.replica_set:build_and_add_server{
alias = 'server2',
box_cfg = box_cfg,
}
cg.replica_set:start()
cg.replica_set:wait_for_fullmesh()
end)

g.after_each(function(cg)
cg.replica_set:drop()
end)

-- Test that the demoted leader can still win in subsequent elections.
local function test_subsequent_elections(demoted_leader, current_leader)
current_leader:exec(function()
box.ctl.demote()
end)
demoted_leader:wait_for_election_leader();
demoted_leader:exec(function()
t.assert_equals(box.info.election.vote, box.info.election.leader)
end)
current_leader:exec(function()
t.assert_equals(box.info.election.vote, box.info.election.leader)
end)
end

-- Test that the demoted leader cannot get elected after resigning himself
-- through `box.ctl.demote`.
g.test_demote_guarantee = function(cg)
cg.server1:wait_for_election_leader()
cg.server2:update_box_cfg({election_mode = 'candidate'})
cg.server1:exec(function()
box.ctl.demote()
end)
cg.server2:wait_for_election_leader();
cg.server1:exec(function()
t.assert_equals(box.info.election.vote, box.info.election.leader)
end)
test_subsequent_elections(cg.server1, cg.server2)
end

-- Test that the demoted leader waits for leader death timeout after resigning
-- himself through `box.ctl.demote` and starts subsequent elections.
g.test_demoted_leader_election_in_subsequent_elections = function(cg)
cg.server1:wait_for_election_leader()
local demoted_term = cg.server1:get_election_term()
cg.server1:exec(function()
box.ctl.demote()
end)
cg.server1:wait_for_election_leader()
cg.server1:exec(function(demoted_term)
t.assert_equals(box.info.election.term, demoted_term + 1)
t.assert_equals(box.info.election.vote, box.info.election.leader)
end, {demoted_term})
end

0 comments on commit 22a9cfd

Please sign in to comment.