Skip to content

Commit

Permalink
raft: persist vote for self together with term bump
Browse files Browse the repository at this point in the history
Commit c9155ac ("raft: persist new term and vote separately") made
the nodes persist new term and vote separately, using 2 WAL writes.
Writing the term first is needed to flush all the ongoing transactions,
so that the node's vclock is updated and can be checked against the
candidate's vclock. Otherwise it could happen that the node persists a
vote for some candidate only to find that it's vclock would actually
become incomparable with the candidate's.

Actually, this guard is not needed when checking a vote for self,
because a node can always vote for self. Let's go back to writing term
and vote together for self votes.

Prerequisite tarantool#8497

NO_DOC=not user-visible
NO_CHANGELOG=not user-visible
NO_TEST=covered by the next commit
  • Loading branch information
sergepetrenko committed May 19, 2023
1 parent 7316d81 commit ba52795
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 39 deletions.
18 changes: 9 additions & 9 deletions src/lib/raft/raft.c
Expand Up @@ -740,6 +740,15 @@ raft_worker_handle_io(struct raft *raft)
assert(raft->volatile_term >= raft->term);
if (raft->volatile_vote == 0)
goto do_dump;
/*
* Skip self. When vote is issued, own vclock can be smaller,
* but that doesn't matter. Can always vote for self. Not having
* this special case still works if the node is configured as a
* candidate, but the node might log that it canceled a vote for
* self, which is confusing.
*/
if (raft->volatile_vote == raft->self)
goto do_dump_with_vote;
/*
* Vote and term bumps are persisted separately. This serves as
* a flush of all transactions going to WAL right now so as the
Expand All @@ -750,15 +759,6 @@ raft_worker_handle_io(struct raft *raft)
*/
if (raft->volatile_term > raft->term)
goto do_dump;
/*
* Skip self. When vote was issued, own vclock could be smaller,
* but that doesn't matter. Can always vote for self. Not having
* this special case still works if the node is configured as a
* candidate, but the node might log that it canceled a vote for
* self, which is confusing.
*/
if (raft->volatile_vote == raft->self)
goto do_dump_with_vote;
if (!raft_can_vote_for(raft, &raft->candidate_vclock)) {
say_info("RAFT: vote request for %u is canceled - the "
"vclock is not acceptable anymore",
Expand Down
46 changes: 16 additions & 30 deletions test/unit/raft.c
Expand Up @@ -68,44 +68,30 @@ raft_test_leader_election(void)
1 /* Vote. */,
2 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 2}" /* Vclock. */
"{0: 1}" /* Vclock. */
), "elections with a new term");
is(raft_vote_count(&node.raft), 1, "single vote for self");
ok(node.update_count > 0, "trigger worked");
node.update_count = 0;

/* Check if all async work is done properly. */

is(node.journal.size, 2, "2 records in the journal");
is(node.journal.size, 1, "2 records in the journal");
ok(raft_node_journal_check_row(
&node,
0 /* Index. */,
2 /* Term. */,
0 /* Vote. */
), "term is on disk");
ok(raft_node_journal_check_row(
&node,
1 /* Index. */,
2 /* Term. */,
1 /* Vote. */
), "vote is on disk");
), "term and vote is on disk");

is(node.net.count, 2, "2 pending messages");
is(node.net.count, 1, "1 pending message");
ok(raft_node_net_check_msg(&node,
0 /* Index. */,
RAFT_STATE_FOLLOWER /* State. */,
2 /* Term. */,
0 /* Vote. */,
NULL /* Vclock. */
), "term bump is sent");
ok(raft_node_net_check_msg(
&node,
1 /* Index. */,
RAFT_STATE_CANDIDATE /* State. */,
2 /* Term. */,
1 /* Vote. */,
"{0: 2}" /* Vclock. */
), "vote request is sent");
NULL /* Vclock. */
), "term bump and vote are sent");
raft_node_net_drop(&node);

/* Simulate first response. Nothing should happen, quorum is 3. */
Expand Down Expand Up @@ -2165,7 +2151,7 @@ raft_test_pre_vote(void)
1 /* Vote. */,
3 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 3}" /* Vclock. */
"{0: 2}" /* Vclock. */
), "elections once no one sees the leader");

raft_node_cfg_election_quorum(&node, 1);
Expand All @@ -2178,7 +2164,7 @@ raft_test_pre_vote(void)
1 /* Vote. */,
3 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 3}" /* Vclock. */
"{0: 2}" /* Vclock. */
), "become leader on quorum change");

raft_cfg_is_candidate_later(&node.raft, false);
Expand All @@ -2191,7 +2177,7 @@ raft_test_pre_vote(void)
1 /* Vote. */,
3 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 3}" /* Vclock. */
"{0: 2}" /* Vclock. */
), "cfg_is_candidate_later doesn't disrupt leader");

is(raft_node_send_follower(
Expand All @@ -2210,7 +2196,7 @@ raft_test_pre_vote(void)
0 /* Vote. */,
4 /* Volatile term. */,
0 /* Volatile vote. */,
"{0: 4}" /* Vclock. */
"{0: 3}" /* Vclock. */
), "term bump after cfg_is_candidate_later makes node a voter.");

raft_cfg_is_candidate_later(&node.raft, true);
Expand All @@ -2223,7 +2209,7 @@ raft_test_pre_vote(void)
0 /* Vote. */,
4 /* Volatile term. */,
0 /* Volatile vote. */,
"{0: 4}" /* Vclock. */
"{0: 3}" /* Vclock. */
), "cfg_is_candidate_later doesn't transfer voter to a candidate");

is(raft_node_send_follower(
Expand All @@ -2242,7 +2228,7 @@ raft_test_pre_vote(void)
1 /* Vote. */,
5 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 6}" /* Vclock. */
"{0: 5}" /* Vclock. */
), "Term bump with cfg_is_candidate_later transfers voter to candiate");

is(raft_leader_idle(&node.raft), 0,
Expand Down Expand Up @@ -2280,7 +2266,7 @@ raft_test_pre_vote(void)
0 /* Vote. */,
6 /* Volatile term. */,
0 /* Volatile vote. */,
"{0: 7}" /* Vclock. */
"{0: 6}" /* Vclock. */
), "no elections on start when someone sees the leader");

ok(!raft_ev_is_active(&node.raft.timer),
Expand All @@ -2297,7 +2283,7 @@ raft_test_pre_vote(void)
0 /* Vote. */,
6 /* Volatile term. */,
0 /* Volatile vote. */,
"{0: 7}" /* Vclock. */
"{0: 6}" /* Vclock. */
), "no elections on becoming candidate when someone sees the leader");

ok(!raft_ev_is_active(&node.raft.timer),
Expand Down Expand Up @@ -2362,7 +2348,7 @@ raft_test_resign(void)
1 /* Vote. */,
2 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 2}" /* Vclock. */
"{0: 1}" /* Vclock. */
), "became leader");

raft_node_resign(&node);
Expand All @@ -2375,7 +2361,7 @@ raft_test_resign(void)
1 /* Vote. */,
2 /* Volatile term. */,
1 /* Volatile vote. */,
"{0: 2}" /* Vclock. */
"{0: 1}" /* Vclock. */
), "resigned from leader state");

raft_node_destroy(&node);
Expand Down

0 comments on commit ba52795

Please sign in to comment.