Skip to content

Commit

Permalink
Support pre-voting in Raft
Browse files Browse the repository at this point in the history
Pre-voting adds robustness to the leadership elections. This is a breaking
change and must be explicitly enabled.

A Follower will only initiate an election if it has received a quorum
of positive pre-votes from other servers.

A positive pre-vote will only be returned from a server if that server
is itself asking for pre-votes. Therefore an election will be triggered
not by one server timing out, but by a majority of servers timing out.

This commit introduces a visitor pattern for RaftMessages used by
RaftMessageHandlers and RaftMessageEncoder.
  • Loading branch information
martinfurmanski authored and andrewkerr9000 committed Nov 28, 2017
1 parent 2b75613 commit 91c4bb7
Show file tree
Hide file tree
Showing 26 changed files with 1,699 additions and 251 deletions.
Expand Up @@ -73,6 +73,10 @@ public class CausalClusteringSettings implements LoadableConfig
public static final Setting<Boolean> refuse_to_be_leader =
setting( "causal_clustering.refuse_to_be_leader", BOOLEAN, FALSE );

@Description( "Enable pre-voting extension to the Raft protocol (this is breaking and must match between the core cluster members)" )
public static final Setting<Boolean> enable_pre_voting =
setting( "causal_clustering.enable_pre_voting", BOOLEAN, FALSE );

@Description( "The maximum batch size when catching up (in unit of entries)" )
public static final Setting<Integer> catchup_batch_size =
setting( "causal_clustering.catchup_batch_size", INTEGER, "64" );
Expand Down
Expand Up @@ -136,9 +136,12 @@ expectedClusterSize, electionTimeout, systemClock(), config.get( join_catch_up_t

raftTimeoutService = new DelayedRenewableTimeoutService( systemClock(), logProvider );

boolean supportsPreVoting = config.get( CausalClusteringSettings.enable_pre_voting );

raftMachine = new RaftMachine( myself, termState, voteState, raftLog, electionTimeout, heartbeatInterval,
raftTimeoutService, outbound, logProvider, raftMembershipManager, logShipping, inFlightCache,
RefuseToBeLeaderStrategy.shouldRefuseToBeLeader( config, logProvider.getLog( getClass() ) ), platformModule.monitors, systemClock() );
RefuseToBeLeaderStrategy.shouldRefuseToBeLeader( config, logProvider.getLog( getClass() ) ),
supportsPreVoting, platformModule.monitors, systemClock() );

life.add( new RaftCoreTopologyConnector( coreTopologyService, raftMachine ) );

Expand Down
Expand Up @@ -19,10 +19,17 @@
*/
package org.neo4j.causalclustering.core.consensus;

import java.util.Collection;

public class MajorityIncludingSelfQuorum
{
private static final int MIN_QUORUM = 2;

public static boolean isQuorum( Collection<?> cluster, Collection<?> countNotIncludingMyself )
{
return isQuorum( cluster.size(), countNotIncludingMyself.size() );
}

public static boolean isQuorum( int clusterSize, int countNotIncludingSelf )
{
return isQuorum( MIN_QUORUM, clusterSize, countNotIncludingSelf );
Expand Down
Expand Up @@ -97,7 +97,8 @@ public RaftMachine( MemberId myself, StateStorage<TermState> termStorage, StateS
RaftLog entryLog, long electionTimeout, long heartbeatInterval,
RenewableTimeoutService renewableTimeoutService, Outbound<MemberId,RaftMessages.RaftMessage> outbound,
LogProvider logProvider, RaftMembershipManager membershipManager, RaftLogShippingManager logShipping,
InFlightCache inFlightCache, boolean refuseToBecomeLeader, Monitors monitors, Clock clock )
InFlightCache inFlightCache, boolean refuseToBecomeLeader, boolean supportPreVoting, Monitors monitors,
Clock clock )
{
this.myself = myself;
this.electionTimeout = electionTimeout;
Expand All @@ -115,7 +116,7 @@ public RaftMachine( MemberId myself, StateStorage<TermState> termStorage, StateS

this.inFlightCache = inFlightCache;
this.state = new RaftState( myself, termStorage, membershipManager, entryLog, voteStorage, inFlightCache,
logProvider );
logProvider, supportPreVoting );

leaderNotFoundMonitor = monitors.newMonitor( LeaderNotFoundMonitor.class );
}
Expand Down

0 comments on commit 91c4bb7

Please sign in to comment.