Support pre-voting in Raft

Pre-voting adds robustness to the leadership elections. This is a breaking change and must be explicitly enabled. A Follower will only initiate an election if it has received a quorum of positive pre-votes from other servers. A positive pre-vote will only be returned from a server if that server is itself asking for pre-votes. Therefore an election will be triggered not by one server timing out, but by a majority of servers timing out. This commit introduces a visitor pattern for RaftMessages used by RaftMessageHandlers and RaftMessageEncoder.
neo4j · Nov 28, 2017 · 91c4bb7 · 91c4bb7
1 parent 2b75613
commit 91c4bb7
Show file tree

Hide file tree

Showing 26 changed files with 1,699 additions and 251 deletions.
diff --git a/...al-clustering/src/main/java/org/neo4j/causalclustering/core/CausalClusteringSettings.java b/...al-clustering/src/main/java/org/neo4j/causalclustering/core/CausalClusteringSettings.java
@@ -73,6 +73,10 @@ public class CausalClusteringSettings implements LoadableConfig
     public static final Setting<Boolean> refuse_to_be_leader =
             setting( "causal_clustering.refuse_to_be_leader", BOOLEAN, FALSE );
 
+    @Description( "Enable pre-voting extension to the Raft protocol (this is breaking and must match between the core cluster members)" )
+    public static final Setting<Boolean> enable_pre_voting =
+            setting( "causal_clustering.enable_pre_voting", BOOLEAN, FALSE );
+
     @Description( "The maximum batch size when catching up (in unit of entries)" )
     public static final Setting<Integer> catchup_batch_size =
             setting( "causal_clustering.catchup_batch_size", INTEGER, "64" );

diff --git a/...l-clustering/src/main/java/org/neo4j/causalclustering/core/consensus/ConsensusModule.java b/...l-clustering/src/main/java/org/neo4j/causalclustering/core/consensus/ConsensusModule.java
@@ -136,9 +136,12 @@ expectedClusterSize, electionTimeout, systemClock(), config.get( join_catch_up_t
 
         raftTimeoutService = new DelayedRenewableTimeoutService( systemClock(), logProvider );
 
+        boolean supportsPreVoting = config.get( CausalClusteringSettings.enable_pre_voting );
+
         raftMachine = new RaftMachine( myself, termState, voteState, raftLog, electionTimeout, heartbeatInterval,
                 raftTimeoutService, outbound, logProvider, raftMembershipManager, logShipping, inFlightCache,
-                RefuseToBeLeaderStrategy.shouldRefuseToBeLeader( config, logProvider.getLog( getClass() ) ), platformModule.monitors, systemClock() );
+                RefuseToBeLeaderStrategy.shouldRefuseToBeLeader( config, logProvider.getLog( getClass() ) ),
+               supportsPreVoting, platformModule.monitors, systemClock() );
 
         life.add( new RaftCoreTopologyConnector( coreTopologyService, raftMachine ) );
 

diff --git a/.../src/main/java/org/neo4j/causalclustering/core/consensus/MajorityIncludingSelfQuorum.java b/.../src/main/java/org/neo4j/causalclustering/core/consensus/MajorityIncludingSelfQuorum.java
@@ -19,10 +19,17 @@
  */
 package org.neo4j.causalclustering.core.consensus;
 
+import java.util.Collection;
+
 public class MajorityIncludingSelfQuorum
 {
     private static final int MIN_QUORUM = 2;
 
+    public static boolean isQuorum( Collection<?> cluster, Collection<?> countNotIncludingMyself )
+    {
+        return isQuorum( cluster.size(), countNotIncludingMyself.size() );
+    }
+
     public static boolean isQuorum( int clusterSize, int countNotIncludingSelf )
     {
         return isQuorum( MIN_QUORUM, clusterSize, countNotIncludingSelf );

diff --git a/...ausal-clustering/src/main/java/org/neo4j/causalclustering/core/consensus/RaftMachine.java b/...ausal-clustering/src/main/java/org/neo4j/causalclustering/core/consensus/RaftMachine.java
@@ -97,7 +97,8 @@ public RaftMachine( MemberId myself, StateStorage<TermState> termStorage, StateS
             RaftLog entryLog, long electionTimeout, long heartbeatInterval,
             RenewableTimeoutService renewableTimeoutService, Outbound<MemberId,RaftMessages.RaftMessage> outbound,
             LogProvider logProvider, RaftMembershipManager membershipManager, RaftLogShippingManager logShipping,
-            InFlightCache inFlightCache, boolean refuseToBecomeLeader, Monitors monitors, Clock clock )
+            InFlightCache inFlightCache, boolean refuseToBecomeLeader, boolean supportPreVoting, Monitors monitors,
+            Clock clock )
     {
         this.myself = myself;
         this.electionTimeout = electionTimeout;
@@ -115,7 +116,7 @@ public RaftMachine( MemberId myself, StateStorage<TermState> termStorage, StateS
 
         this.inFlightCache = inFlightCache;
         this.state = new RaftState( myself, termStorage, membershipManager, entryLog, voteStorage, inFlightCache,
-                logProvider );
+                logProvider, supportPreVoting );
 
         leaderNotFoundMonitor = monitors.newMonitor( LeaderNotFoundMonitor.class );
     }