diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/HighlyAvailableGraphDatabase.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/HighlyAvailableGraphDatabase.java index 758d7ea37ee8e..52abe7682d76b 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/HighlyAvailableGraphDatabase.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/HighlyAvailableGraphDatabase.java @@ -489,7 +489,7 @@ protected void createModeSwitcher() @Override public void handle() { - highAvailabilityModeSwitcher.postMemberUnavailable(); + highAvailabilityModeSwitcher.forceElections(); } }; diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachine.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachine.java index f488f176a6d5b..0e6d7bc50a981 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachine.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachine.java @@ -237,49 +237,16 @@ public void notify( HighAvailabilityMemberListener listener ) } } - - /** - * As soon as we receive an unavailability message and the instanceId belongs to us, depending on the current - * state we do the following: - * - * The assumption here is: as soon as we receive unavailability event about us - then something went wrong - * in a cluster and we need to perform new elections. - * Elections should be triggered for all states except {@link HighAvailabilityMemberState#PENDING}, since - * first of all there is nothing or we already made a switch and waiting election to start, so no reason to - * start them again. - *

- * Listener invoked from sync block in {@link org.neo4j.cluster.member.paxos.PaxosClusterMemberEvents} so we - * should not have any racing here. - *

- * @param role The role for which the member is unavailable - * @param unavailableId The id of the member which became unavailable for that role - */ @Override public void memberIsUnavailable( String role, InstanceId unavailableId ) { - if ( context.getMyId().equals( unavailableId ) ) + if ( context.getMyId().equals( unavailableId ) && + HighAvailabilityModeSwitcher.SLAVE.equals( role ) && + state == HighAvailabilityMemberState.SLAVE ) { - if ( HighAvailabilityMemberState.PENDING != state ) - { - HighAvailabilityMemberState oldState = state; - changeStateToPending(); - logger.debug( "Got memberIsUnavailable(" + unavailableId + "), moved to " + state + " from " + - oldState ); - logger.debug( "Forcing new round of elections." ); - election.performRoleElections(); - } - else - { - logger.debug( "Got memberIsUnavailable(" + unavailableId + "), but already in " + - HighAvailabilityMemberState.PENDING + " state, will skip state change and " + - "new election."); - } + HighAvailabilityMemberState oldState = state; + changeStateToPending(); + logger.debug( "Got memberIsUnavailable(" + unavailableId + "), moved to " + state + " from " + oldState ); } else { diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcher.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcher.java index a2f0dc1d6cc5c..a5ecad92c246e 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcher.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcher.java @@ -24,6 +24,7 @@ import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import org.neo4j.cluster.BindingListener; @@ -96,6 +97,7 @@ public static InstanceId getServerId( URI haUri ) private volatile URI me; private volatile 
Future modeSwitcherFuture; private volatile HighAvailabilityMemberState currentTargetState; + private final AtomicBoolean canAskForElections = new AtomicBoolean( true ); public HighAvailabilityModeSwitcher( SwitchToSlave switchToSlave, SwitchToMaster switchToMaster, @@ -205,9 +207,13 @@ public void removeModeSwitcher( ModeSwitcher modeSwitcher ) modeSwitchListeners = Listeners.removeListener( modeSwitcher, modeSwitchListeners ); } - public void postMemberUnavailable() + public void forceElections() { - clusterMemberAvailability.memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE ); + if ( canAskForElections.compareAndSet( true, false ) ) + { + clusterMemberAvailability.memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE ); + election.performRoleElections(); + } } private void stateChanged( HighAvailabilityMemberChangeEvent event ) @@ -231,6 +237,12 @@ private void stateChanged( HighAvailabilityMemberChangeEvent event ) switch ( event.getNewState() ) { case TO_MASTER: + + if ( event.getOldState().equals( HighAvailabilityMemberState.SLAVE ) ) + { + clusterMemberAvailability.memberIsUnavailable( SLAVE ); + } + switchToMaster(); break; case TO_SLAVE: @@ -286,6 +298,7 @@ public void notify( ModeSwitcher listener ) try { masterHaURI = switchToMaster.switchToMaster( haCommunicationLife, me ); + canAskForElections.set( true ); } catch ( Throwable e ) { @@ -365,6 +378,7 @@ public void notify( ModeSwitcher listener ) else { slaveHaURI = resultingSlaveHaURI; + canAskForElections.set( true ); } } catch ( HighAvailabilityStoreFailureException e ) diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/slave/InvalidEpochExceptionHandler.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/slave/InvalidEpochExceptionHandler.java index 5520b779cda15..33c3ee17d56c6 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/slave/InvalidEpochExceptionHandler.java +++ 
b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/slave/InvalidEpochExceptionHandler.java @@ -22,4 +22,13 @@ public interface InvalidEpochExceptionHandler { void handle(); + + InvalidEpochExceptionHandler NONE = new InvalidEpochExceptionHandler() + { + @Override + public void handle() + { + + } + }; } diff --git a/enterprise/ha/src/test/java/org/neo4j/kernel/ha/ClusterTopologyChangesIT.java b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/ClusterTopologyChangesIT.java index 0b0bcd1146e61..651de0f6a40c0 100644 --- a/enterprise/ha/src/test/java/org/neo4j/kernel/ha/ClusterTopologyChangesIT.java +++ b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/ClusterTopologyChangesIT.java @@ -20,6 +20,7 @@ package org.neo4j.kernel.ha; import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; @@ -82,7 +83,6 @@ public void setup() throws Exception .startCluster(); } - @Test public void masterRejoinsAfterFailureAndReelection() throws Throwable { @@ -108,6 +108,7 @@ public void masterRejoinsAfterFailureAndReelection() throws Throwable } @Test + @Ignore public void slaveShouldServeTxsAfterMasterLostQuorumWentToPendingAndThenQuorumWasRestored() throws Throwable { // GIVEN: cluster with 3 members diff --git a/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachineTest.java b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachineTest.java index 221673a07ebf7..f7a8e6297e861 100644 --- a/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachineTest.java +++ b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityMemberStateMachineTest.java @@ -123,86 +123,6 @@ public void shouldMoveToToMasterFromPendingOnMasterElectedForItself() throws Thr assertThat( stateMachine.getCurrentState(), equalTo( HighAvailabilityMemberState.TO_MASTER ) ); } - @Test - public void ignoreAnotherMemberNotAvailable() throws Throwable - { - InstanceId me 
= new InstanceId( 1 ); - InstanceId other = new InstanceId( 2 ); - HighAvailabilityMemberContext context = new SimpleHighAvailabilityMemberContext( me, false ); - ClusterMemberEvents events = mock( ClusterMemberEvents.class ); - ClusterMemberListenerContainer memberListenerContainer = mockAddClusterMemberListener( events ); - - HighAvailabilityMemberStateMachine stateMachine = buildMockedStateMachine( context, events ); - stateMachine.init(); - ClusterMemberListener memberListener = memberListenerContainer.get(); - - // When - memberListener.coordinatorIsElected( me ); - - // Then - assertThat( stateMachine.getCurrentState(), equalTo( HighAvailabilityMemberState.TO_MASTER ) ); - - // When - memberListener.memberIsUnavailable(HighAvailabilityModeSwitcher.SLAVE, other); - - // Then - assertThat( stateMachine.getCurrentState(), equalTo( HighAvailabilityMemberState.TO_MASTER ) ); - } - - @Test - public void switchToPendingAndForceElectionOnThisMemberNotAvailable() throws Throwable - { - InstanceId me = new InstanceId( 1 ); - InstanceId master = new InstanceId( 2 ); - HighAvailabilityMemberContext context = new SimpleHighAvailabilityMemberContext( me, false ); - ClusterMemberEvents events = mock( ClusterMemberEvents.class ); - ClusterMemberListenerContainer memberListenerContainer = mockAddClusterMemberListener( events ); - Election election = mock( Election.class ); - - HighAvailabilityMemberStateMachine stateMachine = new StateMachineBuilder().withContext( context ) - .withElection( election ).withEvents( events ).build(); - stateMachine.init(); - ClusterMemberListener memberListener = memberListenerContainer.get(); - - // When - memberListener.memberIsAvailable( HighAvailabilityModeSwitcher.MASTER, master, URI.create( "2" ), - StoreId.DEFAULT ); - - // Then - assertThat( stateMachine.getCurrentState(), equalTo( HighAvailabilityMemberState.TO_SLAVE ) ); - - // When - memberListener.memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE, me ); - - //then - assertThat( 
stateMachine.getCurrentState(), equalTo( HighAvailabilityMemberState.PENDING ) ); - verify( election ).performRoleElections(); - } - - @Test - public void whilePendingDoNotForceElectionOnThisMemberNotAvailable() throws Throwable - { - // Given - InstanceId me = new InstanceId( 1 ); - HighAvailabilityMemberContext context = new SimpleHighAvailabilityMemberContext( me, false ); - Election election = mock( Election.class ); - ClusterMemberEvents events = mock( ClusterMemberEvents.class ); - ClusterMemberListenerContainer memberListenerContainer = mockAddClusterMemberListener( events ); - - HighAvailabilityMemberStateMachine stateMachine = new StateMachineBuilder().withElection( election ) - .withEvents( events ).withContext( context ).build(); - stateMachine.init(); - - // Then - assertThat( stateMachine.getCurrentState(), equalTo( HighAvailabilityMemberState.PENDING ) ); - - memberListenerContainer.get().memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE, me ); - - //then - assertThat( stateMachine.getCurrentState(), equalTo( HighAvailabilityMemberState.PENDING ) ); - verifyZeroInteractions( election ); - } - @Test public void shouldRemainToPendingOnMasterElectedForSomeoneElse() throws Throwable { diff --git a/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcherTest.java b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcherTest.java index cf70971dcd516..12eb1ae2fd448 100644 --- a/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcherTest.java +++ b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/cluster/HighAvailabilityModeSwitcherTest.java @@ -434,7 +434,7 @@ public void shouldTakeNoActionIfSwitchingToSlaveForItselfAsMaster() throws Throw } @Test - public void shouldPostMemberUnavailableEvent() + public void shouldPerformForcedElections() { // Given ClusterMemberAvailability memberAvailability = mock( ClusterMemberAvailability.class ); @@ -445,16 +445,40 @@ public 
void shouldPostMemberUnavailableEvent() mock( InstanceId.class ), new DevNullLoggingService() ); // When - modeSwitcher.postMemberUnavailable(); + modeSwitcher.forceElections(); // Then InOrder inOrder = inOrder( memberAvailability, election ); inOrder.verify( memberAvailability ).memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE ); + inOrder.verify( election ).performRoleElections(); inOrder.verifyNoMoreInteractions(); } @Test - public void shouldPostMemberNotAvailableEventAfterModeSwitch() throws Throwable + public void shouldPerformForcedElectionsOnlyOnce() + { + // Given: HAMS + ClusterMemberAvailability memberAvailability = mock( ClusterMemberAvailability.class ); + Election election = mock( Election.class ); + + HighAvailabilityModeSwitcher modeSwitcher = new HighAvailabilityModeSwitcher( mock( SwitchToSlave.class ), + mock( SwitchToMaster.class ), election, memberAvailability, dependencyResolverMock(), + mock( InstanceId.class ), new DevNullLoggingService() ); + + // When: reelections are forced multiple times + modeSwitcher.forceElections(); + modeSwitcher.forceElections(); + modeSwitcher.forceElections(); + + // Then: instance sends out memberIsUnavailable and asks for elections and does this only once + InOrder inOrder = inOrder( memberAvailability, election ); + inOrder.verify( memberAvailability ).memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE ); + inOrder.verify( election ).performRoleElections(); + inOrder.verifyNoMoreInteractions(); + } + + @Test + public void shouldAllowForcedElectionsAfterModeSwitch() throws Throwable { // Given SwitchToSlave switchToSlave = mock( SwitchToSlave.class ); @@ -492,7 +516,7 @@ public Future answer( InvocationOnMock invocation ) throws Throwable modeSwitcher.init(); modeSwitcher.start(); - modeSwitcher.postMemberUnavailable(); + modeSwitcher.forceElections(); reset( memberAvailability, election ); // When @@ -500,11 +524,12 @@ public Future answer( InvocationOnMock invocation ) throws Throwable .class ),
URI.create( "http://localhost:9090?serverId=42" ) ) ); modeSwitchHappened.await(); - modeSwitcher.postMemberUnavailable(); + modeSwitcher.forceElections(); // Then InOrder inOrder = inOrder( memberAvailability, election ); inOrder.verify( memberAvailability ).memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE ); + inOrder.verify( election ).performRoleElections(); inOrder.verifyNoMoreInteractions(); }