From e1369b8193cd64ab5ea40cbf2b176018bca6c532 Mon Sep 17 00:00:00 2001 From: Martin Furmanski Date: Tue, 17 Jan 2017 11:42:38 +0100 Subject: [PATCH] remove timeout for connecting to core It is not clear why it was added to begin with. The simplest solution is just to let it keep connecting until the core becomes available. --- .../causalclustering/ReplicationModule.java | 5 ++- .../core/replication/RaftReplicator.java | 2 +- .../tx/NeoStoreTransactionCounter.java | 44 ------------------- .../ConstantTimeRetryStrategy.java | 2 +- .../ExponentialBackoffStrategy.java | 5 ++- .../machines/tx => helper}/RetryStrategy.java | 2 +- .../EnterpriseReadReplicaEditionModule.java | 5 ++- .../ReadReplicaStartupProcess.java | 39 +++++----------- .../core/replication/RaftReplicatorTest.java | 8 ++-- .../ExponentialBackoffStrategyTest.java | 2 +- .../ReadReplicaStartupProcessTest.java | 11 ++--- 11 files changed, 34 insertions(+), 91 deletions(-) delete mode 100644 enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/NeoStoreTransactionCounter.java rename enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/{core/state/machines/tx => helper}/ConstantTimeRetryStrategy.java (96%) rename enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/{core/state/machines/tx => helper}/ExponentialBackoffStrategy.java (95%) rename enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/{core/state/machines/tx => helper}/RetryStrategy.java (94%) rename enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/{core/state/machines/tx => helper}/ExponentialBackoffStrategyTest.java (97%) diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/ReplicationModule.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/ReplicationModule.java index 28fd066d2de78..6f0ac9d1caa24 100644 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/ReplicationModule.java +++ b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/ReplicationModule.java @@ -30,7 +30,7 @@ import org.neo4j.causalclustering.core.replication.session.GlobalSession; import org.neo4j.causalclustering.core.replication.session.GlobalSessionTrackerState; import org.neo4j.causalclustering.core.replication.session.LocalSessionPool; -import org.neo4j.causalclustering.core.state.machines.tx.ExponentialBackoffStrategy; +import org.neo4j.causalclustering.helper.ExponentialBackoffStrategy; import org.neo4j.causalclustering.core.state.storage.DurableStateStorage; import org.neo4j.causalclustering.identity.MemberId; import org.neo4j.causalclustering.messaging.Outbound; @@ -67,8 +67,9 @@ public ReplicationModule( MemberId myself, PlatformModule platformModule, Config LocalSessionPool sessionPool = new LocalSessionPool( myGlobalSession ); progressTracker = new ProgressTrackerImpl( myGlobalSession ); + ExponentialBackoffStrategy retryStrategy = new ExponentialBackoffStrategy( 10, 60, SECONDS ); replicator = life.add( new RaftReplicator( consensusModule.raftMachine(), myself, outbound, sessionPool, - progressTracker, new ExponentialBackoffStrategy( 10, 60, SECONDS ) ) ); + progressTracker, retryStrategy ) ); } public RaftReplicator getReplicator() diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/replication/RaftReplicator.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/replication/RaftReplicator.java index 307124fb5ee4f..8b3cee17b5708 100644 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/replication/RaftReplicator.java +++ b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/replication/RaftReplicator.java @@ -28,7 +28,7 @@ import org.neo4j.causalclustering.messaging.Outbound; import org.neo4j.causalclustering.core.replication.session.LocalSessionPool; import org.neo4j.causalclustering.core.replication.session.OperationContext; -import org.neo4j.causalclustering.core.state.machines.tx.RetryStrategy; +import org.neo4j.causalclustering.helper.RetryStrategy; import org.neo4j.causalclustering.identity.MemberId; import org.neo4j.kernel.impl.util.Listener; import org.neo4j.kernel.lifecycle.LifecycleAdapter; diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/NeoStoreTransactionCounter.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/NeoStoreTransactionCounter.java deleted file mode 100644 index f8a1e828d162f..0000000000000 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/NeoStoreTransactionCounter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2002-2017 "Neo Technology," - * Network Engine for Objects in Lund AB [http://neotechnology.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ -package org.neo4j.causalclustering.core.state.machines.tx; - -import org.neo4j.kernel.impl.store.NeoStores; - -public class NeoStoreTransactionCounter implements TransactionCounter -{ - private final NeoStores neoStore; - - public NeoStoreTransactionCounter( NeoStores neoStore ) - { - this.neoStore = neoStore; - } - - @Override - public long lastCommittedTransactionId() - { - throw new IllegalArgumentException( ); - } - -// @Override -// public long lastCommittedTransactionId() -// { -// return neoStore.getLastCommittedTransactionId(); -// } -} diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/ConstantTimeRetryStrategy.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/ConstantTimeRetryStrategy.java similarity index 96% rename from enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/ConstantTimeRetryStrategy.java rename to enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/ConstantTimeRetryStrategy.java index d40dedb0f4e7f..7badf8f7e433e 100644 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/ConstantTimeRetryStrategy.java +++ b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/ConstantTimeRetryStrategy.java @@ -17,7 +17,7 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -package org.neo4j.causalclustering.core.state.machines.tx; +package org.neo4j.causalclustering.helper; import java.util.concurrent.TimeUnit; diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/ExponentialBackoffStrategy.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/ExponentialBackoffStrategy.java similarity index 95% rename from enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/ExponentialBackoffStrategy.java rename to enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/ExponentialBackoffStrategy.java index 829e7aa4b0477..ea6693ea7db8e 100644 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/ExponentialBackoffStrategy.java +++ b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/ExponentialBackoffStrategy.java @@ -17,10 +17,13 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -package org.neo4j.causalclustering.core.state.machines.tx; +package org.neo4j.causalclustering.helper; import java.util.concurrent.TimeUnit; +/** + * Exponential backoff strategy helper class. + */ public class ExponentialBackoffStrategy implements RetryStrategy { private final long initialBackoffTimeMillis; diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/RetryStrategy.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/RetryStrategy.java similarity index 94% rename from enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/RetryStrategy.java rename to enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/RetryStrategy.java index 032c24702fb7d..f0fffc17362a9 100644 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/core/state/machines/tx/RetryStrategy.java +++ b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/helper/RetryStrategy.java @@ -17,7 +17,7 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -package org.neo4j.causalclustering.core.state.machines.tx; +package org.neo4j.causalclustering.helper; public interface RetryStrategy { diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/EnterpriseReadReplicaEditionModule.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/EnterpriseReadReplicaEditionModule.java index 6e9671499770d..f9dcb733773b2 100644 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/EnterpriseReadReplicaEditionModule.java +++ b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/EnterpriseReadReplicaEditionModule.java @@ -37,7 +37,7 @@ import org.neo4j.causalclustering.catchup.tx.TxPullClient; import org.neo4j.causalclustering.core.CausalClusteringSettings; import org.neo4j.causalclustering.core.consensus.schedule.DelayedRenewableTimeoutService; -import org.neo4j.causalclustering.core.state.machines.tx.ExponentialBackoffStrategy; +import org.neo4j.causalclustering.helper.ExponentialBackoffStrategy; import org.neo4j.causalclustering.discovery.DiscoveryServiceFactory; import org.neo4j.causalclustering.discovery.TopologyService; import org.neo4j.causalclustering.discovery.procedures.ReadReplicaRoleProcedure; @@ -249,9 +249,10 @@ private OnlineBackupKernelExtension pickBackupExtension( NeoStoreDataSource data txPulling.add( catchupTimeoutService ); txPulling.add( new WaitForUpToDateStore( catchupProcess, logProvider ) ); + ExponentialBackoffStrategy retryStrategy = new ExponentialBackoffStrategy( 1, 30, TimeUnit.SECONDS ); life.add( new ReadReplicaStartupProcess( platformModule.fileSystem, storeFetcher, localDatabase, txPulling, new ConnectToRandomCoreMember( discoveryService ), - new ExponentialBackoffStrategy( 1, 30, TimeUnit.SECONDS ), logProvider, + retryStrategy, logProvider, platformModule.logging.getUserLogProvider(), copiedStoreRecovery ) ); dependencies.satisfyDependency( createSessionTracker() ); diff --git a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcess.java b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcess.java index 4cee7e8b0176b..49b22cf394adf 100644 --- a/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcess.java +++ b/enterprise/causal-clustering/src/main/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcess.java @@ -20,7 +20,6 @@ package org.neo4j.causalclustering.readreplica; import java.io.IOException; -import java.util.concurrent.locks.LockSupport; import org.neo4j.causalclustering.catchup.storecopy.CopiedStoreRecovery; import org.neo4j.causalclustering.catchup.storecopy.LocalDatabase; @@ -28,7 +27,7 @@ import org.neo4j.causalclustering.catchup.storecopy.StoreFetcher; import org.neo4j.causalclustering.catchup.storecopy.StoreIdDownloadFailedException; import org.neo4j.causalclustering.catchup.storecopy.StreamingTransactionsFailedException; -import org.neo4j.causalclustering.core.state.machines.tx.RetryStrategy; +import org.neo4j.causalclustering.helper.RetryStrategy; import org.neo4j.causalclustering.identity.MemberId; import org.neo4j.causalclustering.identity.StoreId; import org.neo4j.causalclustering.messaging.routing.CoreMemberSelectionException; @@ -42,8 +41,6 @@ class ReadReplicaStartupProcess implements Lifecycle { - private static final int MAX_ATTEMPTS = 5; - private final FileSystemAbstraction fs; private final StoreFetcher storeFetcher; private final LocalDatabase localDatabase; @@ -88,14 +85,22 @@ public void start() throws IOException { boolean syncedWithCore = false; RetryStrategy.Timeout timeout = retryStrategy.newTimeout(); - for ( int attempt = 1; attempt <= MAX_ATTEMPTS && !syncedWithCore; attempt++ ) + int attempt = 0; + while ( !syncedWithCore ) { - MemberId source = findCoreMemberToCopyFrom(); + attempt++; + MemberId source = null; try { + source = connectionStrategy.coreMember(); syncStoreWithCore( source ); syncedWithCore = true; } + catch ( CoreMemberSelectionException e ) + { + lastIssue = issueOf( "finding core member", attempt ); + debugLog.warn( lastIssue ); + } catch ( StoreCopyFailedException e ) { lastIssue = issueOf( format( "copying store files from %s", source ), attempt ); @@ -124,8 +129,6 @@ public void start() throws IOException debugLog.warn( lastIssue ); break; } - - attempt++; } if ( !syncedWithCore ) @@ -180,26 +183,6 @@ private void ensureSameStoreIdAs( MemberId remoteCore ) throws StoreIdDownloadFa } } - private MemberId findCoreMemberToCopyFrom() - { - RetryStrategy.Timeout timeout = retryStrategy.newTimeout(); - while ( true ) - { - try - { - MemberId memberId = connectionStrategy.coreMember(); - debugLog.info( "Server starting, connecting to core server %s", memberId ); - return memberId; - } - catch ( CoreMemberSelectionException ex ) - { - debugLog.info( "Failed to connect to core server. Retrying in %d ms.", timeout.getMillis() ); - LockSupport.parkUntil( timeout.getMillis() + System.currentTimeMillis() ); - timeout.increment(); - } - } - } - @Override public void stop() throws Throwable { diff --git a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/core/replication/RaftReplicatorTest.java b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/core/replication/RaftReplicatorTest.java index e728466901230..f8caf719d0d3f 100644 --- a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/core/replication/RaftReplicatorTest.java +++ b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/core/replication/RaftReplicatorTest.java @@ -32,13 +32,12 @@ import org.neo4j.causalclustering.messaging.Outbound; import org.neo4j.causalclustering.core.replication.session.GlobalSession; import org.neo4j.causalclustering.core.replication.session.LocalSessionPool; -import org.neo4j.causalclustering.core.state.machines.tx.ConstantTimeRetryStrategy; -import org.neo4j.causalclustering.core.state.machines.tx.RetryStrategy; +import org.neo4j.causalclustering.helper.ConstantTimeRetryStrategy; +import org.neo4j.causalclustering.helper.RetryStrategy; import org.neo4j.causalclustering.core.state.Result; import org.neo4j.causalclustering.identity.MemberId; import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static java.util.concurrent.TimeUnit.SECONDS; import static junit.framework.TestCase.assertEquals; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.not; @@ -56,7 +55,7 @@ public class RaftReplicatorTest private MemberId leader = new MemberId( UUID.randomUUID() ); private GlobalSession session = new GlobalSession( UUID.randomUUID(), myself ); private LocalSessionPool sessionPool = new LocalSessionPool( session ); - private RetryStrategy retryStrategy = new ConstantTimeRetryStrategy( 1, SECONDS ); + private RetryStrategy retryStrategy = new ConstantTimeRetryStrategy( 0, MILLISECONDS ); @Test public void shouldSendReplicatedContentToLeader() throws Exception @@ -94,7 +93,6 @@ public void shouldResendAfterTimeout() throws Exception CapturingProgressTracker capturedProgress = new CapturingProgressTracker(); CapturingOutbound outbound = new CapturingOutbound(); - ConstantTimeRetryStrategy retryStrategy = new ConstantTimeRetryStrategy( 100, MILLISECONDS ); RaftReplicator replicator = new RaftReplicator( leaderLocator, myself, outbound, sessionPool, capturedProgress, retryStrategy ); diff --git a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/core/state/machines/tx/ExponentialBackoffStrategyTest.java b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/helper/ExponentialBackoffStrategyTest.java similarity index 97% rename from enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/core/state/machines/tx/ExponentialBackoffStrategyTest.java rename to enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/helper/ExponentialBackoffStrategyTest.java index 568fa281ea302..c1ebec812369d 100644 --- a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/core/state/machines/tx/ExponentialBackoffStrategyTest.java +++ b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/helper/ExponentialBackoffStrategyTest.java @@ -17,7 +17,7 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -package org.neo4j.causalclustering.core.state.machines.tx; +package org.neo4j.causalclustering.helper; import org.junit.Test; diff --git a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcessTest.java b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcessTest.java index 967d32aac5b60..ac8eba074a56c 100644 --- a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcessTest.java +++ b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/readreplica/ReadReplicaStartupProcessTest.java @@ -29,7 +29,7 @@ import org.neo4j.causalclustering.catchup.storecopy.LocalDatabase; import org.neo4j.causalclustering.catchup.storecopy.StoreFetcher; import org.neo4j.causalclustering.catchup.storecopy.StoreIdDownloadFailedException; -import org.neo4j.causalclustering.core.state.machines.tx.ConstantTimeRetryStrategy; +import org.neo4j.causalclustering.helper.ConstantTimeRetryStrategy; import org.neo4j.causalclustering.discovery.CoreTopology; import org.neo4j.causalclustering.discovery.TopologyService; import org.neo4j.causalclustering.identity.MemberId; @@ -52,6 +52,7 @@ public class ReadReplicaStartupProcessTest { + private ConstantTimeRetryStrategy retryStrategy = new ConstantTimeRetryStrategy( 1, MILLISECONDS ); private CopiedStoreRecovery copiedStoreRecovery = mock( CopiedStoreRecovery.class ); private FileSystemAbstraction fs = mock( FileSystemAbstraction.class ); private StoreFetcher storeFetcher = mock( StoreFetcher.class ); @@ -83,7 +84,7 @@ public void shouldReplaceEmptyStoreWithRemote() throws Throwable ReadReplicaStartupProcess readReplicaStartupProcess = new ReadReplicaStartupProcess( fs, storeFetcher, localDatabase, txPulling, - new AlwaysChooseFirstMember( hazelcastTopology ), new ConstantTimeRetryStrategy( 1, MILLISECONDS ), + new AlwaysChooseFirstMember( hazelcastTopology ), retryStrategy, NullLogProvider.getInstance(), NullLogProvider.getInstance(), copiedStoreRecovery ); // when @@ -104,7 +105,7 @@ public void shouldNotStartWithMismatchedNonEmptyStore() throws Throwable ReadReplicaStartupProcess readReplicaStartupProcess = new ReadReplicaStartupProcess( fs, storeFetcher, localDatabase, txPulling, - new AlwaysChooseFirstMember( hazelcastTopology ), new ConstantTimeRetryStrategy( 1, MILLISECONDS ), + new AlwaysChooseFirstMember( hazelcastTopology ), retryStrategy, NullLogProvider.getInstance(), NullLogProvider.getInstance(), copiedStoreRecovery ); // when @@ -134,7 +135,7 @@ public void shouldStartWithMatchingDatabase() throws Throwable ReadReplicaStartupProcess readReplicaStartupProcess = new ReadReplicaStartupProcess( fs, storeFetcher, localDatabase, txPulling, - new AlwaysChooseFirstMember( hazelcastTopology ), new ConstantTimeRetryStrategy( 1, MILLISECONDS ), + new AlwaysChooseFirstMember( hazelcastTopology ), retryStrategy, NullLogProvider.getInstance(), NullLogProvider.getInstance(), copiedStoreRecovery ); // when @@ -154,7 +155,7 @@ public void stopShouldStopTheDatabaseAndStopPolling() throws Throwable ReadReplicaStartupProcess readReplicaStartupProcess = new ReadReplicaStartupProcess( fs, storeFetcher, localDatabase, txPulling, - new AlwaysChooseFirstMember( hazelcastTopology ), new ConstantTimeRetryStrategy( 1, MILLISECONDS ), + new AlwaysChooseFirstMember( hazelcastTopology ), retryStrategy, NullLogProvider.getInstance(), NullLogProvider.getInstance(), copiedStoreRecovery ); readReplicaStartupProcess.start();