Skip to content

Commit

Permalink
Better error message when db stopped for store copy
Browse files Browse the repository at this point in the history
This commit improves the error message shown when users (Bolt or embedded) try to
start a transaction while a cluster member is copying a store. This is done by raising a
dedicated `AvailabilityRequirement` on the `AvailabilityGuard` when the local
database is stopped to perform a store copy.
  • Loading branch information
lutovich committed Feb 20, 2017
1 parent 3719737 commit 507b792
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 21 deletions.
Expand Up @@ -46,7 +46,10 @@

public class LocalDatabase implements Lifecycle
{
private static final AvailabilityRequirement NOT_STOPPED = availabilityRequirement( "Database is stopped" );
private static final AvailabilityRequirement NOT_STOPPED =
availabilityRequirement( "Database is stopped" );
private static final AvailabilityRequirement NOT_COPYING_STORE =
availabilityRequirement( "Database is stopped to copy store from another cluster member" );

private final File storeDir;

Expand All @@ -60,7 +63,7 @@ public class LocalDatabase implements Lifecycle

private volatile StoreId storeId;
private volatile DatabaseHealth databaseHealth;
private boolean started = false;
private AvailabilityRequirement currentRequirement;

private volatile TransactionCommitProcess localCommit;

Expand All @@ -79,7 +82,7 @@ public LocalDatabase( File storeDir, StoreFiles storeFiles,
this.availabilityGuard = availabilityGuard;
this.log = logProvider.getLog( getClass() );

raiseAvailabilityGuard();
raiseAvailabilityGuard( NOT_STOPPED );
}

@Override
Expand All @@ -97,24 +100,28 @@ public synchronized void start() throws Throwable
dataSourceManager.start();

dropAvailabilityGuard();
started = true;
}

@Override
public synchronized void stop() throws Throwable
public void stop() throws Throwable
{
log.info( "Stopping" );
databaseHealth = null;
localCommit = null;
dataSourceManager.stop();
stopWithRequirement( NOT_STOPPED );
}

raiseAvailabilityGuard();
started = false;
/**
* Stops the database so that a store copy can be performed.
* <p>
* Delegates to {@code stopWithRequirement} with the {@code NOT_COPYING_STORE} requirement, so the
* {@link AvailabilityGuard} is raised with the description "Database is stopped to copy store from
* another cluster member" rather than the generic "Database is stopped".
*
* @throws Throwable if any of the components are unable to stop.
*/
public void stopForStoreCopy() throws Throwable
{
stopWithRequirement( NOT_COPYING_STORE );
}

public boolean isAvailable()
{
return started;
return currentRequirement == null;
}

@Override
Expand All @@ -125,7 +132,7 @@ public void shutdown() throws Throwable

public synchronized StoreId storeId()
{
if ( started )
if ( isAvailable() )
{
return storeId;
}
Expand Down Expand Up @@ -226,13 +233,31 @@ public TransactionCommitProcess getCommitProcess()
return localCommit;
}

private void raiseAvailabilityGuard()
private synchronized void stopWithRequirement( AvailabilityRequirement requirement ) throws Throwable
{
log.info( "Stopping, reason: " + requirement.description() );
databaseHealth = null;
localCommit = null;
dataSourceManager.stop();

raiseAvailabilityGuard( requirement );
}

private void raiseAvailabilityGuard( AvailabilityRequirement requirement )
{
availabilityGuard.require( NOT_STOPPED );
// The local database can be created and then stopped straight away to perform a store copy.
// In that case the new requirement must be imposed and the old one dropped.
availabilityGuard.require( requirement );
if ( currentRequirement != null )
{
dropAvailabilityGuard();
}
currentRequirement = requirement;
}

private void dropAvailabilityGuard()
{
availabilityGuard.fulfill( NOT_STOPPED );
availabilityGuard.fulfill( currentRequirement );
currentRequirement = null;
}
}
Expand Up @@ -305,7 +305,7 @@ private void downloadDatabase( MemberId core, StoreId localStoreId )
{
try
{
localDatabase.stop();
localDatabase.stopForStoreCopy();
startStopOnStoreCopy.stop();
}
catch ( Throwable throwable )
Expand Down
Expand Up @@ -83,7 +83,7 @@ public synchronized void downloadSnapshot( MemberId source, CoreState coreState
}

startStopOnStoreCopy.stop();
localDatabase.stop();
localDatabase.stopForStoreCopy();

log.info( "Downloading snapshot from core server at %s", source );

Expand Down
Expand Up @@ -81,6 +81,22 @@ public void availabilityGuardRaisedOnStop() throws Throwable
assertDatabaseIsStoppedAndUnavailable( guard );
}

// Verifies that stopping the database for a store copy makes the guard unavailable and that the
// guard reports the store-copy specific blocking description.
@Test
public void availabilityGuardRaisedOnStopForStoreCopy() throws Throwable
{
// a freshly created guard is expected to be available
AvailabilityGuard guard = newAvailabilityGuard();
assertTrue( guard.isAvailable() );

// constructing the LocalDatabase raises the guard
LocalDatabase localDatabase = newLocalDatabase( guard );
assertFalse( guard.isAvailable() );

// starting the database drops the requirement again
localDatabase.start();
assertTrue( guard.isAvailable() );

// stopping for store copy must raise the guard with the store-copy description
localDatabase.stopForStoreCopy();
assertDatabaseIsStoppedForStoreCopyAndUnavailable( guard );
}

private static LocalDatabase newLocalDatabase( AvailabilityGuard availabilityGuard )
{
return new LocalDatabase( mock( File.class ), mock( StoreFiles.class ), mock( DataSourceManager.class ),
Expand All @@ -98,4 +114,10 @@ private static void assertDatabaseIsStoppedAndUnavailable( AvailabilityGuard gua
assertFalse( guard.isAvailable() );
assertThat( guard.describeWhoIsBlocking(), containsString( "Database is stopped" ) );
}

/**
* Asserts that the given guard is unavailable and that its description of who is blocking
* contains the store-copy message "Database is stopped to copy store".
*
* @param guard the availability guard to inspect.
*/
private static void assertDatabaseIsStoppedForStoreCopyAndUnavailable( AvailabilityGuard guard )
{
assertFalse( guard.isAvailable() );
assertThat( guard.describeWhoIsBlocking(), containsString( "Database is stopped to copy store" ) );
}
}
Expand Up @@ -170,7 +170,7 @@ public void nextStateShouldBeTxPullingAfterASuccessfulStoreCopy() throws Throwab
timeoutService.invokeTimeout( TX_PULLER_TIMEOUT );

// then
verify( localDatabase ).stop();
verify( localDatabase ).stopForStoreCopy();
verify( startStopOnStoreCopy ).stop();
verify( storeCopyProcess ).replaceWithStoreFrom( any( MemberId.class ), eq( storeId ) );
verify( localDatabase ).start();
Expand Down
Expand Up @@ -27,9 +27,9 @@

import org.neo4j.causalclustering.catchup.CatchUpClient;
import org.neo4j.causalclustering.catchup.storecopy.LocalDatabase;
import org.neo4j.causalclustering.catchup.storecopy.RemoteStore;
import org.neo4j.causalclustering.catchup.storecopy.StoreCopyFailedException;
import org.neo4j.causalclustering.catchup.storecopy.StoreCopyProcess;
import org.neo4j.causalclustering.catchup.storecopy.RemoteStore;
import org.neo4j.causalclustering.core.state.CoreState;
import org.neo4j.causalclustering.core.state.machines.CoreStateMachines;
import org.neo4j.causalclustering.identity.MemberId;
Expand Down Expand Up @@ -100,7 +100,7 @@ public void shouldStopDatabaseDuringDownload() throws Throwable

// then
verify( startStopLife ).stop();
verify( localDatabase ).stop();
verify( localDatabase ).stopForStoreCopy();
verify( localDatabase ).start();
verify( startStopLife ).start();
}
Expand Down
Expand Up @@ -41,6 +41,7 @@
import org.neo4j.kernel.monitoring.Monitors;
import org.neo4j.test.causalclustering.ClusterRule;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
Expand Down Expand Up @@ -81,6 +82,7 @@ public void shouldNotBePossibleToStartTransactionsWhenReadReplicaCopiesStore() t
catch ( Exception e )
{
assertThat( e, instanceOf( TransactionFailureException.class ) );
assertThat( e.getMessage(), containsString( "Database is stopped to copy store" ) );
}
}
finally
Expand Down

0 comments on commit 507b792

Please sign in to comment.