Skip to content

Commit

Permalink
Resolves a deadlock scenario applying constraint
Browse files Browse the repository at this point in the history
The scenario, which takes place on database instance applying constraint
creation as an external transaction, looks like this:

- Transaction T1 creates the constraint index and population P starts
- Transaction T2 which activates the constraint starts applying
  and now has a read lock on the counts store
- Check point triggers, wants to rotate counts store and so acquires
  its write lock. It will have to block, but doing so will also blocks
  further read lock requests
- T2 moves on to activate the constraint. Doing so means first waiting for
  the index to come online
- P moves on to flip after population, something which includes initializing
  some sample data in counts store for this index. Will block on the counts
  store read lock, completing the deadlock

The solution in this commit breaks the deadlock by having T2 unlock its
read lock on the counts store before activating the constraint, i.e.
as soon as it notices that it is a schema transaction. Schema transactions
will not update the counts store anyway.
  • Loading branch information
tinwelint committed Mar 29, 2017
1 parent ffcc0cd commit 4ebfb55
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 4 deletions.
Expand Up @@ -23,6 +23,7 @@

import org.neo4j.kernel.impl.store.counts.CountsTracker;
import org.neo4j.kernel.impl.transaction.command.Command;
import org.neo4j.kernel.impl.transaction.command.Command.SchemaRuleCommand;
import org.neo4j.storageengine.api.TransactionApplicationMode;

public class CountsStoreTransactionApplier extends TransactionApplier.Adapter
Expand All @@ -40,6 +41,11 @@ public CountsStoreTransactionApplier( TransactionApplicationMode mode, CountsAcc
public void close() throws Exception
{
assert countsUpdater != null || mode == TransactionApplicationMode.RECOVERY : "You must call begin first";
closeCountsUpdaterIfOpen();
}

private void closeCountsUpdaterIfOpen()
{
if ( countsUpdater != null )
{ // CountsUpdater is null if we're in recovery and the counts store already has had this transaction applied.
countsUpdater.close();
Expand Down Expand Up @@ -68,4 +74,15 @@ public boolean visitRelationshipCountsCommand( Command.RelationshipCountsCommand
}
return false;
}

@Override
public boolean visitSchemaRuleCommand( SchemaRuleCommand command ) throws IOException
{
// This shows that this transaction is a schema transaction, so it cannot have commands
// updating any counts anyway. Therefore the counts updater is closed right away.
// This also breaks an otherwise deadlocking scenario between check pointer, this applier
// and an index population thread wanting to apply index sampling to the counts store.
closeCountsUpdaterIfOpen();
return false;
}
}
Expand Up @@ -374,6 +374,9 @@ protected BatchTransactionApplierFacade applier( TransactionApplicationMode mode
appliers.add( new CacheInvalidationBatchTransactionApplier( neoStores, cacheAccess ) );
}

// Counts store application
appliers.add( new CountsStoreBatchTransactionApplier( neoStores.getCounts(), mode ) );

// Schema index application
appliers.add( new IndexBatchTransactionApplier( indexingService, labelScanStoreSync, indexUpdatesSync,
neoStores.getNodeStore(), new PropertyLoader( neoStores ),
Expand All @@ -384,9 +387,6 @@ protected BatchTransactionApplierFacade applier( TransactionApplicationMode mode
new LegacyBatchIndexApplier( indexConfigStore, legacyIndexApplierLookup, legacyIndexTransactionOrdering,
mode ) );

// Counts store application
appliers.add( new CountsStoreBatchTransactionApplier( neoStores.getCounts(), mode ) );

// Perform the application
return new BatchTransactionApplierFacade(
appliers.toArray( new BatchTransactionApplier[appliers.size()] ) );
Expand Down
Expand Up @@ -117,7 +117,15 @@ public void indexPopulationScanComplete()
Future<Object> checkPointer = t3.execute( state ->
db.getDependencyResolver().resolveDependency( CheckPointer.class )
.forceCheckPoint( new SimpleTriggerInfo( "MANUAL" ) ) );
t3.get().waitUntilWaiting( details -> details.isAt( LockWrapper.class, "writeLock" ) );
try
{
t3.get().waitUntilWaiting( details -> details.isAt( LockWrapper.class, "writeLock" ) );
}
catch ( IllegalStateException e )
{
// Thrown when the fix is in, basically it's thrown if the check pointer didn't get blocked
checkPointer.get(); // to assert that no exception was thrown during in check point thread
}

// Alright the trap is set. Let the population thread move on and seal the deal
controller.release();
Expand Down

0 comments on commit 4ebfb55

Please sign in to comment.