diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/transaction/log/checkpoint/CheckPointerConstraintCreationDeadlockIT.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/transaction/log/checkpoint/CheckPointerConstraintCreationDeadlockIT.java new file mode 100644 index 000000000000..afc886a79e97 --- /dev/null +++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/transaction/log/checkpoint/CheckPointerConstraintCreationDeadlockIT.java @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2002-2017 "Neo Technology," + * Network Engine for Objects in Lund AB [http://neotechnology.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.impl.transaction.log.checkpoint; + +import org.junit.Rule; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Future; + +import org.neo4j.graphdb.Label; +import org.neo4j.graphdb.Transaction; +import org.neo4j.kernel.api.exceptions.TransactionFailureException; +import org.neo4j.kernel.impl.api.TransactionCommitProcess; +import org.neo4j.kernel.impl.api.TransactionToApply; +import org.neo4j.kernel.impl.api.index.IndexingService; +import org.neo4j.kernel.impl.locking.LockWrapper; +import org.neo4j.kernel.impl.transaction.TransactionRepresentation; +import org.neo4j.kernel.impl.transaction.log.LogicalTransactionStore; +import org.neo4j.kernel.impl.transaction.log.TransactionCursor; +import org.neo4j.kernel.impl.transaction.log.TransactionIdStore; +import org.neo4j.kernel.internal.GraphDatabaseAPI; +import org.neo4j.kernel.monitoring.Monitors; +import org.neo4j.test.Barrier; +import org.neo4j.test.EphemeralFileSystemRule; +import org.neo4j.test.OtherThreadRule; +import org.neo4j.test.TestGraphDatabaseFactory; +import org.neo4j.test.TestLabels; + +import static java.util.concurrent.TimeUnit.SECONDS; + +import static org.neo4j.kernel.impl.transaction.tracing.CommitEvent.NULL; +import static org.neo4j.storageengine.api.TransactionApplicationMode.EXTERNAL; + +/** + * The scenario, which takes place on database instance applying constraint + * creation as an external transaction, looks like this: + * + *
    + *
  1. + * Transaction T1 creates the constraint index and population P starts + *
  2. + *
  3. + * Transaction T2 which activates the constraint starts applying and now has a read lock on the counts store + *
  4. + *
  5. + * Check point triggers, wants to rotate counts store and so acquires its write lock. + * It will have to block, but doing so will also blocks further read lock requests + *
  6. + *
  7. + * T2 moves on to activate the constraint. Doing so means first waiting for the index to come online + *
  8. + *
  9. + * P moves on to flip after population, something which includes initializing some sample data in counts store + * for this index. Will block on the counts store read lock, completing the deadlock + *
  10. + *
+ */ +public class CheckPointerConstraintCreationDeadlockIT +{ + private static final Label LABEL = TestLabels.LABEL_ONE; + private static final String KEY = "key"; + + @Rule + public final EphemeralFileSystemRule fs = new EphemeralFileSystemRule(); + @Rule + public final OtherThreadRule t2 = new OtherThreadRule<>( "T2" ); + @Rule + public final OtherThreadRule t3 = new OtherThreadRule<>( "T3" ); + + @Test + public void shouldNotDeadlock() throws Exception + { + List transactions = createConstraintCreatingTransactions(); + Monitors monitors = new Monitors(); + GraphDatabaseAPI db = (GraphDatabaseAPI) new TestGraphDatabaseFactory() + .setMonitors( monitors ).newImpermanentDatabase(); + Barrier.Control controller = new Barrier.Control(); + boolean success = false; + try + { + IndexingService.Monitor monitor = new IndexingService.MonitorAdapter() + { + @Override + public void indexPopulationScanComplete() + { + controller.reached(); + } + }; + monitors.addMonitorListener( monitor ); + Future applier = applyInT2( db, transactions ); + + controller.await(); + + // At this point the index population has completed and the population thread is ready to + // acquire the counts store read lock for initializing some samples there. We're starting the + // check pointer, which will eventually put itself in queue for acquiring the write lock + + Future checkPointer = t3.execute( state -> + db.getDependencyResolver().resolveDependency( CheckPointer.class ) + .forceCheckPoint( new SimpleTriggerInfo( "MANUAL" ) ) ); + t3.get().waitUntilWaiting( details -> details.isAt( LockWrapper.class, "writeLock" ) ); + + // Alright the trap is set. Let the population thread move on and seal the deal + controller.release(); + + // THEN these should complete + applier.get( 10, SECONDS ); + checkPointer.get( 10, SECONDS ); + success = true; + } + finally + { + if ( !success ) + { + t2.interrupt(); + t3.interrupt(); + // so that shutdown won't hang too + } + db.shutdown(); + } + } + + private Future applyInT2( GraphDatabaseAPI db, List transactions ) + { + TransactionCommitProcess commitProcess = + db.getDependencyResolver().resolveDependency( TransactionCommitProcess.class ); + return t2.execute( state -> + { + transactions.forEach( tx -> + { + try + { + // It will matter if the transactions are supplied all in the same batch or one by one + // since the CountsTracker#apply lock is held and released per transaction + commitProcess.commit( new TransactionToApply( tx ), NULL, EXTERNAL ); + } + catch ( TransactionFailureException e ) + { + throw new RuntimeException( e ); + } + } ); + return null; + } ); + } + + private static List createConstraintCreatingTransactions() throws Exception + { + GraphDatabaseAPI db = (GraphDatabaseAPI) new TestGraphDatabaseFactory().newImpermanentDatabase(); + try + { + try ( Transaction tx = db.beginTx() ) + { + db.schema().constraintFor( LABEL ).assertPropertyIsUnique( KEY ).create(); + tx.success(); + } + + LogicalTransactionStore txStore = db.getDependencyResolver().resolveDependency( LogicalTransactionStore.class ); + List result = new ArrayList<>(); + try ( TransactionCursor cursor = txStore.getTransactions( TransactionIdStore.BASE_TX_ID + 1 ) ) + { + while ( cursor.next() ) + { + result.add( cursor.get().getTransactionRepresentation() ); + } + } + return result; + } + finally + { + db.shutdown(); + } + } +} diff --git a/community/kernel/src/test/java/org/neo4j/test/OtherThreadExecutor.java b/community/kernel/src/test/java/org/neo4j/test/OtherThreadExecutor.java index 1ed85fb72a59..b8d1d6fa6202 100644 --- a/community/kernel/src/test/java/org/neo4j/test/OtherThreadExecutor.java +++ b/community/kernel/src/test/java/org/neo4j/test/OtherThreadExecutor.java @@ -22,6 +22,7 @@ import java.io.Closeable; import java.io.PrintStream; import java.lang.Thread.State; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -30,11 +31,13 @@ import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.locks.LockSupport; import java.util.function.Predicate; import org.neo4j.logging.Logger; import static java.lang.String.format; +import static java.lang.System.currentTimeMillis; import static java.util.Arrays.asList; import static java.util.concurrent.Executors.newSingleThreadExecutor; import static java.util.concurrent.TimeUnit.MILLISECONDS; @@ -217,12 +220,22 @@ public String toString() public WaitDetails waitUntilWaiting() throws TimeoutException { - return waitUntilThreadState( Thread.State.WAITING, Thread.State.TIMED_WAITING ); + return waitUntilWaiting( details -> true ); } public WaitDetails waitUntilBlocked() throws TimeoutException { - return waitUntilThreadState( Thread.State.BLOCKED ); + return waitUntilBlocked( details -> true ); + } + + public WaitDetails waitUntilWaiting( Predicate correctWait ) throws TimeoutException + { + return waitUntilThreadState( correctWait, Thread.State.WAITING, Thread.State.TIMED_WAITING ); + } + + public WaitDetails waitUntilBlocked( Predicate correctWait ) throws TimeoutException + { + return waitUntilThreadState( correctWait, Thread.State.BLOCKED ); } public WaitDetails waitUntilThreadState( final Thread.State... possibleStates ) throws TimeoutException @@ -230,6 +243,24 @@ public WaitDetails waitUntilThreadState( final Thread.State... possibleStates ) return waitUntil( new AnyThreadState( possibleStates ) ); } + public WaitDetails waitUntilThreadState( Predicate correctWait, + final Thread.State... possibleStates ) throws TimeoutException + { + long end = currentTimeMillis() + timeout; + WaitDetails details = null; + while ( !correctWait.test( details = waitUntil( new AnyThreadState( possibleStates )) ) ) + { + LockSupport.parkNanos( MILLISECONDS.toNanos( 20 ) ); + if ( currentTimeMillis() > end ) + { + throw new TimeoutException( "Wanted to wait for any of " + Arrays.toString( possibleStates ) + + " over at " + correctWait + ", but didn't managed to get there in " + timeout + "ms. " + + "instead ended up waiting in " + details ); + } + } + return details; + } + public WaitDetails waitUntil( Predicate condition ) throws TimeoutException { long end = System.currentTimeMillis() + timeout;