From f97226176a615f7ac7b96fe638bc566859ad92cc Mon Sep 17 00:00:00 2001 From: Jacob Hansson Date: Thu, 30 Jul 2015 23:06:35 -0500 Subject: [PATCH] Improve deadlock descriptions in enterprise lock manager. - Each lock client now optionally can have a description set, which is included when explaining a deadlock. - Cypher sets lock client description before execution of each query - MasterImpl sets lock client description to be relatable back to the slave it is acting on behalf of --- .../kernel/ha/com/master/MasterImpl.java | 8 +- .../kernel/ha/lock/SlaveLocksClient.java | 13 +++ .../kernel/ha/lock/forseti/ExclusiveLock.java | 7 +- .../kernel/ha/lock/forseti/ForsetiClient.java | 80 +++++++++++++--- .../ha/lock/forseti/ForsetiLockManager.java | 47 ++++++++-- .../kernel/ha/lock/forseti/SharedLock.java | 34 ++++--- .../ha/lock/forseti/ForsetiMessagesTest.java | 94 +++++++++++++++++++ 7 files changed, 247 insertions(+), 36 deletions(-) create mode 100644 enterprise/ha/src/test/java/org/neo4j/kernel/ha/lock/forseti/ForsetiMessagesTest.java diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java index 2afd7b3b47912..a2c3351a81c5e 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/com/master/MasterImpl.java @@ -33,7 +33,6 @@ import org.neo4j.com.storecopy.StoreWriter; import org.neo4j.function.Consumer; import org.neo4j.helpers.Clock; -import org.neo4j.helpers.Factory; import org.neo4j.kernel.DeadlockDetectedException; import org.neo4j.kernel.IdType; import org.neo4j.kernel.api.exceptions.Status; @@ -144,11 +143,12 @@ private long generateEpoch() @Override public void start() throws Throwable { - this.slaveLockSessions = new TimedRepository<>( new Factory() + this.slaveLockSessions = new TimedRepository<>( new TimedRepository.ValueFactory() { - @Override public Locks.Client newInstance() + @Override public Locks.Client newInstance(RequestContext ctx) { - return spi.acquireClient(); + return spi.acquireClient().description( + String.format("Locks held on behalf of slave `%d`, slave transaction id `%d`", ctx.machineId(), ctx.getEventIdentifier() ) ); } }, new Consumer() { diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/SlaveLocksClient.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/SlaveLocksClient.java index 701b5930268b6..1859c7dc9db49 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/SlaveLocksClient.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/SlaveLocksClient.java @@ -92,6 +92,19 @@ private Map getLockMap( return lockMap; } + @Override + public Locks.Client description( String desc ) + { + client.description( desc ); + return this; + } + + @Override + public String description() + { + return client.description(); + } + @Override public void acquireShared( Locks.ResourceType resourceType, long... resourceIds ) throws AcquireLockTimeoutException { diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ExclusiveLock.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ExclusiveLock.java index 6cac47d30b81c..9e3ea4ff084dc 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ExclusiveLock.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ExclusiveLock.java @@ -19,6 +19,8 @@ */ package org.neo4j.kernel.ha.lock.forseti; +import java.util.Set; + import org.neo4j.kernel.impl.util.collection.SimpleBitSet; class ExclusiveLock implements ForsetiLockManager.Lock @@ -49,9 +51,10 @@ public boolean anyHolderIsWaitingFor( int client ) } @Override - public String describeWaitList() + public String describeWaitList( Set involvedParties ) { - return "ExclusiveLock[" + owner.describeWaitList() + "]"; + involvedParties.add( owner.id() ); + return owner.describeWaitList( involvedParties ); } @Override diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiClient.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiClient.java index 78ac467c118b4..6e6109cad9f07 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiClient.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiClient.java @@ -19,13 +19,16 @@ */ package org.neo4j.kernel.ha.lock.forseti; +import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.ConcurrentMap; -import org.neo4j.collection.primitive.Primitive; import org.neo4j.collection.pool.LinkedQueuePool; +import org.neo4j.collection.primitive.Primitive; import org.neo4j.collection.primitive.PrimitiveIntIterator; import org.neo4j.collection.primitive.PrimitiveLongIntMap; import org.neo4j.collection.primitive.PrimitiveLongVisitor; +import org.neo4j.function.Function; import org.neo4j.kernel.DeadlockDetectedException; import org.neo4j.kernel.impl.locking.AcquireLockTimeoutException; import org.neo4j.kernel.impl.locking.Locks; @@ -47,6 +50,17 @@ public class ForsetiClient implements Locks.Client /** Id for this client */ private final int myId; + /** + * Alias for this client, a user-friendly name used in error messages and lock descriptions. Ideally the user can use the description to tell which query is + * problematic. + */ + private String description; + + /** + * For helpful errors, this resolves a user-friendly description for a client, given a client id. + */ + private final Function clientDescription; + /** resourceType -> lock map. These are the global lock maps, shared across all clients. */ private final ConcurrentMap[] lockMaps; @@ -79,9 +93,12 @@ public class ForsetiClient implements Locks.Client public ForsetiClient( int id, ConcurrentMap[] lockMaps, WaitStrategy[] waitStrategies, - LinkedQueuePool clientPool ) + LinkedQueuePool clientPool, + Function clientDescription ) { this.myId = id; + this.description = String.format("Client[%d]", id); + this.clientDescription = clientDescription; this.lockMaps = lockMaps; this.waitStrategies = waitStrategies; this.clientPool = clientPool; @@ -501,6 +518,7 @@ public void releaseAll() public void close() { releaseAll(); + description = "N/A"; clientPool.release( this ); } @@ -555,7 +573,7 @@ public int hashCode() @Override public String toString() { - return String.format( "ForsetiClient[%d]", myId ); + return "Tx[" + myId + "]"; } /** Release a lock from the global pool. */ @@ -579,8 +597,7 @@ private boolean releaseLocalLock( Locks.ResourceType type, long resourceId, Prim int lockCount = localLocks.remove( resourceId ); if(lockCount == -1) { - throw new IllegalStateException( this + " cannot release lock that it does not hold: " + - type + "[" + resourceId + "]." ); + throw new IllegalStateException( this + " cannot release lock that it does not hold: " + type + "[" + resourceId + "]." ); } if(lockCount > 1) @@ -683,21 +700,45 @@ private void markAsWaitingFor( ForsetiLockManager.Lock lock, Locks.ResourceType lock.copyHolderWaitListsInto( waitList ); if(lock.anyHolderIsWaitingFor( myId ) && lock.holderWaitListSize() >= waitListSize()) { - waitList.clear(); - throw new DeadlockDetectedException( this + " can't acquire " + lock + " on " + type + "("+resourceId+"), because holders of that lock " + - "are waiting for " + this + ".\n Wait list:" + lock.describeWaitList() ); + Set involvedParties = new TreeSet<>(); // treeset to keep the clients sorted by id, just for readability purposes + String waitList = lock.describeWaitList( involvedParties ); + String legend = ForsetiLockManager.legendForClients( involvedParties, clientDescription ); + String desc = format("%s can't lock %s(%d), because that resource is locked by others in " + + "a way that would cause a deadlock if we waited for them.\nThe lock currently is %s, " + + "and holders of that lock are waiting in the following way: %s%n%nTransactions:%s", + this, type, resourceId, lock, waitList, legend); + + this.waitList.clear(); + throw new DeadlockDetectedException( desc ); } } - public String describeWaitList() + /** + * Describe who this client is waiting for in a human-comprehensible way. + * @param involvedParties All clients listed in the description will be added to this set, allowing the callee to print a legend with + * (client names -> descriptions) at the end of its output + */ + public String describeWaitList( Set involvedParties ) { - StringBuilder sb = new StringBuilder( format( "%nClient[%d] waits for [", id() ) ); + if(waitList.size() <= 1) + { + return format( "%n", myId ); + } + + StringBuilder sb = new StringBuilder( format( "%n 0 ? "," : "" ).append( iter.next() ); + int clientId = iter.next(); + if( clientId != myId ) + { + involvedParties.add( clientId ); + sb.append( first ? "" : "," ).append( "Tx[" + clientId + "]" ); + first = false; + } } - sb.append( "]" ); + sb.append( ">" ); return sb.toString(); } @@ -706,6 +747,19 @@ public int id() return myId; } + @Override + public Locks.Client description( String desc ) + { + this.description = desc; + return this; + } + + @Override + public String description() + { + return description; + } + // Visitors used for bulk ops on the lock maps (such as releasing all locks) private class ReleaseSharedLocksVisitor implements PrimitiveLongVisitor diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiLockManager.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiLockManager.java index c18d9e2f7e327..1988822f26858 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiLockManager.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/ForsetiLockManager.java @@ -21,19 +21,23 @@ import java.util.Map; import java.util.Queue; +import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; import org.neo4j.collection.pool.LinkedQueuePool; -import org.neo4j.collection.pool.Pool; +import org.neo4j.function.Function; import org.neo4j.kernel.impl.locking.AcquireLockTimeoutException; import org.neo4j.kernel.impl.locking.Locks; import org.neo4j.kernel.impl.util.collection.SimpleBitSet; import org.neo4j.kernel.impl.util.concurrent.WaitStrategy; import org.neo4j.kernel.lifecycle.LifecycleAdapter; +import static java.lang.String.format; + /** *

Forseti, the Nordic god of justice

* @@ -96,6 +100,7 @@ */ public class ForsetiLockManager extends LifecycleAdapter implements Locks { + /** This is Forsetis internal lock API, which it uses to do deadlock detection. */ interface Lock { @@ -103,8 +108,11 @@ interface Lock int holderWaitListSize(); boolean anyHolderIsWaitingFor( int client ); - /** For introspection and error messages */ - String describeWaitList(); + /** + * For introspection and error messages + * @param involvedParties Clients involved in the waiting will be appended to this set, to allow the callee to construct a legend with client descriptions + */ + String describeWaitList( Set involvedParties ); } /** Pointers to lock maps, one array per resource type. */ @@ -114,7 +122,7 @@ interface Lock private final ResourceType[] resourceTypes; /** Pool forseti clients. */ - private final Pool clientPool; + private final ForsetiClientFlyweightPool clientPool; @SuppressWarnings( "unchecked" ) public ForsetiLockManager( ResourceType... resourceTypes ) @@ -159,7 +167,10 @@ public void accept( Visitor out ) for ( Map.Entry entry : lockMaps[i].entrySet() ) { Lock lock = entry.getValue(); - out.visit( type, entry.getKey(), lock.describeWaitList(), 0 ); + Set involvedParties = new TreeSet<>(); + String waitList = lock.describeWaitList( involvedParties ); + String legend = legendForClients( involvedParties, clientPool.clientDescription ); + out.visit( type, entry.getKey(), format("%s: %s%nTransactions:%s", lock.toString(), waitList, legend), 0 ); } } } @@ -183,8 +194,18 @@ private static class ForsetiClientFlyweightPool extends LinkedQueuePool unusedIds = new ConcurrentLinkedQueue<>(); + private final ConcurrentMap clientsById = new ConcurrentHashMap<>(); private final ConcurrentMap[] lockMaps; private final WaitStrategy[] waitStrategies; + private final Function clientDescription = new Function() + { + @Override + public String apply( Integer integer ) throws RuntimeException + { + ForsetiClient client = clientsById.get( integer ); + return client != null ? client.description() : "N/A"; + } + }; public ForsetiClientFlyweightPool( ConcurrentMap[] lockMaps, @@ -203,13 +224,16 @@ protected ForsetiClient create() { id = clientIds.getAndIncrement(); } - return new ForsetiClient(id, lockMaps, waitStrategies, this ); + ForsetiClient client = new ForsetiClient( id, lockMaps, waitStrategies, this, clientDescription ); + clientsById.put( id, client ); + return client; } @Override protected void dispose( ForsetiClient resource ) { super.dispose( resource ); + clientsById.remove( resource.id() ); if(resource.id() < 1024) { // Re-use all ids < 1024 @@ -217,4 +241,15 @@ protected void dispose( ForsetiClient resource ) } } } + + /** For error messages etc, build a legend of client id -> description, to allow users to tell what each of the different clients were up to */ + public static String legendForClients( Set clientIds, Function clientDescription ) + { + StringBuilder out = new StringBuilder(); + for ( Integer clientId : clientIds ) + { + out.append( format("%n Tx[%d]: %s", clientId, clientDescription.apply( clientId ) ) ); + } + return out.toString(); + } } diff --git a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/SharedLock.java b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/SharedLock.java index 7a96cb0204deb..cefd5e2e7f33d 100644 --- a/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/SharedLock.java +++ b/enterprise/ha/src/main/java/org/neo4j/kernel/ha/lock/forseti/SharedLock.java @@ -19,6 +19,7 @@ */ package org.neo4j.kernel.ha.lock.forseti; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReferenceArray; @@ -188,18 +189,28 @@ public void releaseUpdateLock(ForsetiClient client) public int numberOfHolders() { - return refCount.get() & ~UPDATE_LOCK_FLAG; + return numberOfHolders( refCount.get() ); + } + + private int numberOfHolders(int count) + { + return count & ~UPDATE_LOCK_FLAG; } public boolean isUpdateLock() { - return (refCount.get() & UPDATE_LOCK_FLAG) == UPDATE_LOCK_FLAG; + return isUpdateLock( refCount.get() ); + } + + private boolean isUpdateLock( int localRefCount ) + { + return (localRefCount & UPDATE_LOCK_FLAG) == UPDATE_LOCK_FLAG; } @Override - public String describeWaitList() + public String describeWaitList( Set involvedParties ) { - StringBuilder sb = new StringBuilder( "SharedLock[" ); + StringBuilder sb = new StringBuilder(); for ( int i = 0; i < clientsHoldingThisLock.length; i++ ) { AtomicReferenceArray holders = clientsHoldingThisLock[i]; @@ -209,31 +220,32 @@ public String describeWaitList() ForsetiClient current = holders.get( j ); if(current != null) { - sb.append( first ? "" : ", " ).append( current.describeWaitList() ); + involvedParties.add( current.id() ); + sb.append( first ? "" : ", " ).append( current.describeWaitList( involvedParties ) ); first = false; } } } - return sb.append( "]" ).toString(); + return sb.toString(); } @Override public String toString() { - // TODO we should only read out the refCount once, and build a deterministic string based on that - if(isUpdateLock()) + int refCountSnapshot = refCount.get(); + if(isUpdateLock(refCountSnapshot)) { return "UpdateLock{" + "objectId=" + System.identityHashCode( this ) + - ", refCount=" + (refCount.get() & ~UPDATE_LOCK_FLAG) + - ", holder=" + updateHolder + + ", refCount=" + numberOfHolders( refCountSnapshot ) + + ", owner=" + updateHolder + '}'; } else { return "SharedLock{" + "objectId=" + System.identityHashCode( this ) + - ", refCount=" + refCount + + ", refCount=" + refCountSnapshot + '}'; } } diff --git a/enterprise/ha/src/test/java/org/neo4j/kernel/ha/lock/forseti/ForsetiMessagesTest.java b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/lock/forseti/ForsetiMessagesTest.java new file mode 100644 index 0000000000000..76ab1b13b7298 --- /dev/null +++ b/enterprise/ha/src/test/java/org/neo4j/kernel/ha/lock/forseti/ForsetiMessagesTest.java @@ -0,0 +1,94 @@ +package org.neo4j.kernel.ha.lock.forseti; + +import org.junit.Rule; +import org.junit.Test; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; + +import org.neo4j.kernel.DeadlockDetectedException; +import org.neo4j.kernel.impl.locking.Locks; +import org.neo4j.kernel.impl.locking.ResourceTypes; +import org.neo4j.test.OtherThreadExecutor; +import org.neo4j.test.OtherThreadRule; + +import static org.junit.Assert.assertEquals; + +/** Test output of various Forseti introspection mechanisms */ +public class ForsetiMessagesTest +{ + @Rule public OtherThreadRule otherThread = new OtherThreadRule<>(); + + @Test + public void shouldIncludeFriendlyNameInDeadlockExceptions() throws Throwable + { + // Given + ForsetiLockManager locks = new ForsetiLockManager( ResourceTypes.NODE ); + + final Locks.Client client1 = locks.newClient().description( "`MATCH (n) RETURN n`" ); + final Locks.Client client2 = locks.newClient().description( "`MATCH (p) SET p.name = {name}` {name:'Bob'}" ); + final Locks.Client client3 = locks.newClient().description( "`MERGE (n) RETURN id(n)`" ); + + client1.acquireShared( ResourceTypes.NODE, 1337 ); + client3.acquireShared( ResourceTypes.NODE, 1337 ); + client2.acquireExclusive( ResourceTypes.NODE, 1338 ); + Future future = otherThread.execute( new OtherThreadExecutor.WorkerCommand() + { + @Override + public Object doWork( Void state ) throws Exception + { + client1.acquireExclusive( ResourceTypes.NODE, 1338 ); + return null; + } + } ); + + String client2Error = null, client1Error = null; + + // When + try + { + client2.acquireExclusive( ResourceTypes.NODE, 1337 ); + } + catch( DeadlockDetectedException e ) + { + client2Error = e.getMessage(); + } + + // Or when + try + { + future.get(); + } + catch( ExecutionException e ) + { + client1Error = e.getCause().getMessage(); + } + + // Then + if( client1Error != null ) + { + assertEquals("Tx[0] can't lock NODE(1338), because that resource is locked by others in a way that would cause a deadlock if we waited for them" + + ".\n" + + "The lock currently is ExclusiveLock{owner=Tx[1]}, and holders of that lock are waiting in the following way: \n" + + "\n" + + "\n" + + "Transactions:\n" + + " Tx[0]: `MATCH (n) RETURN n`\n" + + " Tx[1]: `MATCH (p) SET p.name = {name}` {name:'Bob'}\n" + + " Tx[2]: `MERGE (n) RETURN id(n)`", client1Error); + } + else + { + assertEquals("Tx[1] can't lock NODE(1337), because that resource is locked by others in a way that would cause a deadlock if we waited for them" + + ".\n" + + "The lock currently is SharedLock{objectId=233530418, refCount=2}, and holders of that lock are waiting in the following way: \n" + + ", \n" + + "\n" + + "\n" + + "Transactions:\n" + + " Tx[0]: `MATCH (n) RETURN n`\n" + + " Tx[1]: `MATCH (p) SET p.name = {name}` {name:'Bob'}\n" + + " Tx[2]: `MERGE (n) RETURN id(n)`", client2Error); + } + } +} \ No newline at end of file