Skip to content

Commit

Permalink
Improve deadlock descriptions in enterprise lock manager.
Browse files Browse the repository at this point in the history
- Each lock client now optionally can have a description set, which is included
  when explaining a deadlock.
- Cypher sets lock client description before execution of each query
- MasterImpl sets lock client description to be relatable back to the slave
  it is acting on behalf of
  • Loading branch information
jakewins committed Aug 27, 2015
1 parent 238f536 commit f972261
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 36 deletions.
Expand Up @@ -33,7 +33,6 @@
import org.neo4j.com.storecopy.StoreWriter;
import org.neo4j.function.Consumer;
import org.neo4j.helpers.Clock;
import org.neo4j.helpers.Factory;
import org.neo4j.kernel.DeadlockDetectedException;
import org.neo4j.kernel.IdType;
import org.neo4j.kernel.api.exceptions.Status;
Expand Down Expand Up @@ -144,11 +143,12 @@ private long generateEpoch()
@Override
public void start() throws Throwable
{
this.slaveLockSessions = new TimedRepository<>( new Factory<Locks.Client>()
this.slaveLockSessions = new TimedRepository<>( new TimedRepository.ValueFactory<RequestContext, Locks.Client>()
{
@Override public Locks.Client newInstance()
@Override public Locks.Client newInstance(RequestContext ctx)
{
return spi.acquireClient();
return spi.acquireClient().description(
String.format("Locks held on behalf of slave `%d`, slave transaction id `%d`", ctx.machineId(), ctx.getEventIdentifier() ) );
}
}, new Consumer<Locks.Client>()
{
Expand Down
Expand Up @@ -92,6 +92,19 @@ private Map<Long, AtomicInteger> getLockMap(
return lockMap;
}

@Override
public Locks.Client description( String desc )
{
client.description( desc );
return this;
}

@Override
public String description()
{
return client.description();
}

@Override
public void acquireShared( Locks.ResourceType resourceType, long... resourceIds ) throws AcquireLockTimeoutException
{
Expand Down
Expand Up @@ -19,6 +19,8 @@
*/
package org.neo4j.kernel.ha.lock.forseti;

import java.util.Set;

import org.neo4j.kernel.impl.util.collection.SimpleBitSet;

class ExclusiveLock implements ForsetiLockManager.Lock
Expand Down Expand Up @@ -49,9 +51,10 @@ public boolean anyHolderIsWaitingFor( int client )
}

@Override
public String describeWaitList()
public String describeWaitList( Set<Integer> involvedParties )
{
return "ExclusiveLock[" + owner.describeWaitList() + "]";
involvedParties.add( owner.id() );
return owner.describeWaitList( involvedParties );
}

@Override
Expand Down
Expand Up @@ -19,13 +19,16 @@
*/
package org.neo4j.kernel.ha.lock.forseti;

import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentMap;

import org.neo4j.collection.primitive.Primitive;
import org.neo4j.collection.pool.LinkedQueuePool;
import org.neo4j.collection.primitive.Primitive;
import org.neo4j.collection.primitive.PrimitiveIntIterator;
import org.neo4j.collection.primitive.PrimitiveLongIntMap;
import org.neo4j.collection.primitive.PrimitiveLongVisitor;
import org.neo4j.function.Function;
import org.neo4j.kernel.DeadlockDetectedException;
import org.neo4j.kernel.impl.locking.AcquireLockTimeoutException;
import org.neo4j.kernel.impl.locking.Locks;
Expand All @@ -47,6 +50,17 @@ public class ForsetiClient implements Locks.Client
/** Id for this client */
private final int myId;

/**
* Alias for this client, a user-friendly name used in error messages and lock descriptions. Ideally the user can use the description to tell which query is
* problematic.
*/
private String description;

/**
* For helpful errors, this resolves a user-friendly description for a client, given a client id.
*/
private final Function<Integer,String> clientDescription;

/** resourceType -> lock map. These are the global lock maps, shared across all clients. */
private final ConcurrentMap<Long, ForsetiLockManager.Lock>[] lockMaps;

Expand Down Expand Up @@ -79,9 +93,12 @@ public class ForsetiClient implements Locks.Client
public ForsetiClient( int id,
ConcurrentMap<Long, ForsetiLockManager.Lock>[] lockMaps,
WaitStrategy<AcquireLockTimeoutException>[] waitStrategies,
LinkedQueuePool<ForsetiClient> clientPool )
LinkedQueuePool<ForsetiClient> clientPool,
Function<Integer, String> clientDescription )
{
this.myId = id;
this.description = String.format("Client[%d]", id);
this.clientDescription = clientDescription;
this.lockMaps = lockMaps;
this.waitStrategies = waitStrategies;
this.clientPool = clientPool;
Expand Down Expand Up @@ -501,6 +518,7 @@ public void releaseAll()
public void close()
{
releaseAll();
description = "N/A";
clientPool.release( this );
}

Expand Down Expand Up @@ -555,7 +573,7 @@ public int hashCode()
@Override
public String toString()
{
return String.format( "ForsetiClient[%d]", myId );
return "Tx[" + myId + "]";
}

/** Release a lock from the global pool. */
Expand All @@ -579,8 +597,7 @@ private boolean releaseLocalLock( Locks.ResourceType type, long resourceId, Prim
int lockCount = localLocks.remove( resourceId );
if(lockCount == -1)
{
throw new IllegalStateException( this + " cannot release lock that it does not hold: " +
type + "[" + resourceId + "]." );
throw new IllegalStateException( this + " cannot release lock that it does not hold: " + type + "[" + resourceId + "]." );
}

if(lockCount > 1)
Expand Down Expand Up @@ -683,21 +700,45 @@ private void markAsWaitingFor( ForsetiLockManager.Lock lock, Locks.ResourceType
lock.copyHolderWaitListsInto( waitList );
if(lock.anyHolderIsWaitingFor( myId ) && lock.holderWaitListSize() >= waitListSize())
{
waitList.clear();
throw new DeadlockDetectedException( this + " can't acquire " + lock + " on " + type + "("+resourceId+"), because holders of that lock " +
"are waiting for " + this + ".\n Wait list:" + lock.describeWaitList() );
Set<Integer> involvedParties = new TreeSet<>(); // treeset to keep the clients sorted by id, just for readability purposes
String waitList = lock.describeWaitList( involvedParties );
String legend = ForsetiLockManager.legendForClients( involvedParties, clientDescription );
String desc = format("%s can't lock %s(%d), because that resource is locked by others in " +
"a way that would cause a deadlock if we waited for them.\nThe lock currently is %s, " +
"and holders of that lock are waiting in the following way: %s%n%nTransactions:%s",
this, type, resourceId, lock, waitList, legend);

this.waitList.clear();
throw new DeadlockDetectedException( desc );
}
}

public String describeWaitList()
/**
* Describe who this client is waiting for in a human-comprehensible way.
* @param involvedParties All clients listed in the description will be added to this set, allowing the callee to print a legend with
* (client names -> descriptions) at the end of its output
*/
public String describeWaitList( Set<Integer> involvedParties )
{
StringBuilder sb = new StringBuilder( format( "%nClient[%d] waits for [", id() ) );
if(waitList.size() <= 1)
{
return format( "%n<Tx[%d], running>", myId );
}

StringBuilder sb = new StringBuilder( format( "%n<Tx[%d], waiting for ", myId ) );
PrimitiveIntIterator iter = waitList.iterator();
for ( int i = 0; iter.hasNext(); i++ )
boolean first = true;
while( iter.hasNext() )
{
sb.append( i > 0 ? "," : "" ).append( iter.next() );
int clientId = iter.next();
if( clientId != myId )
{
involvedParties.add( clientId );
sb.append( first ? "" : "," ).append( "Tx[" + clientId + "]" );
first = false;
}
}
sb.append( "]" );
sb.append( ">" );
return sb.toString();
}

Expand All @@ -706,6 +747,19 @@ public int id()
return myId;
}

@Override
public Locks.Client description( String desc )
{
this.description = desc;
return this;
}

@Override
public String description()
{
return description;
}

// Visitors used for bulk ops on the lock maps (such as releasing all locks)

private class ReleaseSharedLocksVisitor implements PrimitiveLongVisitor<RuntimeException>
Expand Down
Expand Up @@ -21,19 +21,23 @@

import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.neo4j.collection.pool.LinkedQueuePool;
import org.neo4j.collection.pool.Pool;
import org.neo4j.function.Function;
import org.neo4j.kernel.impl.locking.AcquireLockTimeoutException;
import org.neo4j.kernel.impl.locking.Locks;
import org.neo4j.kernel.impl.util.collection.SimpleBitSet;
import org.neo4j.kernel.impl.util.concurrent.WaitStrategy;
import org.neo4j.kernel.lifecycle.LifecycleAdapter;

import static java.lang.String.format;

/**
* <h1>Forseti, the Nordic god of justice</h1>
*
Expand Down Expand Up @@ -96,15 +100,19 @@
*/
public class ForsetiLockManager extends LifecycleAdapter implements Locks
{

/** This is Forsetis internal lock API, which it uses to do deadlock detection. */
interface Lock
{
void copyHolderWaitListsInto( SimpleBitSet waitList );
int holderWaitListSize();
boolean anyHolderIsWaitingFor( int client );

/** For introspection and error messages */
String describeWaitList();
/**
* For introspection and error messages
* @param involvedParties Clients involved in the waiting will be appended to this set, to allow the callee to construct a legend with client descriptions
*/
String describeWaitList( Set<Integer> involvedParties );
}

/** Pointers to lock maps, one array per resource type. */
Expand All @@ -114,7 +122,7 @@ interface Lock
private final ResourceType[] resourceTypes;

/** Pool forseti clients. */
private final Pool<ForsetiClient> clientPool;
private final ForsetiClientFlyweightPool clientPool;

@SuppressWarnings( "unchecked" )
public ForsetiLockManager( ResourceType... resourceTypes )
Expand Down Expand Up @@ -159,7 +167,10 @@ public void accept( Visitor out )
for ( Map.Entry<Long, Lock> entry : lockMaps[i].entrySet() )
{
Lock lock = entry.getValue();
out.visit( type, entry.getKey(), lock.describeWaitList(), 0 );
Set<Integer> involvedParties = new TreeSet<>();
String waitList = lock.describeWaitList( involvedParties );
String legend = legendForClients( involvedParties, clientPool.clientDescription );
out.visit( type, entry.getKey(), format("%s: %s%nTransactions:%s", lock.toString(), waitList, legend), 0 );
}
}
}
Expand All @@ -183,8 +194,18 @@ private static class ForsetiClientFlyweightPool extends LinkedQueuePool<ForsetiC
/** Re-use ids, forseti uses these in arrays, so we want to keep them low and not loose them. */
// TODO we could use a synchronised SimpleBitSet instead, since we know that we only care about reusing a very limited set of integers.
private final Queue<Integer> unusedIds = new ConcurrentLinkedQueue<>();
private final ConcurrentMap<Integer, ForsetiClient> clientsById = new ConcurrentHashMap<>();
private final ConcurrentMap<Long, ForsetiLockManager.Lock>[] lockMaps;
private final WaitStrategy<AcquireLockTimeoutException>[] waitStrategies;
private final Function<Integer,String> clientDescription = new Function<Integer,String>()
{
@Override
public String apply( Integer integer ) throws RuntimeException
{
ForsetiClient client = clientsById.get( integer );
return client != null ? client.description() : "N/A";
}
};

public ForsetiClientFlyweightPool(
ConcurrentMap<Long, ForsetiLockManager.Lock>[] lockMaps,
Expand All @@ -203,18 +224,32 @@ protected ForsetiClient create()
{
id = clientIds.getAndIncrement();
}
return new ForsetiClient(id, lockMaps, waitStrategies, this );
ForsetiClient client = new ForsetiClient( id, lockMaps, waitStrategies, this, clientDescription );
clientsById.put( id, client );
return client;
}

@Override
protected void dispose( ForsetiClient resource )
{
super.dispose( resource );
clientsById.remove( resource.id() );
if(resource.id() < 1024)
{
// Re-use all ids < 1024
unusedIds.offer( resource.id() );
}
}
}

/** For error messages etc, build a legend of client id -> description, to allow users to tell what each of the different clients were up to */
public static String legendForClients( Set<Integer> clientIds, Function<Integer, String> clientDescription )
{
StringBuilder out = new StringBuilder();
for ( Integer clientId : clientIds )
{
out.append( format("%n Tx[%d]: %s", clientId, clientDescription.apply( clientId ) ) );
}
return out.toString();
}
}

0 comments on commit f972261

Please sign in to comment.