From 953c322ad254bf6f544bac6738114176040a9c6f Mon Sep 17 00:00:00 2001 From: Michael Nitschinger Date: Mon, 23 Jun 2014 16:46:19 +0200 Subject: [PATCH] SPY-176: Enhance redistribution logic and avoid possible deadlocks. Motivation ---------- There have been issues reported that redistribution of operations does not work as expected, especially with authentication scenarios. This has been tracked down and the following changes have been made: Modifications ------------- - With the old redistribute logic, it could happen that subsequent ops in the retry queue got accidentally deleted. With the copy first, this cannot happen anymore. - On redistribute, if the handling node is still not set, just clone the operation to avoid NPEs. A op without a node set can happen if it is enqueued to retry because the target node is not yet authenticated. - Do not try to add operations to a node which is not yet authen ticated. This can lead to costly locks with redistributions since they are run from the IO thread. Without the change, it can happen that the IO thread waits for an auth latch, but is also responsible for telling listeners when auth has completed, therefore locking everything up until the auth latch wait runs out of time. Result ------ Much better resilience and performance with redistributions, especially if authentication takes longer than expected and from scenarios where the operations get redistributed/moved around from within the IO thread. Change-Id: Icbc5f9e4f568ea885500e8d2baedfa989c8ef801 Reviewed-on: http://review.couchbase.org/38669 Reviewed-by: Matt Ingenthron Reviewed-by: Michael Nitschinger Tested-by: Michael Nitschinger --- src/main/java/net/spy/memcached/MemcachedConnection.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/net/spy/memcached/MemcachedConnection.java b/src/main/java/net/spy/memcached/MemcachedConnection.java index 9fc817b39..09f7c49ce 100644 --- a/src/main/java/net/spy/memcached/MemcachedConnection.java +++ b/src/main/java/net/spy/memcached/MemcachedConnection.java @@ -473,8 +473,9 @@ private void handleOperationalTasks() throws IOException { } if (!retryOps.isEmpty()) { - redistributeOperations(new ArrayList(retryOps)); + ArrayList operations = new ArrayList(retryOps); retryOps.clear(); + redistributeOperations(operations); } handleShutdownQueue(); @@ -1042,7 +1043,7 @@ public void redistributeOperation(Operation op) { // The operation gets redistributed but has never been actually written, // it we just straight re-add it without cloning. - if (op.getState() == OperationState.WRITE_QUEUED) { + if (op.getState() == OperationState.WRITE_QUEUED && op.getHandlingNode() != null) { addOperation(op.getHandlingNode(), op); return; } @@ -1257,6 +1258,10 @@ public void insertOperation(final MemcachedNode node, final Operation o) { * @param o the operation to add. */ protected void addOperation(final MemcachedNode node, final Operation o) { + if (!node.isAuthenticated()) { + retryOperation(o); + return; + } o.setHandlingNode(node); o.initialize(); node.addOp(o);