Skip to content

Commit fdd134c

Browse files
metanet authored and mdogan committed
Raft lock operation timeout fixes
Consider the following scenarios, where client1 and client2 run on different threads:

SCENARIO #1:
- STEP 1: client1.lock(); The lock is acquired by client1.
- STEP 2: client2.lock(); A wait key is added to the lock for client2. After some time, the client2.lock() call fails with an operation timeout.
- STEP 3: client2.unlock(); Fails with IllegalMonitorStateException because client2 is not the holder of the lock. However, its wait key is still present, so the lock will be assigned to client2 when client1 releases it.

SCENARIO #2:
- STEP 1: client1.lock(); The lock is acquired by client1.
- STEP 2: client2.lock(); A wait key is added to the lock for client2. After some time, the client2.lock() call fails with an operation timeout.
- STEP 3: client2.lock(); A new wait key is added to the lock for client2, because its second lock() call has a new invocation uuid.
- STEP 4: client1.unlock(); The lock is assigned to client2 for its first lock() call. Now the lock is held by client2 and its lock count is just 1. However, nothing is done for its other wait key in the list.
- STEP 5: client2.unlock(); Client2 releases the lock, but since it has another wait key in the list, it gets the lock again.

We need two separate fixes to resolve these issues. First, an unlock() call should cancel all pending wait keys of the lock endpoint if the lock is not currently held by that endpoint. Second, if a lock() call is received while there is another wait key from the same client, the first lock() call must be cancelled by deleting its wait key from the wait list of the lock.
1 parent dee6b20 commit fdd134c

File tree

16 files changed

+424
-78
lines changed

16 files changed

+424
-78
lines changed

hazelcast-raft-client/src/main/java/com/hazelcast/raft/service/lock/client/RaftLockProxy.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import com.hazelcast.raft.impl.RaftGroupIdImpl;
3131
import com.hazelcast.raft.impl.session.SessionExpiredException;
3232
import com.hazelcast.raft.service.lock.RaftLockService;
33+
import com.hazelcast.raft.service.lock.exception.LockRequestCancelledException;
3334
import com.hazelcast.raft.service.session.SessionAwareProxy;
3435
import com.hazelcast.raft.service.session.SessionManagerProvider;
3536
import com.hazelcast.spi.InternalCompletableFuture;
@@ -132,6 +133,8 @@ public boolean tryLock(long time, TimeUnit unit) {
132133
releaseSession(sessionId);
133134
}
134135
return locked;
136+
} catch (LockRequestCancelledException e) {
137+
return false;
135138
} catch (SessionExpiredException e) {
136139
invalidateSession(sessionId);
137140
}

hazelcast-raft-client/src/test/java/com/hazelcast/raft/service/lock/client/RaftFencedLockClientBasicTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import com.hazelcast.raft.impl.session.SessionExpiredException;
77
import com.hazelcast.raft.service.lock.FencedLock;
88
import com.hazelcast.raft.service.lock.RaftFencedLockBasicTest;
9+
import com.hazelcast.raft.service.lock.exception.LockRequestCancelledException;
910
import com.hazelcast.raft.service.session.AbstractSessionManager;
1011
import com.hazelcast.raft.service.session.SessionManagerProvider;
1112
import com.hazelcast.test.HazelcastSerialClassRunner;
@@ -33,6 +34,7 @@ protected HazelcastInstance[] createInstances() {
3334
lockInstance = f.newHazelcastClient();
3435
HazelcastClientInstanceImpl client = getClient(lockInstance);
3536
SessionExpiredException.register(client.getClientExceptionFactory());
37+
LockRequestCancelledException.register(client.getClientExceptionFactory());
3638
return instances;
3739
}
3840

hazelcast-raft-client/src/test/java/com/hazelcast/raft/service/lock/client/RaftLockClientBasicTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import com.hazelcast.raft.RaftGroupId;
77
import com.hazelcast.raft.impl.session.SessionExpiredException;
88
import com.hazelcast.raft.service.lock.RaftLockBasicTest;
9+
import com.hazelcast.raft.service.lock.exception.LockRequestCancelledException;
910
import com.hazelcast.raft.service.session.AbstractSessionManager;
1011
import com.hazelcast.raft.service.session.SessionManagerProvider;
1112
import com.hazelcast.test.HazelcastSerialClassRunner;
@@ -35,6 +36,7 @@ protected HazelcastInstance[] createInstances() {
3536
TestHazelcastFactory f = (TestHazelcastFactory) factory;
3637
client = f.newHazelcastClient();
3738
SessionExpiredException.register(getClient(client).getClientExceptionFactory());
39+
LockRequestCancelledException.register(getClient(client).getClientExceptionFactory());
3840
return instances;
3941
}
4042

hazelcast-raft-client/src/test/java/com/hazelcast/raft/service/semaphore/client/RaftSessionAwareSemaphoreClientBasicTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import com.hazelcast.core.ISemaphore;
66
import com.hazelcast.raft.RaftGroupId;
77
import com.hazelcast.raft.impl.session.SessionExpiredException;
8+
import com.hazelcast.raft.service.lock.exception.LockRequestCancelledException;
89
import com.hazelcast.raft.service.semaphore.RaftSessionAwareSemaphoreBasicTest;
910
import com.hazelcast.raft.service.session.AbstractSessionManager;
1011
import com.hazelcast.raft.service.session.SessionManagerProvider;
@@ -33,6 +34,7 @@ protected HazelcastInstance[] createInstances() {
3334
TestHazelcastFactory f = (TestHazelcastFactory) factory;
3435
semaphoreInstance = f.newHazelcastClient();
3536
SessionExpiredException.register(getClient(semaphoreInstance).getClientExceptionFactory());
37+
LockRequestCancelledException.register(getClient(semaphoreInstance).getClientExceptionFactory());
3638
return instances;
3739
}
3840

hazelcast-raft-client/src/test/java/com/hazelcast/raft/service/semaphore/client/RaftSessionlessSemaphoreClientBasicTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import com.hazelcast.core.ISemaphore;
66
import com.hazelcast.raft.RaftGroupId;
77
import com.hazelcast.raft.impl.session.SessionExpiredException;
8+
import com.hazelcast.raft.service.lock.exception.LockRequestCancelledException;
89
import com.hazelcast.raft.service.semaphore.RaftSessionlessSemaphoreBasicTest;
910
import com.hazelcast.test.HazelcastSerialClassRunner;
1011
import com.hazelcast.test.TestHazelcastInstanceFactory;
@@ -33,6 +34,7 @@ protected HazelcastInstance[] createInstances() {
3334
TestHazelcastFactory f = (TestHazelcastFactory) factory;
3435
client = f.newHazelcastClient();
3536
SessionExpiredException.register(getClient(client).getClientExceptionFactory());
37+
LockRequestCancelledException.register(getClient(client).getClientExceptionFactory());
3638
return instances;
3739
}
3840

hazelcast-raft-dataservices/src/main/java/com/hazelcast/raft/service/blocking/AbstractBlockingService.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import com.hazelcast.spi.ManagedService;
3535
import com.hazelcast.spi.NodeEngine;
3636
import com.hazelcast.spi.exception.DistributedObjectDestroyedException;
37+
import com.hazelcast.spi.impl.NodeEngineImpl;
3738
import com.hazelcast.util.Clock;
3839

3940
import java.util.ArrayList;
@@ -61,15 +62,15 @@ public abstract class AbstractBlockingService<W extends WaitKey, R extends Block
6162
public static final long WAIT_TIMEOUT_TASK_UPPER_BOUND_MILLIS = 1500;
6263
private static final long WAIT_TIMEOUT_TASK_PERIOD_MILLIS = 500;
6364

64-
protected final NodeEngine nodeEngine;
65+
protected final NodeEngineImpl nodeEngine;
6566
protected final ILogger logger;
6667
protected volatile RaftService raftService;
6768

6869
private final ConcurrentMap<RaftGroupId, RR> registries = new ConcurrentHashMap<RaftGroupId, RR>();
6970
private volatile SessionAccessor sessionAccessor;
7071

7172
protected AbstractBlockingService(NodeEngine nodeEngine) {
72-
this.nodeEngine = nodeEngine;
73+
this.nodeEngine = (NodeEngineImpl) nodeEngine;
7374
this.logger = nodeEngine.getLogger(getClass());
7475
}
7576

hazelcast-raft-dataservices/src/main/java/com/hazelcast/raft/service/lock/LockRegistry.java

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
import com.hazelcast.nio.serialization.IdentifiedDataSerializable;
2020
import com.hazelcast.raft.RaftGroupId;
2121
import com.hazelcast.raft.service.blocking.ResourceRegistry;
22+
import com.hazelcast.raft.service.lock.RaftLock.AcquireResult;
23+
import com.hazelcast.raft.service.lock.RaftLock.ReleaseResult;
2224

23-
import java.util.Collection;
24-
import java.util.Collections;
2525
import java.util.UUID;
2626

2727
import static com.hazelcast.raft.service.lock.RaftLockService.INVALID_FENCE;
@@ -43,46 +43,58 @@ protected RaftLock createNewResource(RaftGroupId groupId, String name) {
4343
return new RaftLock(groupId, name);
4444
}
4545

46-
long acquire(String name, LockEndpoint endpoint, long commitIndex, UUID invocationUid) {
47-
return getOrInitResource(name).acquire(endpoint, commitIndex, invocationUid, true);
46+
AcquireResult acquire(String name, LockEndpoint endpoint, long commitIndex, UUID invocationUid) {
47+
AcquireResult result = getOrInitResource(name).acquire(endpoint, commitIndex, invocationUid, true);
48+
49+
for (LockInvocationKey waitKey : result.notifications) {
50+
removeWaitKey(waitKey);
51+
}
52+
53+
return result;
4854
}
4955

50-
long tryAcquire(String name, LockEndpoint endpoint, long commitIndex, UUID invocationUid, long timeoutMs) {
56+
AcquireResult tryAcquire(String name, LockEndpoint endpoint, long commitIndex, UUID invocationUid, long timeoutMs) {
5157
boolean wait = (timeoutMs > 0);
52-
long fence = getOrInitResource(name).acquire(endpoint, commitIndex, invocationUid, wait);
58+
AcquireResult result = getOrInitResource(name).acquire(endpoint, commitIndex, invocationUid, wait);
59+
long fence = result.fence;
60+
61+
for (LockInvocationKey waitKey : result.notifications) {
62+
removeWaitKey(waitKey);
63+
}
64+
5365
if (wait && fence == INVALID_FENCE) {
5466
addWaitKey(new LockInvocationKey(name, endpoint, commitIndex, invocationUid), timeoutMs);
5567
}
5668

57-
return fence;
69+
return result;
5870
}
5971

60-
Collection<LockInvocationKey> release(String name, LockEndpoint endpoint, UUID invocationUid) {
72+
ReleaseResult release(String name, LockEndpoint endpoint, UUID invocationUid) {
6173
RaftLock lock = getResourceOrNull(name);
6274
if (lock == null) {
63-
return Collections.emptyList();
75+
return ReleaseResult.NOT_RELEASED;
6476
}
6577

66-
Collection<LockInvocationKey> keys = lock.release(endpoint, invocationUid);
67-
for (LockInvocationKey key : keys) {
78+
ReleaseResult result = lock.release(endpoint, invocationUid);
79+
for (LockInvocationKey key : result.notifications) {
6880
removeWaitKey(key);
6981
}
7082

71-
return keys;
83+
return result;
7284
}
7385

74-
Collection<LockInvocationKey> forceRelease(String name, long expectedFence, UUID invocationUid) {
86+
ReleaseResult forceRelease(String name, long expectedFence, UUID invocationUid) {
7587
RaftLock lock = getResourceOrNull(name);
7688
if (lock == null) {
77-
return Collections.emptyList();
89+
return ReleaseResult.NOT_RELEASED;
7890
}
7991

80-
Collection<LockInvocationKey> keys = lock.forceRelease(expectedFence, invocationUid);
81-
for (LockInvocationKey key : keys) {
92+
ReleaseResult result = lock.forceRelease(expectedFence, invocationUid);
93+
for (LockInvocationKey key : result.notifications) {
8294
removeWaitKey(key);
8395
}
8496

85-
return keys;
97+
return result;
8698
}
8799

88100
int getLockCount(String name, LockEndpoint endpoint) {

0 commit comments

Comments
 (0)