-
Notifications
You must be signed in to change notification settings - Fork 959
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for topology consensus #355
Previously, cluster topology refreshing could get stuck on a node that was previously discovered but got removed from the cluster. This was possible because multiple views were obtained and an arbitrary topology view was chosen. Lettuce now implements two consensus algorithms: Healthy Majority and Known Majority. Healthy Majority is applied on the very first topology retrieval; Known Majority is applied on all subsequent topology refreshes. Healthy Majority votes for topology views containing the most nodes with healthy flags (without FAIL/PFAIL/NOADDR flags), so the healthiest view is used. Known Majority selects topology views that contain the most previously known nodes. This consensus works for adding and removing nodes one-by-one or even multiple nodes at once. In case a cluster is split into even partitions the client can still get stuck on either side, but that issue can be solved by disabling dynamic refresh sources and specifying stable cluster seed nodes.
- Loading branch information
Showing
8 changed files
with
427 additions
and
16 deletions.
There are no files selected for viewing
41 changes: 41 additions & 0 deletions
41
src/main/java/com/lambdaworks/redis/cluster/PartitionsConsensus.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package com.lambdaworks.redis.cluster;

import java.util.Map;

import com.lambdaworks.redis.RedisURI;
import com.lambdaworks.redis.cluster.models.partitions.Partitions;

/**
 * Consensus API to decide on the {@link com.lambdaworks.redis.cluster.models.partitions.Partitions topology view} to be used by
 * {@link RedisClusterClient}.
 * <p>
 * Given the currently used {@link Partitions} and a {@link java.util.Map} of freshly retrieved {@link Partitions} (one view per
 * queried node), an implementation elects the view that the client should adopt. Implementations may return one of the input
 * {@link Partitions} instances or build a new one.
 *
 * @author Mark Paluch
 * @since 4.2
 * @see com.lambdaworks.redis.cluster.models.partitions.Partitions
 * @see RedisClusterClient
 */
abstract class PartitionsConsensus {

    /**
     * Consensus algorithm that votes for the topology view containing the most active (healthy) nodes. Intended for the
     * initial topology retrieval when no previous view exists.
     */
    public static final PartitionsConsensus HEALTHY_MAJORITY = new PartitionsConsensusImpl.HealthyMajority();

    /**
     * Consensus algorithm that votes for the topology view containing the most previously known nodes. Intended for
     * subsequent topology refreshes.
     */
    public static final PartitionsConsensus KNOWN_MAJORITY = new PartitionsConsensusImpl.KnownMajority();

    /**
     * Determine the {@link Partitions} to be used by {@link RedisClusterClient}.
     *
     * @param current the currently used topology view, must not be {@literal null}.
     * @param topologyViews the newly retrieved views keyed by the node they were obtained from, must not be {@literal null}.
     * @return the resulting {@link Partitions} to be used by {@link RedisClusterClient}.
     */
    abstract Partitions getPartitions(Partitions current, Map<RedisURI, Partitions> topologyViews);
}
103 changes: 103 additions & 0 deletions
103
src/main/java/com/lambdaworks/redis/cluster/PartitionsConsensusImpl.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
package com.lambdaworks.redis.cluster;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import com.lambdaworks.redis.RedisURI;
import com.lambdaworks.redis.cluster.models.partitions.Partitions;
import com.lambdaworks.redis.cluster.models.partitions.RedisClusterNode;

/**
 * Implementations for {@link PartitionsConsensus}.
 * <p>
 * Both algorithms assign a vote count to each retrieved topology view and then elect a winner via
 * {@link #electLeader(List)}; they differ only in how votes are counted.
 *
 * @author Mark Paluch
 * @since 4.2
 */
class PartitionsConsensusImpl {

    // Holder for consensus implementations; not meant to be instantiated.
    private PartitionsConsensusImpl() {
    }

    /**
     * Votes for the {@link Partitions} view that contains the most known (previously existing) nodes.
     */
    static final class KnownMajority extends PartitionsConsensus {

        @Override
        Partitions getPartitions(Partitions current, Map<RedisURI, Partitions> topologyViews) {

            if (topologyViews.isEmpty()) {
                return current;
            }

            List<VotedPartitions> votedList = new ArrayList<>(topologyViews.size());

            for (Partitions partitions : topologyViews.values()) {

                // One vote per currently-known node that is still present in this view.
                int knownNodes = 0;
                for (RedisClusterNode knownNode : current) {

                    if (partitions.getPartitionByNodeId(knownNode.getNodeId()) != null) {
                        knownNodes++;
                    }
                }

                votedList.add(new VotedPartitions(knownNodes, partitions));
            }

            return electLeader(votedList);
        }
    }

    /**
     * Votes for the {@link Partitions} view that contains the most active (healthy) nodes, i.e. nodes without
     * FAIL/PFAIL/NOADDR flags.
     */
    static final class HealthyMajority extends PartitionsConsensus {

        @Override
        Partitions getPartitions(Partitions current, Map<RedisURI, Partitions> topologyViews) {

            if (topologyViews.isEmpty()) {
                return current;
            }

            List<VotedPartitions> votedList = new ArrayList<>(topologyViews.size());

            for (Partitions partitions : topologyViews.values()) {
                votedList.add(new VotedPartitions(countHealthyNodes(partitions), partitions));
            }

            return electLeader(votedList);
        }

        // Counts nodes not flagged FAIL, PFAIL (EVENTUAL_FAIL), or NOADDR.
        private static int countHealthyNodes(Partitions partitions) {

            int votes = 0;

            for (RedisClusterNode node : partitions) {

                if (node.is(RedisClusterNode.NodeFlag.FAIL) || node.is(RedisClusterNode.NodeFlag.EVENTUAL_FAIL)
                        || node.is(RedisClusterNode.NodeFlag.NOADDR)) {
                    continue;
                }

                votes++;
            }

            return votes;
        }
    }

    /**
     * Elect the view with the highest vote count. The list is shuffled before the (stable) sort so that ties between
     * equally voted views are broken randomly rather than by map iteration order.
     *
     * @param votedList the candidate views with their votes, must not be empty.
     * @return the winning {@link Partitions}.
     */
    private static Partitions electLeader(List<VotedPartitions> votedList) {

        Collections.shuffle(votedList);
        Collections.sort(votedList, (o1, o2) -> Integer.compare(o2.votes, o1.votes));

        return votedList.get(0).partitions;
    }

    /**
     * Value object pairing a topology view with its vote count.
     */
    static final class VotedPartitions {

        final int votes;
        final Partitions partitions;

        VotedPartitions(int votes, Partitions partitions) {
            this.votes = votes;
            this.partitions = partitions;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
97 changes: 97 additions & 0 deletions
97
src/test/java/com/lambdaworks/redis/cluster/HealthyMajorityPartitionsConsensusTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package com.lambdaworks.redis.cluster;

import static com.lambdaworks.redis.cluster.PartitionsConsensusTestSupport.createMap;
import static com.lambdaworks.redis.cluster.PartitionsConsensusTestSupport.createNode;
import static com.lambdaworks.redis.cluster.PartitionsConsensusTestSupport.createPartitions;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.Arrays;
import java.util.Collections;
import java.util.Map;

import org.junit.Test;

import com.lambdaworks.redis.RedisURI;
import com.lambdaworks.redis.cluster.models.partitions.Partitions;
import com.lambdaworks.redis.cluster.models.partitions.RedisClusterNode;

/**
 * Unit tests for {@link PartitionsConsensus#HEALTHY_MAJORITY}: the view with the most nodes lacking FAIL flags wins.
 *
 * @author Mark Paluch
 */
public class HealthyMajorityPartitionsConsensusTest {

    // NOTE: the same node instances are shared across all Partitions views created below,
    // so setting a flag on a node marks it in every view containing it. Flags are
    // deliberately mutated AFTER the views/map are built.
    private RedisClusterNode node1 = createNode(1);
    private RedisClusterNode node2 = createNode(2);
    private RedisClusterNode node3 = createNode(3);
    private RedisClusterNode node4 = createNode(4);
    private RedisClusterNode node5 = createNode(5);

    // All views are identical and fully healthy: any of them is an acceptable winner.
    @Test
    public void sameSharedViewShouldDecideForHealthyNodes() throws Exception {

        Partitions partitions1 = createPartitions(node1, node2, node3, node4, node5);
        Partitions partitions2 = createPartitions(node1, node2, node3, node4, node5);
        Partitions partitions3 = createPartitions(node1, node2, node3, node4, node5);

        Map<RedisURI, Partitions> map = createMap(partitions1, partitions2, partitions3);

        Partitions result = PartitionsConsensus.HEALTHY_MAJORITY.getPartitions(null, map);

        assertThat(Arrays.asList(partitions1, partitions2, partitions3)).contains(result);
    }

    // Nodes 2-5 are failed, so partitions1 (node1 healthy = 1 vote) beats
    // partitions2/partitions3 (0 healthy nodes each).
    @Test
    public void unhealthyNodeViewShouldDecideForHealthyNodes() throws Exception {

        Partitions partitions1 = createPartitions(node1, node2);
        Partitions partitions2 = createPartitions(node2, node3, node4, node5);
        Partitions partitions3 = createPartitions(node2, node3, node4, node5);

        Map<RedisURI, Partitions> map = createMap(partitions1, partitions2, partitions3);

        node2.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));
        node3.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));
        node4.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));
        node5.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));

        Partitions result = PartitionsConsensus.HEALTHY_MAJORITY.getPartitions(null, map);

        assertThat(result).isSameAs(partitions1);
    }

    // Nodes 1-3 are failed: partitions1 has 0 healthy nodes, partitions2 is empty,
    // partitions3 still has two healthy nodes (4 and 5) and must win.
    @Test
    public void splitNodeViewShouldDecideForHealthyNodes() throws Exception {

        Partitions partitions1 = createPartitions(node1, node2, node3);
        Partitions partitions2 = createPartitions();
        Partitions partitions3 = createPartitions(node3, node4, node5);

        Map<RedisURI, Partitions> map = createMap(partitions1, partitions2, partitions3);

        node1.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));
        node2.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));
        node3.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));

        Partitions result = PartitionsConsensus.HEALTHY_MAILORITY == null ? null : PartitionsConsensus.HEALTHY_MAJORITY.getPartitions(null, map);

        assertThat(result).isSameAs(partitions3);
    }

    // Nodes 2-4 are failed: partitions1 and partitions3 each keep exactly one healthy
    // node (1 and 5 respectively) — a tie, so either may win; partitions2 has none.
    @Test
    public void splitUnhealthyNodeViewShouldDecideForHealthyNodes() throws Exception {

        Partitions partitions1 = createPartitions(node1, node2);
        Partitions partitions2 = createPartitions(node2, node3);
        Partitions partitions3 = createPartitions(node3, node4, node5);

        Map<RedisURI, Partitions> map = createMap(partitions1, partitions2, partitions3);

        node2.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));
        node3.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));
        node4.setFlags(Collections.singleton(RedisClusterNode.NodeFlag.FAIL));

        Partitions result = PartitionsConsensus.HEALTHY_MAJORITY.getPartitions(null, map);

        assertThat(Arrays.asList(partitions1, partitions3)).contains(result);
    }
}
Oops, something went wrong.