Skip to content

Commit

Permalink
WIP Active/Active Failover test
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanemerson committed Jun 4, 2024
1 parent 8c91bcb commit fae67a8
Show file tree
Hide file tree
Showing 14 changed files with 404 additions and 101 deletions.
44 changes: 4 additions & 40 deletions .github/workflows/rosa-run-crossdc-func-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,54 +63,18 @@ jobs:
with:
clusterName: ${{ inputs.clusterPrefix }}-a

- name: Get DC1 Infinispan URLs
- name: Get DC1 Context
shell: bash
run: |
ISPN_DC1_URL=https://$(kubectl get routes -n "${{ env.PROJECT }}" -l app=infinispan-service-external -o jsonpath='{.items[*].spec.host}')
echo "ISPN_DC1_URL=$ISPN_DC1_URL" >> "$GITHUB_ENV"
- name: Get DC1 Active/Passive URLs
if: ${{ !inputs.activeActive }}
shell: bash
run: |
KEYCLOAK_DC1_URL=https://$(kubectl get routes -n "${{ env.PROJECT }}" accelerator-loadbalancer -o jsonpath='{.spec.host}')
echo "KEYCLOAK_DC1_URL=$KEYCLOAK_DC1_URL" >> "$GITHUB_ENV"
LOAD_BALANCER_URL=https://$(kubectl get routes -n "${{ env.PROJECT }}" -l app=keycloak -o jsonpath='{.items[*].spec.host}')
echo "LOAD_BALANCER_URL=$LOAD_BALANCER_URL" >> "$GITHUB_ENV"
- name: Get DC1 Active/Active URLs
if: inputs.activeActive
shell: bash
run: |
KEYCLOAK_DC1_URL=https://$(kubectl get svc -n "${{ env.PROJECT }}" accelerator-loadbalancer -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
echo "KEYCLOAK_DC1_URL=$KEYCLOAK_DC1_URL" >> "$GITHUB_ENV"
LOAD_BALANCER_URL=https://$(kubectl get routes -n "${{ env.PROJECT }}" -l app=keycloak -o jsonpath='{.items[*].spec.host}')
echo "LOAD_BALANCER_URL=$LOAD_BALANCER_URL" >> "$GITHUB_ENV"
run: echo "KUBERNETES_1_CONTEXT=$(kubectl config current-context)" >> "$GITHUB_ENV"

- name: Login to OpenShift cluster B
uses: ./.github/actions/oc-keycloak-login
with:
clusterName: ${{ inputs.clusterPrefix }}-b

- name: Get DC2 Infinispan URLs
shell: bash
run: |
ISPN_DC2_URL=https://$(kubectl get routes -n "${{ env.PROJECT }}" -l app=infinispan-service-external -o jsonpath='{.items[*].spec.host}')
echo "ISPN_DC2_URL=$ISPN_DC2_URL" >> "$GITHUB_ENV"
- name: Get DC2 Active/Passive URLs
if: ${{ !inputs.activeActive }}
shell: bash
run: |
KEYCLOAK_DC2_URL=https://$(kubectl get routes -n "${{ env.PROJECT }}" aws-health-route -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
echo "KEYCLOAK_DC2_URL=$KEYCLOAK_DC2_URL" >> "$GITHUB_ENV"
- name: Get DC2 Active/Active URLs
if: inputs.activeActive
- name: Get DC2 Context
shell: bash
run: |
KEYCLOAK_DC2_URL=https://$(kubectl get svc -n "${{ env.PROJECT }}" accelerator-loadbalancer -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
echo "KEYCLOAK_DC2_URL=$KEYCLOAK_DC2_URL" >> "$GITHUB_ENV"
run: echo "KUBERNETES_2_CONTEXT=$(kubectl config current-context)" >> "$GITHUB_ENV"

- name: Run CrossDC functional tests
run: ./provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/run-crossdc-tests.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ function createLoadBalancer() {
export CLUSTER_NAME=$1
SVC_NAME=$2
NAMESPACE=$3
ACCELERATOR_NAME=$4

bash ${SCRIPT_DIR}/../rosa_oc_login.sh > /dev/null
oc create namespace ${NAMESPACE} > /dev/null || true
Expand All @@ -38,7 +39,7 @@ function createLoadBalancer() {
metadata:
name: ${SVC_NAME}
annotations:
service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags: site=${CLUSTER_NAME},namespace=${NAMESPACE}
service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags: accelerator=${ACCELERATOR_NAME},site=${CLUSTER_NAME},namespace=${NAMESPACE}
service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
service.beta.kubernetes.io/aws-load-balancer-healthcheck-path: "/lb-check"
service.beta.kubernetes.io/aws-load-balancer-healthcheck-protocol: "https"
Expand Down Expand Up @@ -71,8 +72,8 @@ if [[ "${CLUSTER_1_REGION}" != "${CLUSTER_2_REGION}" ]]; then
exit 1
fi

createLoadBalancer ${CLUSTER_1} ${ACCELERATOR_LB_NAME} ${KEYCLOAK_NAMESPACE}
createLoadBalancer ${CLUSTER_2} ${ACCELERATOR_LB_NAME} ${KEYCLOAK_NAMESPACE}
createLoadBalancer ${CLUSTER_1} ${ACCELERATOR_LB_NAME} ${KEYCLOAK_NAMESPACE} ${ACCELERATOR_NAME}
createLoadBalancer ${CLUSTER_2} ${ACCELERATOR_LB_NAME} ${KEYCLOAK_NAMESPACE} ${ACCELERATOR_NAME}

TOFU_CMD="tofu apply -auto-approve \
-var aws_region=${CLUSTER_1_REGION} \
Expand Down
5 changes: 5 additions & 0 deletions provision/rosa-cross-dc/Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,11 @@ tasks:
- ACCELERATOR_NAME
cmd: KEYCLOAK_NAMESPACE="{{.KC_NAMESPACE_PREFIX}}keycloak" ACCELERATOR_NAME={{.ACCELERATOR_NAME}} CLUSTER_1={{.ROSA_CLUSTER_NAME_1}} CLUSTER_2={{.ROSA_CLUSTER_NAME_2}} ./accelerator_multi_az_delete.sh

# Split-brain recovery entry point: its only command delegates to the
# global-accelerator-create task.
global-accelerator-recover:
desc: "Recover from Global Accelerator split-brain"
cmds:
- task: global-accelerator-create

route53:
desc: "Creates Route53 primary/backup DNS records"
dir: "{{.ROUTE53_DIR}}"
Expand Down
44 changes: 35 additions & 9 deletions provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<aws.java.sdk.version>2.20.43</aws.java.sdk.version>
<fabric8.version>6.13.0</fabric8.version>
</properties>

<dependencies>
Expand Down Expand Up @@ -61,6 +62,33 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>globalaccelerator</artifactId>
<version>${aws.java.sdk.version}</version>
<exclusions>
<exclusion>
<groupId>software.amazon.awssdk</groupId>
<artifactId>netty-nio-client</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>elasticloadbalancingv2</artifactId>
<version>${aws.java.sdk.version}</version>
<exclusions>
<exclusion>
<groupId>software.amazon.awssdk</groupId>
<artifactId>netty-nio-client</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>openshift-client</artifactId>
<version>${fabric8.version}</version>
</dependency>
</dependencies>

<profiles>
Expand All @@ -84,6 +112,9 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<deployment.type>active-active</deployment.type>
</systemPropertyVariables>
<excludedGroups>active-passive</excludedGroups>
</configuration>
</plugin>
Expand All @@ -101,18 +132,13 @@
<excludedGroups>active-active</excludedGroups>
<systemPropertyVariables>
<java.util.logging.manager>org.jboss.logmanager.LogManager</java.util.logging.manager>
<deployment.type>active-passive</deployment.type>
<!--suppress UnresolvedMavenProperty -->
<load-balancer.url>${LOAD_BALANCER_URL}</load-balancer.url>
<!--suppress UnresolvedMavenProperty -->
<keycloak.dc1.url>${KEYCLOAK_DC1_URL}</keycloak.dc1.url>
<!--suppress UnresolvedMavenProperty -->
<keycloak.dc2.url>${KEYCLOAK_DC2_URL}</keycloak.dc2.url>
<!--suppress UnresolvedMavenProperty -->
<infinispan.dc1.url>${ISPN_DC1_URL}</infinispan.dc1.url>
<deployment.namespace>${DEPLOYMENT_NAMESPACE}</deployment.namespace>
<!--suppress UnresolvedMavenProperty -->
<infinispan.dc2.url>${ISPN_DC2_URL}</infinispan.dc2.url>
<kubernetes.1.context>${KUBERNETES_1_CONTEXT}</kubernetes.1.context>
<!--suppress UnresolvedMavenProperty -->
<infinispan.password>${ISPN_PASSWORD}</infinispan.password>
<kubernetes.2.context>${KUBERNETES_2_CONTEXT}</kubernetes.2.context>
</systemPropertyVariables>
</configuration>
</plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ SECRET_MANAGER_REGION="eu-central-1"
MAIN_PASSWORD=$(aws secretsmanager get-secret-value --region $SECRET_MANAGER_REGION --secret-id $KEYCLOAK_MASTER_PASSWORD_SECRET_NAME --query SecretString --output text --no-cli-pager)

MVN_CMD="./mvnw -B -f provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/pom.xml clean install -DcrossDCTests \
-Dload-balancer.url=$LOAD_BALANCER_URL \
-Dinfinispan.dc1.url=$ISPN_DC1_URL -Dkeycloak.dc1.url=$KEYCLOAK_DC1_URL \
-Dinfinispan.dc2.url=$ISPN_DC2_URL -Dkeycloak.dc2.url=$KEYCLOAK_DC2_URL \
-Dmain.password=$MAIN_PASSWORD"
-Ddeployment.namespace=${DEPLOYMENT_NAMESPACE} \
-Dkubernetes.1.context=${KUBERNETES_1_CONTEXT} \
-Dkubernetes.2.context=${KUBERNETES_2_CONTEXT} \
-Dmain.password=${MAIN_PASSWORD}"

if [ "${ACTIVE_ACTIVE}" == "true" ]; then
MVN_CMD+=" -Pactive-active"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
import org.keycloak.benchmark.crossdc.client.DatacenterInfo;
import org.keycloak.benchmark.crossdc.client.KeycloakClient;
import org.keycloak.benchmark.crossdc.junit.tags.ActiveActive;
import org.keycloak.benchmark.crossdc.junit.tags.ActivePassive;
import org.keycloak.benchmark.crossdc.util.HttpClientUtils;
import org.keycloak.benchmark.crossdc.util.InfinispanUtils;
import org.keycloak.benchmark.crossdc.util.PropertyUtils;
import org.keycloak.representations.idm.ClientRepresentation;
import org.keycloak.representations.idm.CredentialRepresentation;
import org.keycloak.representations.idm.RealmRepresentation;
Expand Down Expand Up @@ -46,15 +48,14 @@ public abstract class AbstractCrossDCTest {
public static final String CLIENTID = "cross-dc-test-client";
public static final String CLIENT_SECRET = "cross-dc-test-client-secret";
public static final String USERNAME = "cross-dc-test-user";
public static final String MAIN_PASSWORD = System.getProperty("main.password");
public static final String MAIN_PASSWORD = PropertyUtils.getRequired("main.password");

public AbstractCrossDCTest(TestInfo testInfo) {
assertNotNull(MAIN_PASSWORD, "Main password must be set");
public AbstractCrossDCTest() {
var httpClient = HttpClientUtils.newHttpClient();
this.activePassive = !testInfo.getTags().contains(ActiveActive.TAG);
this.DC_1 = new DatacenterInfo(httpClient, System.getProperty("keycloak.dc1.url"), System.getProperty("infinispan.dc1.url"), activePassive);
this.DC_2 = new DatacenterInfo(httpClient, System.getProperty("keycloak.dc2.url"), System.getProperty("infinispan.dc2.url"), activePassive);
this.LOAD_BALANCER_KEYCLOAK = new KeycloakClient(httpClient, System.getProperty("load-balancer.url"), activePassive);
// The deployment mode comes from the "deployment.type" system property. The flag must be
// true exactly when that property names the active-passive deployment; the previous
// negated comparison inverted it for both Maven profiles. An unset property falls back
// to active-passive.
// NOTE(review): assumes ActivePassive.TAG is the same string the active-passive Maven
// profile passes as deployment.type - confirm.
this.activePassive = System.getProperty("deployment.type", ActivePassive.TAG).equals(ActivePassive.TAG);
this.DC_1 = new DatacenterInfo(httpClient, 1, activePassive);
this.DC_2 = new DatacenterInfo(httpClient, 2, activePassive);
// The load-balancer client is built from the URL reported by DC_1.
this.LOAD_BALANCER_KEYCLOAK = new KeycloakClient(httpClient, DC_1.getLoadbalancerURL(), activePassive);
}

@BeforeEach
Expand Down Expand Up @@ -154,22 +155,17 @@ public void tearDownTestEnvironment() throws URISyntaxException, IOException, In
});

MOCK_COOKIE_MANAGER.getCookieStore().removeAll();
failbackHealthChecks();
failbackLoadBalancers();
}

@AfterAll
public void tearDown() throws URISyntaxException, IOException, InterruptedException {
failbackHealthChecks();
failbackLoadBalancers();
}

private void failbackHealthChecks() throws URISyntaxException, IOException, InterruptedException {
protected void failbackLoadBalancers() throws URISyntaxException, IOException, InterruptedException {
DC_1.kc().markLBCheckUp();
DC_2.kc().markLBCheckUp();
if (activePassive) {
String domain = DC_1.getKeycloakServerURL().substring("https://".length());
AWSClient.updateRoute53HealthCheckPath(domain, "/lb-check");
DC_1.kc().waitToBeActive(LOAD_BALANCER_KEYCLOAK);
}
}

protected void assertCacheSize(String cache, int size) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,11 @@
import static org.keycloak.benchmark.crossdc.util.KeycloakUtils.getCreatedId;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.keycloak.admin.client.resource.UsersResource;
import org.keycloak.representations.idm.UserRepresentation;

public class EntityReplicationTest extends AbstractCrossDCTest {

public EntityReplicationTest(TestInfo testInfo) {
super(testInfo);
}

@Test
public void keycloakEntityReplicationOverCacheTest() {
UsersResource dc1Users = DC_1.kc().adminClient().realm(REALM_NAME).users();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,52 @@
package org.keycloak.benchmark.crossdc;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.keycloak.benchmark.crossdc.junit.tags.ActivePassive;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.keycloak.benchmark.crossdc.util.InfinispanUtils.SESSIONS;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.keycloak.benchmark.crossdc.util.InfinispanUtils.SESSIONS;
import org.junit.jupiter.api.Test;
import org.keycloak.benchmark.crossdc.client.AWSClient;
import org.keycloak.benchmark.crossdc.client.DatacenterInfo;
import org.keycloak.benchmark.crossdc.junit.tags.ActiveActive;
import org.keycloak.benchmark.crossdc.junit.tags.ActivePassive;

import io.fabric8.kubernetes.api.model.events.v1.Event;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.Watch;
import io.fabric8.kubernetes.client.Watcher;
import io.fabric8.kubernetes.client.WatcherException;

public class FailoverTest extends AbstractCrossDCTest {

public FailoverTest(TestInfo testInfo) {
super(testInfo);
static final String OPERATORS_NS = "openshift-operators";

/**
 * Restores the load-balancing setup for the current deployment mode and then delegates
 * to the base implementation.
 *
 * Active/active: scales the gossip routers back up on both sites, waits for the
 * "vendor_jgroups_site_view_status" metric on DC_1 to read 1.0, then calls
 * AWSClient.acceleratorFallback with the load balancer's Keycloak URL.
 * Active/passive: points the Route53 health check back at "/lb-check" and waits for
 * DC_1 to become active behind the load balancer.
 *
 * @throws IllegalStateException if the site-view metric is still not 1.0 on the 30th check
 */
@Override
protected void failbackLoadBalancers() throws URISyntaxException, IOException, InterruptedException {
    if (!activePassive) {
        scaleUpGossipRouter(DC_1);
        scaleUpGossipRouter(DC_2);
        // Wait for the JGroups site view to contain all sites: poll once per second and
        // give up on the 30th unsuccessful check.
        for (int failedChecks = 1; DC_1.ispn().getMetric("vendor_jgroups_site_view_status") != 1.0; failedChecks++) {
            if (failedChecks == 30) {
                throw new IllegalStateException("Timedout waiting for cross-site view to reform");
            }
            Thread.sleep(1000);
        }
        AWSClient.acceleratorFallback(LOAD_BALANCER_KEYCLOAK.getKeycloakServerUrl());
    } else {
        // Strip the scheme so only the host part of the DC_1 URL is passed on.
        String healthCheckDomain = DC_1.getKeycloakServerURL().substring("https://".length());
        AWSClient.updateRoute53HealthCheckPath(healthCheckDomain, "/lb-check");
        DC_1.kc().waitToBeActive(LOAD_BALANCER_KEYCLOAK);
    }
    super.failbackLoadBalancers();
}

@Test
Expand All @@ -38,4 +69,74 @@ public void logoutUserWithFailoverTest() throws IOException, URISyntaxException,

LOAD_BALANCER_KEYCLOAK.refreshToken(REALM_NAME, (String) tokensMap.get("refresh_token"), CLIENTID, CLIENT_SECRET, 400);
}

/**
 * Active/active scenario: forces a split-brain and checks that the Accelerator endpoint
 * list shrinks from two entries to one.
 */
@Test
@ActiveActive
public void ensureAcceleratorUpdatedOnSplitBrainTest() throws Exception {
    // Precondition: both sites are registered in the Accelerator EndpointGroup.
    var endpointsBeforeSplit = AWSClient.getAcceleratorEndpoints(DC_1.getLoadbalancerURL());
    assertEquals(2, endpointsBeforeSplit.size());

    // Provoke the split-brain by taking the GossipRouter down in both sites.
    scaleDownGossipRouter(DC_1);
    scaleDownGossipRouter(DC_2);

    // One SiteOffline event is expected per cluster, hence a latch of two. The watches
    // are closed as soon as the wait finishes.
    CountDownLatch siteOfflineLatch = new CountDownLatch(2);
    try (Watch dc1Watch = watchSiteOffline(DC_1, siteOfflineLatch);
         Watch dc2Watch = watchSiteOffline(DC_2, siteOfflineLatch)) {
        boolean bothSitesReportedOffline = siteOfflineLatch.await(1, TimeUnit.MINUTES);
        assertTrue(bothSitesReportedOffline);
    }

    // After the AWS Lambda has run, only one site LB should remain in the EndpointGroup.
    var endpointsAfterSplit = AWSClient.getAcceleratorEndpoints(DC_1.getLoadbalancerURL());
    assertEquals(1, endpointsAfterSplit.size());
}

/**
 * Opens a watch on events.k8s.io/v1 Events in the datacenter's namespace, restricted to
 * events labelled "accelerator" with the load balancer host, and counts the latch down
 * once for every event whose reason is "SiteOffline".
 *
 * @param datacenter the site whose cluster is watched
 * @param latch      latch decremented for each matching SiteOffline event
 * @return the open watch; the caller is responsible for closing it
 */
private Watch watchSiteOffline(DatacenterInfo datacenter, CountDownLatch latch) {
    // The label value is the load balancer URL without its "https://" scheme.
    String acceleratorLabel = datacenter.getLoadbalancerURL().substring("https://".length());
    return datacenter.oc()
            .events()
            .v1()
            .events()
            .inNamespace(datacenter.namespace())
            .withLabel("accelerator", acceleratorLabel)
            .watch(new Watcher<>() {
                @Override
                public void eventReceived(Action action, Event resource) {
                    // NOTE(review): this assertion runs inside the watcher callback, so a
                    // failure here presumably does not fail the test itself - confirm.
                    assertEquals(Action.ADDED, action);
                    // Constant-first comparison: an event without a reason must not
                    // raise a NullPointerException in the callback.
                    if ("SiteOffline".equals(resource.getReason()))
                        latch.countDown();
                }

                @Override
                public void onClose(WatcherException cause) {
                    // Intentionally a no-op, as in the original code.
                }
            });
}

/**
 * Scales the Infinispan operator deployment and then the "infinispan-router" deployment
 * of the given site down to zero replicas.
 *
 * @param datacenter the site whose deployments are scaled down
 */
private void scaleDownGossipRouter(DatacenterInfo datacenter) throws InterruptedException {
    final int replicas = 0;
    KubernetesClient client = datacenter.oc();
    // The operator is scaled first.
    // NOTE(review): presumably so it cannot restore the router - confirm.
    scaleDeployment(client, "infinispan-operator-controller-manager", OPERATORS_NS, replicas);
    scaleDeployment(client, "infinispan-router", datacenter.namespace(), replicas);
}

/**
 * Scales the Infinispan operator deployment and then the "infinispan-router" deployment
 * of the given site back to one replica each.
 *
 * @param datacenter the site whose deployments are scaled up
 */
private void scaleUpGossipRouter(DatacenterInfo datacenter) throws InterruptedException {
    final int replicas = 1;
    KubernetesClient client = datacenter.oc();
    scaleDeployment(client, "infinispan-operator-controller-manager", OPERATORS_NS, replicas);
    scaleDeployment(client, "infinispan-router", datacenter.namespace(), replicas);
}

/**
 * Sets the replica count of a deployment and then waits up to 30 seconds for the
 * deployment to report ready.
 *
 * @param k8s       client for the target cluster
 * @param name      deployment name
 * @param namespace namespace containing the deployment
 * @param replicas  desired replica count
 */
private void scaleDeployment(KubernetesClient k8s, String name, String namespace, int replicas) throws InterruptedException {
    var deployment = k8s.apps()
            .deployments()
            .inNamespace(namespace)
            .withName(name);

    deployment.scale(replicas);
    // NOTE(review): confirm that waitUntilReady completes for replicas == 0 with the
    // client version in use.
    deployment.waitUntilReady(30, TimeUnit.SECONDS);
}
}
Loading

0 comments on commit fae67a8

Please sign in to comment.