From 43b9c5c77f87654e6890075f923e2b72c33ba06b Mon Sep 17 00:00:00 2001 From: Michal Hajas Date: Wed, 12 Jun 2024 22:12:35 +0200 Subject: [PATCH] Fix FailoverTest intermittent failures Closes #812 Signed-off-by: Michal Hajas --- .../benchmark/crossdc/FailoverTest.java | 19 ++++++- .../benchmark/crossdc/client/AWSClient.java | 51 ++++++++++++++----- .../crossdc/client/DatacenterInfo.java | 5 ++ 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/FailoverTest.java b/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/FailoverTest.java index 9594810d..1dbd69da 100644 --- a/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/FailoverTest.java +++ b/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/FailoverTest.java @@ -18,6 +18,7 @@ import org.keycloak.benchmark.crossdc.junit.tags.ActivePassive; import io.fabric8.kubernetes.client.KubernetesClient; +import software.amazon.awssdk.services.cloudwatch.model.StateValue; public class FailoverTest extends AbstractCrossDCTest { @@ -28,8 +29,14 @@ protected void failbackLoadBalancers() throws URISyntaxException, IOException, I super.failbackLoadBalancers(); if (activePassive) { String domain = DC_1.getKeycloakServerURL().substring("https://".length()); - AWSClient.updateRoute53HealthCheckPath(domain, "/lb-check"); + String healthCheckId = AWSClient.getHealthCheckId(domain); + AWSClient.updateRoute53HealthCheckPath(healthCheckId, "/lb-check"); + AWSClient.waitForTheHealthCheckToBeInState(healthCheckId, StateValue.OK); DC_1.kc().waitToBeActive(LOAD_BALANCER_KEYCLOAK); + + // Assert that the health check path was updated + String route53HealthCheckPath = AWSClient.getRoute53HealthCheckPath(healthCheckId); + assertTrue(route53HealthCheckPath.endsWith("/lb-check"), 
"Health check path was supposed to end with /lb-check but was " + route53HealthCheckPath); } else { // Heal split-brain if previously initiated scaleUpGossipRouter(DC_1); @@ -49,6 +56,16 @@ public void logoutUserWithFailoverTest() throws IOException, URISyntaxException, Map tokensMap = LOAD_BALANCER_KEYCLOAK.exchangeCode(REALM_NAME, CLIENTID, CLIENT_SECRET, 200, code); DC_1.kc().markLBCheckDown(); + + // It seems in some cases the alarm is triggered later than the actual failover happens and the test passes + // so quickly that the alarm is still in the OK state in the failbackLoadBalancers method which is causing failures + // in the following tests, therefore we will wait for the health check to be in ALARM state before proceeding + String healthCheckId = AWSClient.getHealthCheckId(DC_1.getKeycloakServerURL().substring("https://".length())); + AWSClient.waitForTheHealthCheckToBeInState(healthCheckId, StateValue.ALARM); + String route53HealthCheckPath = AWSClient.getRoute53HealthCheckPath(healthCheckId); + + // Check the failover lambda was executed and the health check path was updated to a non-existing url + assertTrue(route53HealthCheckPath.endsWith("/lb-check-failed-over"), "Health check path was supposed to end with /lb-check-failed-over but was " + route53HealthCheckPath); DC_2.kc().waitToBeActive(LOAD_BALANCER_KEYCLOAK); // Verify if the user session UUID in code, we fetched from Keycloak exists in session cache key of external ISPN in DC2 diff --git a/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/AWSClient.java b/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/AWSClient.java index dc012fba..551513c4 100644 --- a/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/AWSClient.java +++ 
b/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/AWSClient.java @@ -24,6 +24,7 @@ import software.amazon.awssdk.services.globalaccelerator.model.EndpointDescription; import software.amazon.awssdk.services.globalaccelerator.model.EndpointGroup; import software.amazon.awssdk.services.route53.Route53Client; +import software.amazon.awssdk.services.route53.model.GetHealthCheckRequest; import software.amazon.awssdk.services.route53.model.HealthCheck; import software.amazon.awssdk.services.route53.model.UpdateHealthCheckRequest; import software.amazon.awssdk.utils.builder.SdkBuilder; @@ -32,34 +33,56 @@ public class AWSClient { private static final Logger LOG = Logger.getLogger(AWSClient.class); - public static void updateRoute53HealthCheckPath(String domainName, String path) { + public static String getHealthCheckId(String domainName) { try (SdkHttpClient httpClient = ApacheHttpClient.builder().build(); - Route53Client route53 = Route53Client.builder().httpClient(httpClient).build(); - CloudWatchClient cloudWatch = CloudWatchClient.builder().region(Region.US_EAST_1).httpClient(httpClient).build()) { + Route53Client route53 = Route53Client.builder().httpClient(httpClient).build()) { + for (HealthCheck hc : route53.listHealthChecks(SdkBuilder::build).healthChecks()) { + if (domainName.equals(hc.healthCheckConfig().fullyQualifiedDomainName())) { + LOG.infof("Found Route53 HealthCheck '%s' for Domain='%s'", hc.id(), domainName); + return hc.id(); + } + } + } + return null; + } - String healthCheckId = null; - for (HealthCheck hc : route53.listHealthChecks(SdkBuilder::build).healthChecks()) { - if (domainName.equals(hc.healthCheckConfig().fullyQualifiedDomainName())) { - healthCheckId = hc.id(); - break; - } - } - LOG.infof("Updating Route53 HealthCheck '%s' for Domain='%s' to path='%s'", healthCheckId, domainName, path); + public static void updateRoute53HealthCheckPath(String healthCheckId, String path) { + try 
(SdkHttpClient httpClient = ApacheHttpClient.builder().build(); + Route53Client route53 = Route53Client.builder().httpClient(httpClient).build()) { + + LOG.infof("Updating Route53 HealthCheck '%s' to path='%s'", healthCheckId, path); route53.updateHealthCheck( UpdateHealthCheckRequest.builder() .healthCheckId(healthCheckId) .resourcePath(path) .build() ); + } + } - // Wait for the HealthCheck Alarm to be in the OK state - LOG.infof("Waiting for CloudWatch Alarm '%s' to be in state OK", healthCheckId); + public static String getRoute53HealthCheckPath(String healthCheckId) { + try (SdkHttpClient httpClient = ApacheHttpClient.builder().build(); + Route53Client route53 = Route53Client.builder().httpClient(httpClient).build()) { + + return route53.getHealthCheck( + GetHealthCheckRequest.builder() + .healthCheckId(healthCheckId) + .build() + ).healthCheck().healthCheckConfig().resourcePath(); + } + } + + public static void waitForTheHealthCheckToBeInState(String healthCheckId, StateValue stateValue) { + try (SdkHttpClient httpClient = ApacheHttpClient.builder().build(); + CloudWatchClient cloudWatch = CloudWatchClient.builder().region(Region.US_EAST_1).httpClient(httpClient).build()) { + LOG.infof("Waiting for CloudWatch Alarm '%s' to be in state %s", healthCheckId, stateValue); cloudWatch.waiter().waitUntilAlarmExists( DescribeAlarmsRequest.builder() .alarmNames(healthCheckId) - .stateValue(StateValue.OK) + .stateValue(stateValue) .build(), WaiterOverrideConfiguration.builder() + .maxAttempts(150) // the default is 40 attempts, which seems to take precedence over the 10-minute wait timeout .waitTimeout(Duration.ofMinutes(10)) .build() ); diff --git a/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/DatacenterInfo.java b/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/DatacenterInfo.java index 7af061f2..504c14df 100644 --- 
a/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/DatacenterInfo.java +++ b/provision/rosa-cross-dc/keycloak-benchmark-crossdc-tests/src/test/java/org/keycloak/benchmark/crossdc/client/DatacenterInfo.java @@ -2,6 +2,7 @@ import java.net.http.HttpClient; +import org.jboss.logging.Logger; import org.keycloak.benchmark.crossdc.AbstractCrossDCTest; import org.keycloak.benchmark.crossdc.util.PropertyUtils; @@ -11,6 +12,8 @@ public class DatacenterInfo implements AutoCloseable { + private static final Logger LOG = Logger.getLogger(DatacenterInfo.class); + private final String namespace; private final String keycloakServerURL; private final String infinispanServerURL; @@ -49,6 +52,8 @@ public DatacenterInfo(HttpClient httpClient, int index, boolean activePassive) { .get(0) .getHostname(); } + + LOG.infof("Keycloak server URL index %d: %s", index, keycloakServerURL); this.loadbalancerURL = getRouteHost("keycloak"); this.keycloak = new KeycloakClient(httpClient, keycloakServerURL, activePassive);