Skip to content

Commit

Permalink
Fix FailoverTest intermittent failures
Browse files Browse the repository at this point in the history
Closes #812

Signed-off-by: Michal Hajas <mhajas@redhat.com>
  • Loading branch information
mhajas committed Jun 12, 2024
1 parent 8d5df10 commit 43b9c5c
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.keycloak.benchmark.crossdc.junit.tags.ActivePassive;

import io.fabric8.kubernetes.client.KubernetesClient;
import software.amazon.awssdk.services.cloudwatch.model.StateValue;

public class FailoverTest extends AbstractCrossDCTest {

Expand All @@ -28,8 +29,14 @@ protected void failbackLoadBalancers() throws URISyntaxException, IOException, I
super.failbackLoadBalancers();
if (activePassive) {
String domain = DC_1.getKeycloakServerURL().substring("https://".length());
AWSClient.updateRoute53HealthCheckPath(domain, "/lb-check");
String healthCheckId = AWSClient.getHealthCheckId(domain);
AWSClient.updateRoute53HealthCheckPath(healthCheckId, "/lb-check");
AWSClient.waitForTheHealthCheckToBeInState(healthCheckId, StateValue.OK);
DC_1.kc().waitToBeActive(LOAD_BALANCER_KEYCLOAK);

// Assert that the health check path was updated
String route53HealthCheckPath = AWSClient.getRoute53HealthCheckPath(healthCheckId);
assertTrue(route53HealthCheckPath.endsWith("/lb-check"), "Health check path was supposed to end with /lb-check but was " + route53HealthCheckPath);
} else {
// Heal split-brain if previously initiated
scaleUpGossipRouter(DC_1);
Expand All @@ -49,6 +56,16 @@ public void logoutUserWithFailoverTest() throws IOException, URISyntaxException,
Map<String, Object> tokensMap = LOAD_BALANCER_KEYCLOAK.exchangeCode(REALM_NAME, CLIENTID, CLIENT_SECRET, 200, code);

DC_1.kc().markLBCheckDown();

// In some cases the alarm seems to be triggered later than the actual failover happens, and the test passes
// so quickly that the alarm is still in the OK state in the failbackLoadBalancers method, which causes failures
// in the following tests. Therefore, we wait for the health check to be in the ALARM state before proceeding.
String healthCheckId = AWSClient.getHealthCheckId(DC_1.getKeycloakServerURL().substring("https://".length()));
AWSClient.waitForTheHealthCheckToBeInState(healthCheckId, StateValue.ALARM);
String route53HealthCheckPath = AWSClient.getRoute53HealthCheckPath(healthCheckId);

// Check the failover lambda was executed and the health check path was updated to a non-existing url
assertTrue(route53HealthCheckPath.endsWith("/lb-check-failed-over"), "Health check path was supposed to end with /lb-check-failed-over but was " + route53HealthCheckPath);
DC_2.kc().waitToBeActive(LOAD_BALANCER_KEYCLOAK);

// Verify that the user session UUID in the code we fetched from Keycloak exists in the session cache keys of the external ISPN in DC2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import software.amazon.awssdk.services.globalaccelerator.model.EndpointDescription;
import software.amazon.awssdk.services.globalaccelerator.model.EndpointGroup;
import software.amazon.awssdk.services.route53.Route53Client;
import software.amazon.awssdk.services.route53.model.GetHealthCheckRequest;
import software.amazon.awssdk.services.route53.model.HealthCheck;
import software.amazon.awssdk.services.route53.model.UpdateHealthCheckRequest;
import software.amazon.awssdk.utils.builder.SdkBuilder;
Expand All @@ -32,34 +33,56 @@ public class AWSClient {

private static final Logger LOG = Logger.getLogger(AWSClient.class);

public static void updateRoute53HealthCheckPath(String domainName, String path) {
public static String getHealthCheckId(String domainName) {
try (SdkHttpClient httpClient = ApacheHttpClient.builder().build();
Route53Client route53 = Route53Client.builder().httpClient(httpClient).build();
CloudWatchClient cloudWatch = CloudWatchClient.builder().region(Region.US_EAST_1).httpClient(httpClient).build()) {
Route53Client route53 = Route53Client.builder().httpClient(httpClient).build()) {
for (HealthCheck hc : route53.listHealthChecks(SdkBuilder::build).healthChecks()) {
if (domainName.equals(hc.healthCheckConfig().fullyQualifiedDomainName())) {
LOG.infof("Found Route53 HealthCheck '%s' for Domain='%s'", hc.id(), domainName);
return hc.id();
}
}
}
return null;
}

String healthCheckId = null;
for (HealthCheck hc : route53.listHealthChecks(SdkBuilder::build).healthChecks()) {
if (domainName.equals(hc.healthCheckConfig().fullyQualifiedDomainName())) {
healthCheckId = hc.id();
break;
}
}
LOG.infof("Updating Route53 HealthCheck '%s' for Domain='%s' to path='%s'", healthCheckId, domainName, path);
/**
 * Sets the resource path of a Route53 health check.
 *
 * @param healthCheckId identifier of the Route53 health check to modify
 * @param path          resource path to store on the health check
 */
public static void updateRoute53HealthCheckPath(String healthCheckId, String path) {
    // Both clients are created per call and closed when the try block exits.
    try (SdkHttpClient http = ApacheHttpClient.builder().build();
         Route53Client client = Route53Client.builder().httpClient(http).build()) {
        LOG.infof("Updating Route53 HealthCheck '%s' to path='%s'", healthCheckId, path);

        UpdateHealthCheckRequest request = UpdateHealthCheckRequest.builder()
                .healthCheckId(healthCheckId)
                .resourcePath(path)
                .build();
        client.updateHealthCheck(request);
    }
}

// Wait for the HealthCheck Alarm to be in the OK state
LOG.infof("Waiting for CloudWatch Alarm '%s' to be in state OK", healthCheckId);
/**
 * Reads the resource path currently configured on a Route53 health check.
 *
 * @param healthCheckId identifier of the Route53 health check to look up
 * @return the resource path taken from the health check's configuration
 */
public static String getRoute53HealthCheckPath(String healthCheckId) {
    // Both clients are created per call and closed when the try block exits.
    try (SdkHttpClient http = ApacheHttpClient.builder().build();
         Route53Client client = Route53Client.builder().httpClient(http).build()) {
        GetHealthCheckRequest request = GetHealthCheckRequest.builder()
                .healthCheckId(healthCheckId)
                .build();
        return client.getHealthCheck(request)
                .healthCheck()
                .healthCheckConfig()
                .resourcePath();
    }
}

public static void waitForTheHealthCheckToBeInState(String healthCheckId, StateValue stateValue) {
try (SdkHttpClient httpClient = ApacheHttpClient.builder().build();
CloudWatchClient cloudWatch = CloudWatchClient.builder().region(Region.US_EAST_1).httpClient(httpClient).build()) {
LOG.infof("Waiting for CloudWatch Alarm '%s' to be in state %s", healthCheckId, stateValue);
cloudWatch.waiter().waitUntilAlarmExists(
DescribeAlarmsRequest.builder()
.alarmNames(healthCheckId)
.stateValue(StateValue.OK)
.stateValue(stateValue)
.build(),
WaiterOverrideConfiguration.builder()
.maxAttempts(150) // by default this is 40 and it seems it takes precedence before 10 minutes
.waitTimeout(Duration.ofMinutes(10))
.build()
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.net.http.HttpClient;

import org.jboss.logging.Logger;
import org.keycloak.benchmark.crossdc.AbstractCrossDCTest;
import org.keycloak.benchmark.crossdc.util.PropertyUtils;

Expand All @@ -11,6 +12,8 @@

public class DatacenterInfo implements AutoCloseable {

private static final Logger LOG = Logger.getLogger(DatacenterInfo.class);

private final String namespace;
private final String keycloakServerURL;
private final String infinispanServerURL;
Expand Down Expand Up @@ -49,6 +52,8 @@ public DatacenterInfo(HttpClient httpClient, int index, boolean activePassive) {
.get(0)
.getHostname();
}

LOG.infof("Keycloak server URL index %d: %s", index, keycloakServerURL);
this.loadbalancerURL = getRouteHost("keycloak");

this.keycloak = new KeycloakClient(httpClient, keycloakServerURL, activePassive);
Expand Down

0 comments on commit 43b9c5c

Please sign in to comment.