From d5f6ee4800dda3ac7d91e2c3fcda5e1194427682 Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Fri, 13 Sep 2019 16:15:30 +0800 Subject: [PATCH 1/5] Deletes code no longer needed regarding state checking of ES hosts We no longer need two-tiered checking of ES addresses because single endpoint is special cased to not cause notable overhead. See https://github.com/line/armeria/issues/2071#issuecomment-531139475 --- .../InitialEndpointSupplier.java | 16 +---------- .../elasticsearch/LazyHttpClientImpl.java | 27 ++++++++----------- 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/InitialEndpointSupplier.java b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/InitialEndpointSupplier.java index ffa46d60cfe..b37b532c5dd 100644 --- a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/InitialEndpointSupplier.java +++ b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/InitialEndpointSupplier.java @@ -15,13 +15,11 @@ import com.linecorp.armeria.client.Endpoint; import com.linecorp.armeria.client.endpoint.EndpointGroup; -import com.linecorp.armeria.client.endpoint.dns.DnsAddressEndpointGroup; import com.linecorp.armeria.client.endpoint.dns.DnsAddressEndpointGroupBuilder; import com.linecorp.armeria.common.SessionProtocol; import java.net.URI; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -64,25 +62,13 @@ final class InitialEndpointSupplier implements Supplier { // A host that isn't an IP may resolve to multiple IP addresses, so we use a endpoint // group to round-robin over them. Users can mix addresses that resolve to multiple IPs // with single IPs freely, they'll all get used. - endpointGroups.add(resolveDnsAddresses(host, port)); + endpointGroups.add(new DnsAddressEndpointGroupBuilder(host).port(port).build()); } } return EndpointGroup.of(endpointGroups); } - // Rather than result in an empty group. Await DNS resolution as this call is deferred anyway - // TODO: delete the await when https://github.com/line/armeria/issues/2071 is complete - DnsAddressEndpointGroup resolveDnsAddresses(String host, int port) { - DnsAddressEndpointGroup result = new DnsAddressEndpointGroupBuilder(host).port(port).build(); - try { - result.awaitInitialEndpoints(1, TimeUnit.SECONDS); - } catch (Exception e) { - // let it fail later - } - return result; - } - int getPort(URI url) { int port = url.getPort(); if (port == -1) port = sessionProtocol.defaultPort(); diff --git a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java index 7663c1c1563..57ce047fecc 100644 --- a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java +++ b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java @@ -15,7 +15,6 @@ import com.linecorp.armeria.client.Endpoint; import com.linecorp.armeria.client.HttpClient; -import com.linecorp.armeria.client.endpoint.DynamicEndpointGroup; import com.linecorp.armeria.client.endpoint.EndpointGroup; import com.linecorp.armeria.client.endpoint.EndpointGroupRegistry; import com.linecorp.armeria.client.endpoint.healthcheck.HealthCheckedEndpointGroup; @@ -72,23 +71,19 @@ final class LazyHttpClientImpl implements LazyHttpClient { Endpoint getEndpoint() { EndpointGroup endpointGroup = initialEndpoints.get(); + // don't decorate single endpoints if (endpointGroup instanceof Endpoint) return (Endpoint) endpointGroup; - // https://github.com/line/armeria/issues/2071 Composite endpoint groups don't extend dynamic - // once this is done, we can delete special-cased code in InitialEndpointSupplier about DNS, as - // when endpointGroup.isStatic() is false, we'd here. - if (endpointGroup instanceof DynamicEndpointGroup) { - try { - // Since we aren't holding up server startup, or sitting on the event loop, it is ok to - // block. The alternative is round-robin, which could be unlucky and hit a bad node first. - // - // We are blocking up to the connection timeout which should be enough time for any DNS - // resolution that hasn't happened yet to finish. - endpointGroup.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS); - } catch (Exception e) { - // We'll try again next time around. - throw new IllegalStateException("couldn't connect any of " + endpointGroup.endpoints(), e); - } + try { + // Since we aren't holding up server startup, or sitting on the event loop, it is ok to + // block. The alternative is round-robin, which could be unlucky and hit a bad node first. + // + // We are blocking up to the connection timeout which should be enough time for any DNS + // resolution that hasn't happened yet to finish. + endpointGroup.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS); + } catch (Exception e) { + // We'll try again next time around. + throw new IllegalStateException("couldn't connect any of " + endpointGroup.endpoints(), e); } if (healthCheck.isEnabled()) endpointGroup = decorateHealthCheck(endpointGroup); From a07357065fe8c9dc801638d9d8a90b3b13436970 Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Fri, 13 Sep 2019 17:39:51 +0800 Subject: [PATCH 2/5] Removes redundant wait --- .../elasticsearch/LazyHttpClientImpl.java | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java index 57ce047fecc..17bff30fdd9 100644 --- a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java +++ b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java @@ -70,9 +70,12 @@ final class LazyHttpClientImpl implements LazyHttpClient { } Endpoint getEndpoint() { - EndpointGroup endpointGroup = initialEndpoints.get(); - // don't decorate single endpoints - if (endpointGroup instanceof Endpoint) return (Endpoint) endpointGroup; + EndpointGroup rawEndpointGroup = initialEndpoints.get(); + // Only health-check when there are alternative endpoints. There aren't when instanceof Endpoint + if (rawEndpointGroup instanceof Endpoint) return (Endpoint) rawEndpointGroup; + + EndpointGroup endpointGroup = rawEndpointGroup; + if (healthCheck.isEnabled()) endpointGroup = decorateHealthCheck(rawEndpointGroup); try { // Since we aren't holding up server startup, or sitting on the event loop, it is ok to @@ -83,13 +86,11 @@ Endpoint getEndpoint() { endpointGroup.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS); } catch (Exception e) { // We'll try again next time around. - throw new IllegalStateException("couldn't connect any of " + endpointGroup.endpoints(), e); + throw new IllegalStateException("couldn't connect any of " + rawEndpointGroup.endpoints(), e); } - if (healthCheck.isEnabled()) endpointGroup = decorateHealthCheck(endpointGroup); - - // TODO: why must we do this instead of using direct type references. - // The static factory is concerning. https://github.com/line/armeria/issues/1084 + // Currently, there's no alternative to static registries when using Endpoint.ofGroup + // https://github.com/line/armeria/issues/1084 EndpointGroupRegistry.register("elasticsearch", endpointGroup, ROUND_ROBIN); return Endpoint.ofGroup("elasticsearch"); } @@ -115,18 +116,6 @@ HealthCheckedEndpointGroup decorateHealthCheck(EndpointGroup endpointGroup) { .retryInterval(healthCheck.getInterval()) .build(); healthChecked.newMeterBinder("elasticsearch").bindTo(meterRegistry); - - // Since we aren't holding up server startup, or sitting on the event loop, it is ok to block. - // The alternative is round-robin, which could be unlucky and hit a bad node first. - // - // We are blocking up to the connection timeout which should be enough time for a health check - // to respond. - try { - healthChecked.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS); - } catch (Exception e) { - healthChecked.close(); // we'll recreate it the next time around. - throw new IllegalStateException("couldn't connect any of " + endpointGroup.endpoints(), e); - } return healthChecked; } From fadb728d0ecef92627aa5603baf77fddb031ddfc Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Fri, 13 Sep 2019 17:49:03 +0800 Subject: [PATCH 3/5] linked to https://github.com/line/armeria/issues/2075 --- .../server/internal/elasticsearch/LazyHttpClientImpl.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java index 17bff30fdd9..ad849461a38 100644 --- a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java +++ b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java @@ -83,6 +83,9 @@ Endpoint getEndpoint() { // // We are blocking up to the connection timeout which should be enough time for any DNS // resolution that hasn't happened yet to finish. + // + // TODO: When using health checks, if all hosts fail the health check, this will throw TOE + // See https://github.com/line/armeria/issues/2075 endpointGroup.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS); } catch (Exception e) { // We'll try again next time around. From 0a6a24f740180bc257853036642b5b73a1b7c8ea Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Fri, 13 Sep 2019 17:55:59 +0800 Subject: [PATCH 4/5] close --- .../server/internal/elasticsearch/LazyHttpClientImpl.java | 1 + 1 file changed, 1 insertion(+) diff --git a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java index ad849461a38..368df9987d3 100644 --- a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java +++ b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java @@ -88,6 +88,7 @@ Endpoint getEndpoint() { // See https://github.com/line/armeria/issues/2075 endpointGroup.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS); } catch (Exception e) { + endpointGroup.close(); // no-op when not health checked // We'll try again next time around. throw new IllegalStateException("couldn't connect any of " + rawEndpointGroup.endpoints(), e); } From eef1d015c0f20f048c257005b8cd1c976212205f Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Fri, 13 Sep 2019 18:36:49 +0800 Subject: [PATCH 5/5] Handles when there are no endpoints after awaiting, as that's not ok --- .../elasticsearch/LazyHttpClientImpl.java | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java index 368df9987d3..30749c3d0f8 100644 --- a/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java +++ b/zipkin-server/src/main/java/zipkin2/server/internal/elasticsearch/LazyHttpClientImpl.java @@ -70,32 +70,37 @@ final class LazyHttpClientImpl implements LazyHttpClient { } Endpoint getEndpoint() { - EndpointGroup rawEndpointGroup = initialEndpoints.get(); + EndpointGroup initial = initialEndpoints.get(); // Only health-check when there are alternative endpoints. There aren't when instanceof Endpoint - if (rawEndpointGroup instanceof Endpoint) return (Endpoint) rawEndpointGroup; + if (initial instanceof Endpoint) return (Endpoint) initial; - EndpointGroup endpointGroup = rawEndpointGroup; - if (healthCheck.isEnabled()) endpointGroup = decorateHealthCheck(rawEndpointGroup); + // Wrap the result when health checking is enabled. + EndpointGroup result = initial; + if (healthCheck.isEnabled()) result = decorateHealthCheck(initial); + boolean empty = true; + Exception thrown = null; try { // Since we aren't holding up server startup, or sitting on the event loop, it is ok to // block. The alternative is round-robin, which could be unlucky and hit a bad node first. // // We are blocking up to the connection timeout which should be enough time for any DNS // resolution that hasn't happened yet to finish. - // - // TODO: When using health checks, if all hosts fail the health check, this will throw TOE - // See https://github.com/line/armeria/issues/2075 - endpointGroup.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS); + empty = result.awaitInitialEndpoints(timeoutMillis, TimeUnit.MILLISECONDS).isEmpty(); } catch (Exception e) { - endpointGroup.close(); // no-op when not health checked - // We'll try again next time around. - throw new IllegalStateException("couldn't connect any of " + rawEndpointGroup.endpoints(), e); + thrown = e; + } + + // If health-checking is enabled, we can end up with no endpoints after waiting + // TODO: test this results in no cause following https://github.com/line/armeria/pull/2074 + if (empty) { + result.close(); // no-op when not health checked + throw new IllegalStateException("couldn't connect any of " + initial.endpoints(), thrown); } // Currently, there's no alternative to static registries when using Endpoint.ofGroup // https://github.com/line/armeria/issues/1084 - EndpointGroupRegistry.register("elasticsearch", endpointGroup, ROUND_ROBIN); + EndpointGroupRegistry.register("elasticsearch", result, ROUND_ROBIN); return Endpoint.ofGroup("elasticsearch"); }