Skip to content

Commit

Permalink
fix(kubernetes): Improve failure mode for unreachable cluster (#3770)
Browse files Browse the repository at this point in the history
* fix(kubernetes): Improve failure mode for unreachable cluster

We currently cache any call to get the namespaces for an account
with an expiry time of 30 s using a memoized supplier.

When a cluster is unreachable, the call to get the cluster's
namespaces will hang and eventually time out; we then log a
warning and return an empty array of namesapces.

If the call to kubectl returns an error, we don't cache the empty
list return value, so every call to get namespaces will call
kubectl.  This leads to a bad failure mode where a slow/unresponsive
cluster leads to more calls than a fast/responsive cluster.

To address this, when a call to get namespaces returns an error,
cache the empty list we're returning for the same amount of time
as a successful call.

* fix(kubernetes): Use custom memoizer for kubectl calls

We're currently using a guava memoized supplier for calls to
get namespaces and crds in a cluster, with an expiration time of
30s.

The way the guava memoizer works is to record the timestamp at the
time it starts executing the supplier function rather than when
the function completes.  This means that if the function to get
namespaces takes more than 30s, we never get a cache hit at all
because the entry has expired by the time it is added to the
cache. This leads to cases where the cache is least effective when
it is most necessary.

Instead write a small Memoizer class that wraps a caffeine cache,
as caffeine caches mark cache entries at the time of insertion to
the cache (after the work is finished) rather than when the work
starts. Use this for caching kubectl calls instead of the guava
cache.
  • Loading branch information
ezimanyi committed Jun 10, 2019
1 parent 654c18f commit 637fc13
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 15 deletions.
1 change: 1 addition & 0 deletions clouddriver-kubernetes/clouddriver-kubernetes.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies {
implementation "org.springframework.boot:spring-boot-actuator"
implementation "org.springframework.boot:spring-boot-starter-web"
implementation 'com.jayway.jsonpath:json-path:2.3.0'
implementation "com.github.ben-manes.caffeine:guava"

testImplementation "cglib:cglib-nodep"
testImplementation "org.objenesis:objenesis"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
package com.netflix.spinnaker.clouddriver.kubernetes.v2.security;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.common.base.Suppliers;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.netflix.spectator.api.Clock;
import com.netflix.spectator.api.Registry;
import com.netflix.spinnaker.clouddriver.kubernetes.config.CustomKubernetesResource;
Expand Down Expand Up @@ -90,8 +91,8 @@ public class KubernetesV2Credentials implements KubernetesCredentials {
@Getter private final boolean debug;

private String cachedDefaultNamespace;
private final com.google.common.base.Supplier<List<String>> liveNamespaceSupplier;
private final com.google.common.base.Supplier<List<KubernetesKind>> liveCrdSupplier;
private final Supplier<List<String>> liveNamespaceSupplier;
private final Supplier<List<KubernetesKind>> liveCrdSupplier;

public KubernetesV2Credentials(
Registry registry,
Expand Down Expand Up @@ -131,22 +132,29 @@ public KubernetesV2Credentials(
this.debug = managedAccount.getDebug();

this.liveNamespaceSupplier =
Suppliers.memoizeWithExpiration(
() ->
jobExecutor
Memoizer.memoizeWithExpiration(
() -> {
try {
return jobExecutor
.list(
this,
Collections.singletonList(KubernetesKind.NAMESPACE),
"",
new KubernetesSelectorList())
.stream()
.map(KubernetesManifest::getName)
.collect(Collectors.toList()),
.collect(Collectors.toList());
} catch (KubectlException e) {
log.error(
"Could not list namespaces for account {}: {}", accountName, e.getMessage());
return new ArrayList<>();
}
},
NAMESPACE_EXPIRY_SECONDS,
TimeUnit.SECONDS);

this.liveCrdSupplier =
Suppliers.memoizeWithExpiration(
Memoizer.memoizeWithExpiration(
() -> {
try {
return this.list(KubernetesKind.CUSTOM_RESOURCE_DEFINITION, "").stream()
Expand Down Expand Up @@ -177,6 +185,30 @@ public KubernetesV2Credentials(
TimeUnit.SECONDS);
}

/**
* Thin wrapper around a Caffeine cache that handles memoizing a supplier function with expiration
*/
private static class Memoizer<T> implements Supplier<T> {
private static String CACHE_KEY = "key";
LoadingCache<String, T> cache;

private Memoizer(Supplier<T> supplier, long expirySeconds, TimeUnit timeUnit) {
this.cache =
Caffeine.newBuilder()
.expireAfterWrite(expirySeconds, timeUnit)
.build(key -> supplier.get());
}

public T get() {
return cache.get(CACHE_KEY);
}

public static <U> Memoizer<U> memoizeWithExpiration(
Supplier<U> supplier, long expirySeconds, TimeUnit timeUnit) {
return new Memoizer<>(supplier, expirySeconds, timeUnit);
}
}

public enum InvalidKindReason {
KIND_NONE("Kind [%s] is invalid"),
EXPLICITLY_OMITTED_BY_CONFIGURATION(
Expand Down Expand Up @@ -273,13 +305,7 @@ public List<String> getDeclaredNamespaces() {
if (!namespaces.isEmpty()) {
result = namespaces;
} else {
try {
result = liveNamespaceSupplier.get();

} catch (KubectlException e) {
log.warn("Could not list namespaces for account {}: {}", accountName, e.getMessage());
return new ArrayList<>();
}
result = liveNamespaceSupplier.get();
}

if (!omitNamespaces.isEmpty()) {
Expand Down

0 comments on commit 637fc13

Please sign in to comment.