fix(kubernetes): Improve failure mode for unreachable cluster (#3770)

* fix(kubernetes): Improve failure mode for unreachable cluster We currently cache any call to get the namespaces for an account with an expiry time of 30 s using a memoized supplier. When a cluster is unreachable, the call to get the cluster's namespaces will hang and eventually time out; we then log a warning and return an empty array of namesapces. If the call to kubectl returns an error, we don't cache the empty list return value, so every call to get namespaces will call kubectl. This leads to a bad failure mode where a slow/unresponsive cluster leads to more calls than a fast/responsive cluster. To address this, when a call to get namespaces returns an error, cache the empty list we're returning for the same amount of time as a successful call. * fix(kubernetes): Use custom memoizer for kubectl calls We're currently using a guava memoized supplier for calls to get namespaces and crds in a cluster, with an expiration time of 30s. The way the guava memoizer works is to record the timestamp at the time it starts executing the supplier function rather than when the function completes. This means that if the function to get namespaces takes more than 30s, we never get a cache hit at all because the entry has expired by the time it is added to the cache. This leads to cases where the cache is least effective when it is most necessary. Instead write a small Memoizer class that wraps a caffeine cache, as caffeine caches mark cache entries at the time of insertion to the cache (after the work is finished) rather than when the work starts. Use this for caching kubectl calls instead of the guava cache.
spinnaker · Jun 10, 2019 · 637fc13 · 637fc13
1 parent 654c18f
commit 637fc13
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 15 deletions.
diff --git a/clouddriver-kubernetes/clouddriver-kubernetes.gradle b/clouddriver-kubernetes/clouddriver-kubernetes.gradle
@@ -22,6 +22,7 @@ dependencies {
   implementation "org.springframework.boot:spring-boot-actuator"
   implementation "org.springframework.boot:spring-boot-starter-web"
   implementation 'com.jayway.jsonpath:json-path:2.3.0'
+  implementation "com.github.ben-manes.caffeine:guava"
 
   testImplementation "cglib:cglib-nodep"
   testImplementation "org.objenesis:objenesis"

diff --git a/...ovy/com/netflix/spinnaker/clouddriver/kubernetes/v2/security/KubernetesV2Credentials.java b/...ovy/com/netflix/spinnaker/clouddriver/kubernetes/v2/security/KubernetesV2Credentials.java
@@ -18,7 +18,8 @@
 package com.netflix.spinnaker.clouddriver.kubernetes.v2.security;
 
 import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.google.common.base.Suppliers;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.netflix.spectator.api.Clock;
 import com.netflix.spectator.api.Registry;
 import com.netflix.spinnaker.clouddriver.kubernetes.config.CustomKubernetesResource;
@@ -90,8 +91,8 @@ public class KubernetesV2Credentials implements KubernetesCredentials {
   @Getter private final boolean debug;
 
   private String cachedDefaultNamespace;
-  private final com.google.common.base.Supplier<List<String>> liveNamespaceSupplier;
-  private final com.google.common.base.Supplier<List<KubernetesKind>> liveCrdSupplier;
+  private final Supplier<List<String>> liveNamespaceSupplier;
+  private final Supplier<List<KubernetesKind>> liveCrdSupplier;
 
   public KubernetesV2Credentials(
       Registry registry,
@@ -131,22 +132,29 @@ public KubernetesV2Credentials(
     this.debug = managedAccount.getDebug();
 
     this.liveNamespaceSupplier =
-        Suppliers.memoizeWithExpiration(
-            () ->
-                jobExecutor
+        Memoizer.memoizeWithExpiration(
+            () -> {
+              try {
+                return jobExecutor
                     .list(
                         this,
                         Collections.singletonList(KubernetesKind.NAMESPACE),
                         "",
                         new KubernetesSelectorList())
                     .stream()
                     .map(KubernetesManifest::getName)
-                    .collect(Collectors.toList()),
+                    .collect(Collectors.toList());
+              } catch (KubectlException e) {
+                log.error(
+                    "Could not list namespaces for account {}: {}", accountName, e.getMessage());
+                return new ArrayList<>();
+              }
+            },
             NAMESPACE_EXPIRY_SECONDS,
             TimeUnit.SECONDS);
 
     this.liveCrdSupplier =
-        Suppliers.memoizeWithExpiration(
+        Memoizer.memoizeWithExpiration(
             () -> {
               try {
                 return this.list(KubernetesKind.CUSTOM_RESOURCE_DEFINITION, "").stream()
@@ -177,6 +185,30 @@ public KubernetesV2Credentials(
             TimeUnit.SECONDS);
   }
 
+  /**
+   * Thin wrapper around a Caffeine cache that handles memoizing a supplier function with expiration
+   */
+  private static class Memoizer<T> implements Supplier<T> {
+    private static String CACHE_KEY = "key";
+    LoadingCache<String, T> cache;
+
+    private Memoizer(Supplier<T> supplier, long expirySeconds, TimeUnit timeUnit) {
+      this.cache =
+          Caffeine.newBuilder()
+              .expireAfterWrite(expirySeconds, timeUnit)
+              .build(key -> supplier.get());
+    }
+
+    public T get() {
+      return cache.get(CACHE_KEY);
+    }
+
+    public static <U> Memoizer<U> memoizeWithExpiration(
+        Supplier<U> supplier, long expirySeconds, TimeUnit timeUnit) {
+      return new Memoizer<>(supplier, expirySeconds, timeUnit);
+    }
+  }
+
   public enum InvalidKindReason {
     KIND_NONE("Kind [%s] is invalid"),
     EXPLICITLY_OMITTED_BY_CONFIGURATION(
@@ -273,13 +305,7 @@ public List<String> getDeclaredNamespaces() {
     if (!namespaces.isEmpty()) {
       result = namespaces;
     } else {
-      try {
-        result = liveNamespaceSupplier.get();
-
-      } catch (KubectlException e) {
-        log.warn("Could not list namespaces for account {}: {}", accountName, e.getMessage());
-        return new ArrayList<>();
-      }
+      result = liveNamespaceSupplier.get();
     }
 
     if (!omitNamespaces.isEmpty()) {