Skip to content

Commit

Permalink
perf(kubernetes): Defer checking valid kinds until after startup (#4005)
Browse files Browse the repository at this point in the history
* refactor(kubernetes): Rename InvalidKindReason and avoid nulls

Instead of having an enum InvalidKindReason, and returning null
when a kind is valid, jsut call it KindStatus and use an enum
value VALID to represent a valid kind. This avoids the need to
use null as a sentinel value.

* refactor(kubernetes): Change lists to immutable sets

We store a list of kinds and kinds to omit on each account;
these lists are never modified after they are created, and
we only ever do set-like operations on them (contains, size).

In order to make lookups more efficient, and prevent bugs (and
make the following refactor easier), change the lists to immutable
sets.

* perf(kubernetes): Defer checking valid kinds until after startup

Instead of blocking the startup of clouddriver while we check
whether we can read every kind in every account, defer these
checks until first use.

Encapsulate this logic in an inner class called KindValidator;
this class handles keeping track of what kinds are valid for an
account as well as whether metrics are readable for the account.
It will only ever attempt to read a kind once (the first time it
is asked), and short-circuits just like the earlier logic based on
the contents of kinds and omitKinds.

* refactor(kubernetes): Rename isMetricsComputed

The fact that whether metrics is enabled or not is computed by
reading metrics is an implementation details; make it easier for
consumers by renaming this function isMetricsEnabled.

Also, remove the getter for metrics, which should never be read
by consumers of the class, which should always use isMetricsEnabled.

* perf(kubernetes): Don't check valid kinds in agent scheduling

We currently check whether kinds are valid while scheduling caching
agents which blocks agents starting. Caching agents themselves should
(and currently do) check whether kinds are valid before actually
querying them; remove the check that occurs before scheduling agents.

This means that at worst we'll schedule a no-op agent that immediately
returns for an invalid kind (though given that kinds are generally
grouped in agents that handle many kinds, this likely won't happen
much in practice).

While the earlier commit in this PR deferred checking readable kinds,
the fact that we're checking all readable kinds during agent scheduling
means we're still effectively blocking startup on checking readable
kinds. Removing this allow startup to proceed before we've checked all
readable kinds.

* test(kubernetes): Fix function call in tests

I had this change locally, but forgot to commit it

* style(kubernetes): Address PR review comments

* refactor(kubernetes): Move non-permission check logic from inner class

Consolidate the inner class PermissionValidator so that it only
handles permission checks; pull the logic about handling kinds and
omitKinds back up to the outer class.
  • Loading branch information
ezimanyi committed Sep 6, 2019
1 parent b6bd26b commit c0cf70b
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 161 deletions.
Expand Up @@ -17,8 +17,6 @@

package com.netflix.spinnaker.clouddriver.kubernetes.v2.caching.agent;

import static com.netflix.spinnaker.clouddriver.kubernetes.v2.description.manifest.KubernetesKind.NONE;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.spectator.api.Registry;
import com.netflix.spinnaker.clouddriver.kubernetes.caching.KubernetesCachingAgent;
Expand Down Expand Up @@ -70,7 +68,6 @@ public Collection<KubernetesCachingAgent> buildAllCachingAgents(
propertyRegistry.values().stream()
.map(KubernetesResourceProperties::getHandler)
.filter(Objects::nonNull)
.filter(h -> v2Credentials.isValidKind(h.kind()) || h.kind().equals(NONE))
.map(
h ->
h.buildCachingAgent(
Expand All @@ -83,7 +80,7 @@ public Collection<KubernetesCachingAgent> buildAllCachingAgents(
.filter(Objects::nonNull)
.forEach(c -> result.add((KubernetesCachingAgent) c)));

if (v2Credentials.isMetricsComputed()) {
if (v2Credentials.isMetricsEnabled()) {
IntStream.range(0, credentials.getCacheThreads())
.boxed()
.forEach(
Expand Down
Expand Up @@ -83,7 +83,7 @@ public KubernetesV2Manifest getManifest(
: Collections.emptyList();

List<KubernetesPodMetric.ContainerMetric> metrics = Collections.emptyList();
if (kind.equals(KubernetesKind.POD) && credentials.isMetricsComputed()) {
if (kind.equals(KubernetesKind.POD) && credentials.isMetricsEnabled()) {
metrics =
credentials.topPod(namespace, parsedName.getRight()).stream()
.map(KubernetesPodMetric::getContainerMetrics)
Expand Down
Expand Up @@ -17,11 +17,14 @@

package com.netflix.spinnaker.clouddriver.kubernetes.v2.security;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static lombok.EqualsAndHashCode.Include;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableSet;
import com.netflix.spectator.api.Clock;
import com.netflix.spectator.api.Registry;
import com.netflix.spinnaker.clouddriver.kubernetes.config.CustomKubernetesResource;
Expand All @@ -47,8 +50,8 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
Expand Down Expand Up @@ -76,11 +79,9 @@ public class KubernetesV2Credentials implements KubernetesCredentials {

@Include @Getter private final List<String> omitNamespaces;

@Include private final List<KubernetesKind> kinds;
@Include private final ImmutableSet<KubernetesKind> kinds;

@Include List<String> omitKinds;

private final Map<KubernetesKind, InvalidKindReason> omitKindsComputed;
@Include private final ImmutableSet<KubernetesKind> omitKinds;

@Include @Getter private final List<CustomKubernetesResource> customResources;

Expand Down Expand Up @@ -108,9 +109,7 @@ public class KubernetesV2Credentials implements KubernetesCredentials {

@Include @JsonIgnore @Getter private final List<String> oAuthScopes;

@Include @Getter private boolean metrics;

@Getter private boolean metricsComputed;
@Include private boolean metrics;

@Include @Getter private final boolean debug;

Expand All @@ -119,6 +118,7 @@ public class KubernetesV2Credentials implements KubernetesCredentials {
private final Supplier<List<KubernetesKind>> liveCrdSupplier;
@Getter private final ResourcePropertyRegistry resourcePropertyRegistry;
@Getter private final KubernetesKindRegistry kindRegistry;
private final PermissionValidator permissionValidator;

public KubernetesV2Credentials(
Registry registry,
Expand Down Expand Up @@ -149,17 +149,14 @@ public KubernetesV2Credentials(
return new KubernetesKindProperties(k, true, false, true);
}))
.map(KubernetesKindProperties::getKubernetesKind)
.collect(Collectors.toList());
.collect(toImmutableSet());
// omitKinds is a simple placeholder that we can use to compare one instance to another
// when refreshing credentials.
this.omitKinds = managedAccount.getOmitKinds();

this.omitKindsComputed =
this.omitKinds =
managedAccount.getOmitKinds().stream()
.map(KubernetesKind::fromString)
.collect(
Collectors.toMap(
k -> k, k -> InvalidKindReason.EXPLICITLY_OMITTED_BY_CONFIGURATION));
.collect(toImmutableSet());
this.permissionValidator = new PermissionValidator();

this.customResources = managedAccount.getCustomResources();

Expand All @@ -179,7 +176,6 @@ public KubernetesV2Credentials(
this.oAuthScopes = managedAccount.getOAuthScopes();

this.metrics = managedAccount.isMetrics();
this.metricsComputed = managedAccount.isMetrics();

this.debug = managedAccount.isDebug();

Expand Down Expand Up @@ -272,7 +268,8 @@ public static <U> Memoizer<U> memoizeWithExpiration(
}
}

public enum InvalidKindReason {
public enum KubernetesKindStatus {
VALID("Kind [%s] is a valid kind"),
KIND_NONE("Kind [%s] is invalid"),
EXPLICITLY_OMITTED_BY_CONFIGURATION(
"Kind [%s] included in 'omitKinds' of kubernetes account configuration"),
Expand All @@ -282,7 +279,7 @@ public enum InvalidKindReason {

private final String errorMessage;

InvalidKindReason(String errorMessage) {
KubernetesKindStatus(String errorMessage) {
this.errorMessage = errorMessage;
}

Expand All @@ -292,17 +289,33 @@ public String getErrorMessage(KubernetesKind kind) {
}

public boolean isValidKind(@Nonnull KubernetesKind kind) {
return getInvalidKindReason(kind) == null;
return getKindStatus(kind) == KubernetesKindStatus.VALID;
}

public InvalidKindReason getInvalidKindReason(@Nonnull KubernetesKind kind) {
/**
* Returns the status of a given kubernetes kind with respect to the current account. Checks of
* whether a kind is readable are cached for the lifetime of the process (and are only performed
* when a kind is otherwise considered valid for the account).
*/
@Nonnull
public KubernetesKindStatus getKindStatus(@Nonnull KubernetesKind kind) {
if (kind.equals(KubernetesKind.NONE)) {
return InvalidKindReason.KIND_NONE;
} else if (!this.kinds.isEmpty()) {
return !kinds.contains(kind) ? InvalidKindReason.MISSING_FROM_ALLOWED_KINDS : null;
} else {
return this.omitKindsComputed.getOrDefault(kind, null);
return KubernetesKindStatus.KIND_NONE;
}

if (!kinds.isEmpty()) {
return kinds.contains(kind)
? KubernetesKindStatus.VALID
: KubernetesKindStatus.MISSING_FROM_ALLOWED_KINDS;
}

if (omitKinds.contains(kind)) {
return KubernetesKindStatus.EXPLICITLY_OMITTED_BY_CONFIGURATION;
}

return permissionValidator.isKindReadable(kind)
? KubernetesKindStatus.VALID
: KubernetesKindStatus.READ_ERROR;
}

public String getDefaultNamespace() {
Expand Down Expand Up @@ -352,10 +365,6 @@ public void initialize() {
// ensure this is called at least once before the credentials object is created to ensure all
// crds are registered
this.liveCrdSupplier.get();

if (checkPermissionsOnStartup) {
determineOmitKinds();
}
}

public List<KubernetesKind> getCrds() {
Expand All @@ -379,92 +388,8 @@ public List<String> getDeclaredNamespaces() {
return result;
}

private void determineOmitKinds() {
List<String> namespaces = getDeclaredNamespaces();

if (namespaces.isEmpty()) {
log.warn(
"There are no namespaces configured (or loadable) -- please check that the list of 'omitNamespaces' for account '"
+ accountName
+ "' doesn't prevent access from all namespaces in this cluster, or that the cluster is reachable.");
return;
}

// we are making the assumption that the roles granted to spinnaker for this account in all
// namespaces are identical.
// otherwise, checking all namespaces for all kinds is too expensive in large clusters (imagine
// a cluster with 100s of namespaces).
String checkNamespace = namespaces.get(0);
List<KubernetesKind> allKinds =
kindRegistry.getRegisteredKinds().stream()
.map(KubernetesKindProperties::getKubernetesKind)
.collect(Collectors.toList());

log.info(
"Checking permissions on configured kinds for account {}... {}", accountName, allKinds);
long startTime = System.nanoTime();

// compute list of kinds we explicitly know the server doesn't support
try {
Set<KubernetesKind> availableResources = jobExecutor.apiResources(this);
Map<KubernetesKind, InvalidKindReason> unavailableKinds =
allKinds.stream()
.filter(Objects::nonNull)
.filter(k -> !k.equals(KubernetesKind.NONE))
.filter(k -> !availableResources.contains(k))
.collect(Collectors.toConcurrentMap(k -> k, k -> InvalidKindReason.READ_ERROR));

omitKindsComputed.putAll(unavailableKinds);
} catch (Exception e) {
log.warn("Failed to evaluate kinds available on server. {}.", e.getMessage());
}

Map<KubernetesKind, InvalidKindReason> unreadableKinds =
allKinds
.parallelStream()
.filter(Objects::nonNull)
.filter(k -> !k.equals(KubernetesKind.NONE))
.filter(k -> !omitKindsComputed.containsKey(k))
.filter(k -> !canReadKind(k, checkNamespace))
.collect(
Collectors.toConcurrentMap(Function.identity(), k -> InvalidKindReason.READ_ERROR));
long endTime = System.nanoTime();
long duration = (endTime - startTime) / 1000000;
log.info("determineOmitKinds for account {} took {} ms", accountName, duration);
omitKindsComputed.putAll(unreadableKinds);

if (metricsComputed) {
try {
log.info("Checking if pod metrics are readable for account {}...", accountName);
topPod(checkNamespace, null);
} catch (Exception e) {
log.warn(
"Could not read pod metrics in account '{}' for reason: {}",
accountName,
e.getMessage());
log.debug("Reading logs for account '{}' failed with exception: ", accountName, e);
metricsComputed = false;
}
}
}

private boolean canReadKind(KubernetesKind kind, String checkNamespace) {
log.info("Checking if {} is readable in account '{}'...", kind, accountName);
boolean allowed;
if (kindRegistry.getRegisteredKind(kind).isNamespaced()) {
allowed = jobExecutor.authCanINamespaced(this, checkNamespace, kind.getName(), "list");
} else {
allowed = jobExecutor.authCanI(this, kind.getName(), "list");
}

if (!allowed) {
log.info(
"Kind {} will not be cached in account '{}' because it cannot be listed.",
kind,
accountName);
}

return allowed;
public boolean isMetricsEnabled() {
return metrics && permissionValidator.isMetricsReadable();
}

public KubernetesManifest get(KubernetesKind kind, String namespace, String name) {
Expand Down Expand Up @@ -670,4 +595,109 @@ private <T> T runAndRecordMetrics(
}
}
}

/**
* Handles validating which kubernetes kinds the current account has permission to read, as well
* as whether the current account has permission to read pod metrics.
*/
private class PermissionValidator {
private final Supplier<String> checkNamespace = Suppliers.memoize(this::computeCheckNamespace);
private final Map<KubernetesKind, Boolean> readableKinds = new ConcurrentHashMap<>();
private final Supplier<Boolean> metricsReadable = Suppliers.memoize(this::checkMetricsReadable);

private String getCheckNamespace() {
return checkNamespace.get();
}

private String computeCheckNamespace() {
List<String> namespaces = getDeclaredNamespaces();

if (namespaces.isEmpty()) {
log.warn(
"There are no namespaces configured (or loadable) -- please check that the list of"
+ " 'omitNamespaces' for account '{}' doesn't prevent access from all namespaces"
+ " in this cluster, or that the cluster is reachable.",
accountName);
return null;
}

// we are making the assumption that the roles granted to spinnaker for this account in all
// namespaces are identical.
// otherwise, checking all namespaces for all kinds is too expensive in large clusters
// (imagine a cluster with 100s of namespaces).
return namespaces.get(0);
}

private boolean skipPermissionChecks() {
// checkPermissionsOnStartup exists from when permission checks were done at startup (and took
// a long time); this flag was added to skip the checks and assume all kinds were readable.
// Now that permissions are checked on-the-fly, this flag is probably not necessary, but for
// now we'll continue to support the prior behavior, which is to short-circuit and assume all
// kinds are readable before checking.
// Before removing this flag, we'll need to check that nobody is depending on Spinnaker
// skipping permission checks for reasons other than performance. (For example, users may
// be relying on the skipped permission checks because of differences in permissions between
// namespaces.)
return !checkPermissionsOnStartup;
}

private boolean canReadKind(KubernetesKind kind) {
if (skipPermissionChecks()) {
return true;
}
log.info("Checking if {} is readable in account '{}'...", kind, accountName);
boolean allowed;
if (kindRegistry.getRegisteredKind(kind).isNamespaced()) {
allowed =
jobExecutor.authCanINamespaced(
KubernetesV2Credentials.this, getCheckNamespace(), kind.getName(), "list");
} else {
allowed = jobExecutor.authCanI(KubernetesV2Credentials.this, kind.getName(), "list");
}

if (!allowed) {
log.info(
"Kind {} will not be cached in account '{}' because it cannot be listed.",
kind,
accountName);
}

return allowed;
}

private boolean checkMetricsReadable() {
if (skipPermissionChecks()) {
return true;
}
try {
log.info("Checking if pod metrics are readable for account {}...", accountName);
topPod(getCheckNamespace(), null);
return true;
} catch (Exception e) {
log.warn(
"Could not read pod metrics in account '{}' for reason: {}",
accountName,
e.getMessage());
log.debug("Reading logs for account '{}' failed with exception: ", accountName, e);
return false;
}
}

/**
* Returns whether the given kind is readable for the current kubernetes account. This check is
* cached for each kind for the lifetime of the process, and subsequent calls return the cached
* value.
*/
boolean isKindReadable(@Nonnull KubernetesKind kind) {
return readableKinds.computeIfAbsent(kind, this::canReadKind);
}

/**
* Returns whether metrics are readable for the current kubernetes account. This check is cached
* for the lifetime of the process, and subsequent calls return the cached value.
*/
boolean isMetricsReadable() {
return metricsReadable.get();
}
}
}

0 comments on commit c0cf70b

Please sign in to comment.