Skip to content

Commit

Permalink
fix(kubernetes): Move generation checks to be object-specific (#4313)
Browse files Browse the repository at this point in the history
* fix(kubernetes): Replace null status with false

When the observed generation in the status does not match the object's
generation, we set the stable and failed conditions to null. This should
not be treated any differently than if the status generation did match
but the object had not fully rolled out---it just means we need to wait
a bit longer to see the new status.

The recent changes to orca have explicitly made it so that from orca's
perspective there is no different beteween stable/failed being null and
stable/failed being false. Make things more clear (and null-safe) by
removing the ability to null these conditions.

* fix(kubernetes): Move generation checks to be object-specific

We have a generic function in KubernetesManifest to determine
whether the status generation is correct. It has a lot of type-safety
and null-safety issues as it's operating on an untyped map.

As each status handler already converts the manifest to a strongly-typed
representation of its handled object, let's move the status checks to
happen on that object.

This in particular fixes some null-pointer issues that were documented
in the tests.

* test(kubernetes): Fix observed generation in stateful set test

One of the stateful set tests incorrectly had the observed generation
newer than the metadata one; fix this.

* fix(kubernetes): Fix pod phase status reporting

This was the last remaining TODO I had noted when adding tests to
all of the handlers.

* refactor(kubernetes): Pull unknown status to factory method

We frequently create an unknown status with the same text;
pull it up to a factory method so we don't keep repeating the
same text.

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
ezimanyi and mergify[bot] committed Feb 10, 2020
1 parent 2c7c9cd commit fe6cb54
Show file tree
Hide file tree
Showing 15 changed files with 148 additions and 86 deletions.
Expand Up @@ -287,6 +287,9 @@ public Object getStatus() {
return get("status");
}

// Consumers should convert to a strongly-typed object and implement type-specific logic instead
// of calling this function.
@Deprecated
@JsonIgnore
public int getObservedGeneration() {
Object statusObj = getStatus();
Expand All @@ -307,6 +310,9 @@ public int getObservedGeneration() {
return ((Number) observedGenObj).intValue();
}

// Consumers should convert to a strongly-typed object and implement type-specific logic instead
// of calling this function.
@Deprecated
@JsonIgnore
public int getGeneration() {
Object generationObj = getMetadata().get("generation");
Expand All @@ -327,6 +333,9 @@ public static String getFullResourceName(KubernetesKind kind, String name) {
return String.join(" ", kind.toString(), name);
}

// Consumers should convert to a strongly-typed object and implement type-specific logic instead
// of calling this function.
@Deprecated
@JsonIgnore
public boolean isNewerThanObservedGeneration() {
int generation = getGeneration();
Expand Down
Expand Up @@ -46,19 +46,19 @@ public interface Manifest {
@NonnullByDefault
@ToString
class Status {
private @Nullable Condition stable = Condition.withState(true);
private Condition stable = Condition.withState(true);
private Condition paused = Condition.withState(false);
private Condition available = Condition.withState(true);
private @Nullable Condition failed = Condition.withState(false);
private Condition failed = Condition.withState(false);

public static Status defaultStatus() {
return new Status();
}

public Status unknown() {
stable = null;
failed = null;
return this;
public static Status noneReported() {
return defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
}

public Status failed(@Nullable String message) {
Expand Down
Expand Up @@ -87,9 +87,7 @@ protected KubernetesV2CachingAgentFactory cachingAgentFactory() {
private Status status(V2alpha1CronJob job) {
V2alpha1CronJobStatus status = job.getStatus();
if (status == null) {
return Status.defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
return Status.noneReported();
}

return Status.defaultStatus();
Expand Down
Expand Up @@ -29,9 +29,11 @@
import com.netflix.spinnaker.clouddriver.kubernetes.v2.description.manifest.KubernetesKind;
import com.netflix.spinnaker.clouddriver.kubernetes.v2.description.manifest.KubernetesManifest;
import com.netflix.spinnaker.clouddriver.kubernetes.v2.model.Manifest.Status;
import io.kubernetes.client.openapi.models.V1ObjectMeta;
import io.kubernetes.client.openapi.models.V1beta2DaemonSet;
import io.kubernetes.client.openapi.models.V1beta2DaemonSetStatus;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.springframework.stereotype.Component;
Expand Down Expand Up @@ -87,9 +89,6 @@ protected KubernetesV2CachingAgentFactory cachingAgentFactory() {

@Override
public Status status(KubernetesManifest manifest) {
if (manifest.isNewerThanObservedGeneration()) {
return Status.defaultStatus().unknown();
}
V1beta2DaemonSet v1beta2DaemonSet =
KubernetesCacheDataConverter.getResource(manifest, V1beta2DaemonSet.class);
return status(v1beta2DaemonSet);
Expand All @@ -106,9 +105,11 @@ public Map<String, Object> hydrateSearchResult(InfrastructureCacheKey key) {
private Status status(V1beta2DaemonSet daemonSet) {
V1beta2DaemonSetStatus status = daemonSet.getStatus();
if (status == null) {
return Status.defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
return Status.noneReported();
}

if (!generationMatches(daemonSet, status)) {
return Status.defaultStatus().unstable(UnstableReason.OLD_GENERATION.getMessage());
}

if (!daemonSet.getSpec().getUpdateStrategy().getType().equalsIgnoreCase("rollingupdate")) {
Expand Down Expand Up @@ -145,6 +146,14 @@ private Status status(V1beta2DaemonSet daemonSet) {
return Status.defaultStatus();
}

private boolean generationMatches(V1beta2DaemonSet daemonSet, V1beta2DaemonSetStatus status) {
Optional<Long> metadataGeneration =
Optional.ofNullable(daemonSet.getMetadata()).map(V1ObjectMeta::getGeneration);
Optional<Long> statusGeneration = Optional.ofNullable(status.getObservedGeneration());

return statusGeneration.isPresent() && statusGeneration.equals(metadataGeneration);
}

// Unboxes an Integer, returning 0 if the input is null
private int defaultToZero(@Nullable Integer input) {
return input == null ? 0 : input;
Expand Down
Expand Up @@ -38,6 +38,7 @@
import io.kubernetes.client.openapi.models.V1Deployment;
import io.kubernetes.client.openapi.models.V1DeploymentCondition;
import io.kubernetes.client.openapi.models.V1DeploymentStatus;
import io.kubernetes.client.openapi.models.V1ObjectMeta;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
Expand Down Expand Up @@ -95,9 +96,6 @@ public Status status(KubernetesManifest manifest) {
if (!SUPPORTED_API_VERSIONS.contains(manifest.getApiVersion())) {
throw new UnsupportedVersionException(manifest);
}
if (manifest.isNewerThanObservedGeneration()) {
return Status.defaultStatus().unknown();
}
V1Deployment appsV1Deployment =
KubernetesCacheDataConverter.getResource(manifest, V1Deployment.class);
return status(appsV1Deployment);
Expand All @@ -111,9 +109,11 @@ protected KubernetesV2CachingAgentFactory cachingAgentFactory() {
private Status status(V1Deployment deployment) {
V1DeploymentStatus status = deployment.getStatus();
if (status == null) {
return Status.defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
return Status.noneReported();
}

if (!generationMatches(deployment, status)) {
return Status.defaultStatus().unstable(UnstableReason.OLD_GENERATION.getMessage());
}

List<V1DeploymentCondition> conditions =
Expand Down Expand Up @@ -155,6 +155,14 @@ private static Optional<String> getFailedReason(Collection<V1DeploymentCondition
.findAny();
}

private boolean generationMatches(V1Deployment deployment, V1DeploymentStatus status) {
Optional<Long> metadataGeneration =
Optional.ofNullable(deployment.getMetadata()).map(V1ObjectMeta::getGeneration);
Optional<Long> statusGeneration = Optional.ofNullable(status.getObservedGeneration());

return statusGeneration.isPresent() && statusGeneration.equals(metadataGeneration);
}

// Unboxes an Integer, returning 0 if the input is null
private static int defaultToZero(@Nullable Integer input) {
return input == null ? 0 : input;
Expand Down
Expand Up @@ -89,9 +89,7 @@ protected KubernetesV2CachingAgentFactory cachingAgentFactory() {
private Status status(V1Job job) {
V1JobStatus status = job.getStatus();
if (status == null) {
return Status.defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
return Status.noneReported();
}

int completions = 1;
Expand Down
Expand Up @@ -33,6 +33,8 @@
import io.kubernetes.client.openapi.models.V1PodStatus;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.Getter;
import org.springframework.stereotype.Component;

@Component
Expand Down Expand Up @@ -71,36 +73,45 @@ public Status status(KubernetesManifest manifest) {
V1PodStatus status = pod.getStatus();

if (status == null) {
return Status.defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
return Status.noneReported();
}

// https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/
String phase = status.getPhase();

// This code appears to be broken, as Kubernetes always reports pod status in initial caps
// and we're comparing to lower-case strings. We'll thus never enter any of the else-if cases.
// TODO(ezimanyi): Fix this when cleaning up status reporting code
if (phase == null) {
return Status.defaultStatus()
.unstable("No phase reported yet")
.unavailable("No availability reported");
} else if (phase.equals("pending")) {
return Status.defaultStatus()
.unstable("Pod is 'pending'")
.unavailable("Pod has not been scheduled yet");
} else if (phase.equals("unknown")) {
return Status.defaultStatus()
.unstable("Pod has 'unknown' phase")
.unavailable("No availability reported");
} else if (phase.equals("failed")) {
return Status.defaultStatus().failed("Pod has 'failed'").unavailable("Pod is not running");
PodPhase phase = PodPhase.fromString(status.getPhase());
if (phase.isUnstable()) {
return Status.defaultStatus().unstable(phase.getMessage()).unavailable(phase.getMessage());
}

return Status.defaultStatus();
}

private enum PodPhase {
// https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/
Pending(true, "Pod is pending"),
Running(false, ""),
Succeeded(false, ""),
Failed(true, "Pod has failed"),
Unknown(true, "Pod phase is unknown");

@Getter private String message;
@Getter private boolean unstable;

PodPhase(boolean unstable, String message) {
this.message = message;
this.unstable = unstable;
}

static PodPhase fromString(@Nullable String phase) {
if (phase == null) {
return Unknown;
}
try {
return valueOf(phase);
} catch (IllegalArgumentException e) {
return Unknown;
}
}
}

@Override
public Map<String, Object> hydrateSearchResult(InfrastructureCacheKey key) {
Map<String, Object> result = super.hydrateSearchResult(key);
Expand Down
Expand Up @@ -107,9 +107,7 @@ public Status status(KubernetesManifest manifest) {
private Status status(V1ReplicaSet replicaSet) {
V1ReplicaSetStatus status = replicaSet.getStatus();
if (status == null) {
return Status.defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
return Status.noneReported();
}

Optional<UnstableReason> unstableReason = checkReplicaCounts(replicaSet, status);
Expand Down
Expand Up @@ -31,13 +31,15 @@
import com.netflix.spinnaker.clouddriver.kubernetes.v2.description.manifest.KubernetesKind;
import com.netflix.spinnaker.clouddriver.kubernetes.v2.description.manifest.KubernetesManifest;
import com.netflix.spinnaker.clouddriver.kubernetes.v2.model.Manifest.Status;
import io.kubernetes.client.openapi.models.V1ObjectMeta;
import io.kubernetes.client.openapi.models.V1beta2RollingUpdateStatefulSetStrategy;
import io.kubernetes.client.openapi.models.V1beta2StatefulSet;
import io.kubernetes.client.openapi.models.V1beta2StatefulSetStatus;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
Expand Down Expand Up @@ -96,9 +98,6 @@ protected KubernetesV2CachingAgentFactory cachingAgentFactory() {

@Override
public Status status(KubernetesManifest manifest) {
if (manifest.isNewerThanObservedGeneration()) {
return Status.defaultStatus().unknown();
}
V1beta2StatefulSet v1beta2StatefulSet =
KubernetesCacheDataConverter.getResource(manifest, V1beta2StatefulSet.class);
return status(v1beta2StatefulSet);
Expand All @@ -125,9 +124,11 @@ private Status status(V1beta2StatefulSet statefulSet) {

V1beta2StatefulSetStatus status = statefulSet.getStatus();
if (status == null) {
return Status.defaultStatus()
.unstable("No status reported yet")
.unavailable("No availability reported");
return Status.noneReported();
}

if (!generationMatches(statefulSet, status)) {
return Status.defaultStatus().unstable(UnstableReason.OLD_GENERATION.getMessage());
}

int desiredReplicas = defaultToZero(statefulSet.getSpec().getReplicas());
Expand Down Expand Up @@ -170,6 +171,15 @@ private Status status(V1beta2StatefulSet statefulSet) {
return Status.defaultStatus();
}

private boolean generationMatches(
V1beta2StatefulSet statefulSet, V1beta2StatefulSetStatus status) {
Optional<Long> metadataGeneration =
Optional.ofNullable(statefulSet.getMetadata()).map(V1ObjectMeta::getGeneration);
Optional<Long> statusGeneration = Optional.ofNullable(status.getObservedGeneration());

return statusGeneration.isPresent() && statusGeneration.equals(metadataGeneration);
}

// Unboxes an Integer, returning 0 if the input is null
private static int defaultToZero(@Nullable Integer input) {
return input == null ? 0 : input;
Expand Down
Expand Up @@ -21,6 +21,7 @@
enum UnstableReason {
AVAILABLE_REPLICAS("Waiting for all replicas to be available"),
FULLY_LABELED_REPLICAS("Waiting for all replicas to be fully-labeled"),
OLD_GENERATION("Waiting for status generation to match updated object generation"),
OLD_REPLICAS("Waiting for old replicas to finish termination"),
READY_REPLICAS("Waiting for all replicas to be ready"),
UPDATED_REPLICAS("Waiting for all replicas to be updated");
Expand Down
Expand Up @@ -17,7 +17,6 @@
package com.netflix.spinnaker.clouddriver.kubernetes.v2.op.handler;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;

import com.netflix.spinnaker.clouddriver.kubernetes.v2.description.manifest.KubernetesManifest;
import com.netflix.spinnaker.clouddriver.kubernetes.v2.model.Manifest.Status;
Expand All @@ -32,9 +31,14 @@ final class KubernetesDaemonSetHandlerTest {
@Test
void noStatus() {
KubernetesManifest daemonSet = ManifestFetcher.getManifest("daemonset/base.yml");
// Documenting the existing behavior before refactoring
assertThatExceptionOfType(NullPointerException.class)
.isThrownBy(() -> handler.status(daemonSet));
Status status = handler.status(daemonSet);

assertThat(status.getStable().isState()).isFalse();
assertThat(status.getStable().getMessage()).isEqualTo("No status reported yet");
assertThat(status.getAvailable().isState()).isFalse();
assertThat(status.getAvailable().getMessage()).isEqualTo("No availability reported");
assertThat(status.getPaused().isState()).isFalse();
assertThat(status.getFailed().isState()).isFalse();
}

@Test
Expand Down Expand Up @@ -99,10 +103,12 @@ void oldGeneration() {
ManifestFetcher.getManifest("daemonset/base.yml", "daemonset/old-generation.yml");
Status status = handler.status(daemonSet);

assertThat(status.getStable()).isNull();
assertThat(status.getStable().isState()).isFalse();
assertThat(status.getStable().getMessage())
.isEqualTo("Waiting for status generation to match updated object generation");
assertThat(status.getAvailable().isState()).isTrue();
assertThat(status.getPaused().isState()).isFalse();
assertThat(status.getFailed()).isNull();
assertThat(status.getFailed().isState()).isFalse();
}

@Test
Expand Down

0 comments on commit fe6cb54

Please sign in to comment.