diff --git a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/strategies/MonitoredDeployStrategy.groovy b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/strategies/MonitoredDeployStrategy.groovy index 2c0999ed6f..59840081ca 100644 --- a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/strategies/MonitoredDeployStrategy.groovy +++ b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/strategies/MonitoredDeployStrategy.groovy @@ -15,7 +15,6 @@ */ package com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.strategies -import com.fasterxml.jackson.annotation.JsonProperty import com.netflix.spinnaker.config.DeploymentMonitorServiceProvider import com.netflix.spinnaker.orca.clouddriver.pipeline.cluster.RollbackClusterStage import com.netflix.spinnaker.orca.clouddriver.pipeline.cluster.ScaleDownClusterStage @@ -181,6 +180,7 @@ class MonitoredDeployStrategy implements Strategy { stage, SyntheticStageOwner.STAGE_AFTER ) + notifyDeployStartingStage.setAllowSiblingStagesToContinueOnFailure(true) stages << notifyDeployStartingStage } else { log.warn("No deployment monitor specified, all monitoring will be skipped") @@ -246,7 +246,7 @@ class MonitoredDeployStrategy implements Strategy { if (stageData.deploymentMonitor?.id) { evalContext.currentProgress = p - stages << newStage( + Stage evaluateHealthStage = newStage( stage.execution, EvaluateDeploymentHealthStage.PIPELINE_CONFIG_TYPE, "Evaluate health of deployed instances", @@ -254,6 +254,9 @@ class MonitoredDeployStrategy implements Strategy { stage, SyntheticStageOwner.STAGE_AFTER ) + evaluateHealthStage.setAllowSiblingStagesToContinueOnFailure(true) + + stages << evaluateHealthStage } }) diff --git a/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/Stage.java 
b/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/Stage.java index 54b614dbaa..6fdc731e1a 100644 --- a/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/Stage.java +++ b/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/Stage.java @@ -39,6 +39,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; +import com.netflix.spinnaker.kork.exceptions.SpinnakerException; import com.netflix.spinnaker.orca.ExecutionStatus; import com.netflix.spinnaker.orca.jackson.OrcaObjectMapper; import com.netflix.spinnaker.orca.pipeline.model.support.RequisiteStageRefIdDeserializer; @@ -755,6 +756,38 @@ public Optional getTimeout() { return Optional.empty(); } + /** + * Check if siblings may continue if this stage returns FAILED_CONTINUE. Normally, if a synthetic + * child fails with FAILED_CONTINUE {@link + * com.netflix.spinnaker.orca.q.handler.CompleteStageHandler} will propagate the FAILED_CONTINUE + * status to the parent, preventing all subsequent sibling stages from executing. 
This allows for + * an option (similar to Tasks) to continue execution if a child stage returns FAILED_CONTINUE + * + * @return true if we want to allow subsequent siblings to continue even if this stage returns + * FAILED_CONTINUE + */ + @JsonIgnore + public boolean getAllowSiblingStagesToContinueOnFailure() { + if (parentStageId == null) { + return false; + } + + StageContext context = (StageContext) getContext(); + return (boolean) context.getCurrentOnly("allowSiblingStagesToContinueOnFailure", false); + } + + @JsonIgnore + public void setAllowSiblingStagesToContinueOnFailure(boolean propagateFailuresToParent) { + if (parentStageId == null) { + throw new SpinnakerException( + String.format( + "Not allowed to set propagateFailuresToParent on a non-child stage: %s with id %s", + getType(), getId())); + } + + context.put("allowSiblingStagesToContinueOnFailure", propagateFailuresToParent); + } + public static class LastModifiedDetails implements Serializable { private String user; diff --git a/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/StageContext.java b/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/StageContext.java index 98f4166558..ac206e9806 100644 --- a/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/StageContext.java +++ b/orca-core/src/main/java/com/netflix/spinnaker/orca/pipeline/model/StageContext.java @@ -64,6 +64,16 @@ public Object get(@Nullable Object key) { } } + /** + * Get a value from the current context ONLY - never looking at the ancestors' outputs + * + * @param key The key to look up + * @param defaultValue default value to return if key is not present + * @return the value, or defaultValue if the key is not present + */ + Object getCurrentOnly(@Nullable Object key, Object defaultValue) { + return super.getOrDefault(key, defaultValue); + } /* * Gets all objects matching 'key', sorted by proximity to the current stage. 
* If the key exists in the current context, it will be the first element returned diff --git a/orca-core/src/test/groovy/com/netflix/spinnaker/orca/pipeline/model/StageSpec.groovy b/orca-core/src/test/groovy/com/netflix/spinnaker/orca/pipeline/model/StageSpec.groovy index 0bef738f9b..89d9b5827f 100644 --- a/orca-core/src/test/groovy/com/netflix/spinnaker/orca/pipeline/model/StageSpec.groovy +++ b/orca-core/src/test/groovy/com/netflix/spinnaker/orca/pipeline/model/StageSpec.groovy @@ -16,6 +16,7 @@ package com.netflix.spinnaker.orca.pipeline.model +import com.netflix.spinnaker.kork.exceptions.SpinnakerException import spock.lang.Specification import spock.lang.Unroll @@ -318,4 +319,41 @@ class StageSpec extends Specification { 100 | 200 | null || 200 100 | 200 | 300 || 300 } + + def "should set propagateFailuresToParent correctly"() { + given: + def pipeline = pipeline { + stage { + refId = "parent" + + stage { + refId = "child" + } + } + } + + def parentStage = pipeline.stageByRef("parent") + def childStage = pipeline.stageByRef("child") + + when: 'trying to set PropagateFailuresToParent on parent stage' + parentStage.setAllowSiblingStagesToContinueOnFailure(true) + + then: 'it should fail' + thrown(SpinnakerException) + + when: 'parent stage erroneously has the setting in context' + parentStage = pipeline.stageByRef("parent") + parentStage.context.put("allowSiblingStagesToContinueOnFailure", true) + + then: 'we ignore the value in context' + childStage.getAllowSiblingStagesToContinueOnFailure() == false + + when: 'trying to set PropagateFailuresToParent on a child stage' + childStage.setAllowSiblingStagesToContinueOnFailure(true) + + then: 'it should succeed' + noExceptionThrown() + childStage.context.allowSiblingStagesToContinueOnFailure == true + childStage.getAllowSiblingStagesToContinueOnFailure() == true + } } diff --git a/orca-queue/src/main/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandler.kt 
b/orca-queue/src/main/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandler.kt index b46e5bc8de..6700f7cb48 100644 --- a/orca-queue/src/main/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandler.kt +++ b/orca-queue/src/main/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandler.kt @@ -142,7 +142,7 @@ class CompleteStageHandler( // When a synthetic stage ends with FAILED_CONTINUE, propagate that status up to the stage's // parent so that no more of the parent's synthetic children will run. - if (stage.status == FAILED_CONTINUE && stage.syntheticStageOwner != null) { + if (stage.status == FAILED_CONTINUE && stage.syntheticStageOwner != null && !stage.allowSiblingStagesToContinueOnFailure) { queue.push(message.copy(stageId = stage.parentStageId!!)) } else if (stage.status in listOf(SUCCEEDED, FAILED_CONTINUE, SKIPPED)) { stage.startNext() diff --git a/orca-queue/src/test/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandlerTest.kt b/orca-queue/src/test/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandlerTest.kt index f8a2a19ad1..10522c245e 100644 --- a/orca-queue/src/test/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandlerTest.kt +++ b/orca-queue/src/test/kotlin/com/netflix/spinnaker/orca/q/handler/CompleteStageHandlerTest.kt @@ -1078,6 +1078,41 @@ object CompleteStageHandlerTest : SubjectSpek({ verify(queue).push(message.copy(stageId = pipeline.stageByRef("1").id)) } } + + given("a synthetic stage's task ends with $FAILED_CONTINUE status and the synthetic allows siblings to continue") { + val pipeline = pipeline { + stage { + refId = "1" + type = stageWithSyntheticBefore.type + stageWithSyntheticBefore.buildBeforeStages(this) + stageWithSyntheticBefore.plan(this) + } + } + + val syntheticStage = pipeline.stageByRef("1<1") + syntheticStage.allowSiblingStagesToContinueOnFailure = true + val message = CompleteStage(syntheticStage) + + beforeGroup { + pipeline.stageById(message.stageId).apply { + status 
= RUNNING + singleTaskStage.plan(this) + tasks.first().status = FAILED_CONTINUE + } + + whenever(repository.retrieve(PIPELINE, message.executionId)) doReturn pipeline + } + + on("receiving the message") { + subject.handle(message) + } + + afterGroup(::resetMocks) + + it("starts executing the next sibling") { + verify(queue).push(StartStage(pipeline.stageByRef("1<2"))) + } + } } setOf(TERMINAL, CANCELED).forEach { taskStatus ->