Skip to content

Commit

Permalink
feat(orchestration): Allow sibling stages to continue on FAILED_CONTI…
Browse files Browse the repository at this point in the history
…NUE (#3252)

Normally, if a synthetic child stage fails with `FAILED_CONTINUE` the `CompleteStageHandler`
will propagate the `FAILED_CONTINUE` status to the parent. This will prevent all subsequent
sibling stages from executing.
This change allows for an option (similar to Tasks) to continue execution even when a
child stage has a `status=FAILED_CONTINUE`

This option can only be set on a stage that is a synthetic child.
This is needed for Monitored Deploy where we want to allow the deploy to proceed even if the
monitoring fails
  • Loading branch information
marchello2000 committed Oct 25, 2019
1 parent 7f02cb8 commit ae0e2cf
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/
package com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.strategies

import com.fasterxml.jackson.annotation.JsonProperty
import com.netflix.spinnaker.config.DeploymentMonitorServiceProvider
import com.netflix.spinnaker.orca.clouddriver.pipeline.cluster.RollbackClusterStage
import com.netflix.spinnaker.orca.clouddriver.pipeline.cluster.ScaleDownClusterStage
Expand Down Expand Up @@ -181,6 +180,7 @@ class MonitoredDeployStrategy implements Strategy {
stage,
SyntheticStageOwner.STAGE_AFTER
)
notifyDeployStartingStage.setAllowSiblingStagesToContinueOnFailure(true)
stages << notifyDeployStartingStage
} else {
log.warn("No deployment monitor specified, all monitoring will be skipped")
Expand Down Expand Up @@ -246,14 +246,17 @@ class MonitoredDeployStrategy implements Strategy {
if (stageData.deploymentMonitor?.id) {
evalContext.currentProgress = p

stages << newStage(
Stage evaluateHealthStage = newStage(
stage.execution,
EvaluateDeploymentHealthStage.PIPELINE_CONFIG_TYPE,
"Evaluate health of deployed instances",
evalContext,
stage,
SyntheticStageOwner.STAGE_AFTER
)
evaluateHealthStage.setAllowSiblingStagesToContinueOnFailure(true)

stages << evaluateHealthStage
}
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.netflix.spinnaker.kork.exceptions.SpinnakerException;
import com.netflix.spinnaker.orca.ExecutionStatus;
import com.netflix.spinnaker.orca.jackson.OrcaObjectMapper;
import com.netflix.spinnaker.orca.pipeline.model.support.RequisiteStageRefIdDeserializer;
Expand Down Expand Up @@ -755,6 +756,38 @@ public Optional<Long> getTimeout() {
return Optional.empty();
}

/**
* Check if this stage should propagate FAILED_CONTINUE to parent stage. Normally, if a synthetic
* child fails with FAILED_CONTINUE {@link
* com.netflix.spinnaker.orca.q.handler.CompleteStageHandler} will propagate the FAILED_CONTINUE
* status to the parent, preventing all subsequent sibling stages from executing. This allows for
* an option (similar to Tasks) to continue execution if a child stage returns FAILED_CONTINUE
*
* @return true if we want to allow subsequent siblings to continue even if this stage returns
* FAILED_CONTINUE
*/
@JsonIgnore
public boolean getAllowSiblingStagesToContinueOnFailure() {
if (parentStageId == null) {
return false;
}

StageContext context = (StageContext) getContext();
return (boolean) context.getCurrentOnly("allowSiblingStagesToContinueOnFailure", false);
}

@JsonIgnore
public void setAllowSiblingStagesToContinueOnFailure(boolean propagateFailuresToParent) {
if (parentStageId == null) {
throw new SpinnakerException(
String.format(
"Not allowed to set propagateFailuresToParent on a non-child stage: %s with id %s",
getType(), getId()));
}

context.put("allowSiblingStagesToContinueOnFailure", propagateFailuresToParent);
}

public static class LastModifiedDetails implements Serializable {
private String user;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ public Object get(@Nullable Object key) {
}
}

/**
* Get a value from the current context ONLY - never looking at the ancestors' outputs
*
* @param key The key to look
* @param defaultValue default value to return if key is not present
* @return value or null if not present
*/
Object getCurrentOnly(@Nullable Object key, Object defaultValue) {
return super.getOrDefault(key, defaultValue);
}
/*
* Gets all objects matching 'key', sorted by proximity to the current stage.
* If the key exists in the current context, it will be the first element returned
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.netflix.spinnaker.orca.pipeline.model

import com.netflix.spinnaker.kork.exceptions.SpinnakerException
import spock.lang.Specification
import spock.lang.Unroll

Expand Down Expand Up @@ -318,4 +319,41 @@ class StageSpec extends Specification {
100 | 200 | null || 200
100 | 200 | 300 || 300
}

def "should set propagateFailuresToParent correctly"() {
given:
def pipeline = pipeline {
stage {
refId = "parent"

stage {
refId = "child"
}
}
}

def parentStage = pipeline.stageByRef("parent")
def childStage = pipeline.stageByRef("child")

when: 'trying to set PropagateFailuresToParent on parent stage'
parentStage.setAllowSiblingStagesToContinueOnFailure(true)

then: 'it should fail'
thrown(SpinnakerException)

when: 'parent stage erroneously has the setting in context'
parentStage = pipeline.stageByRef("parent")
parentStage.context.put("allowSiblingStagesToContinueOnFailure", true)

then: 'we ignore the value in context'
childStage.getAllowSiblingStagesToContinueOnFailure() == false

when: 'trying to set PropagateFailuresToParent on a child stage'
childStage.setAllowSiblingStagesToContinueOnFailure(true)

then: 'it should succeed'
noExceptionThrown()
childStage.context.allowSiblingStagesToContinueOnFailure == true
childStage.getAllowSiblingStagesToContinueOnFailure() == true
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class CompleteStageHandler(

// When a synthetic stage ends with FAILED_CONTINUE, propagate that status up to the stage's
// parent so that no more of the parent's synthetic children will run.
if (stage.status == FAILED_CONTINUE && stage.syntheticStageOwner != null) {
if (stage.status == FAILED_CONTINUE && stage.syntheticStageOwner != null && !stage.allowSiblingStagesToContinueOnFailure) {
queue.push(message.copy(stageId = stage.parentStageId!!))
} else if (stage.status in listOf(SUCCEEDED, FAILED_CONTINUE, SKIPPED)) {
stage.startNext()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,41 @@ object CompleteStageHandlerTest : SubjectSpek<CompleteStageHandler>({
verify(queue).push(message.copy(stageId = pipeline.stageByRef("1").id))
}
}

given("a synthetic stage's task ends with $FAILED_CONTINUE status and the synthetic allows siblings to continue") {
val pipeline = pipeline {
stage {
refId = "1"
type = stageWithSyntheticBefore.type
stageWithSyntheticBefore.buildBeforeStages(this)
stageWithSyntheticBefore.plan(this)
}
}

val syntheticStage = pipeline.stageByRef("1<1")
syntheticStage.allowSiblingStagesToContinueOnFailure = true
val message = CompleteStage(syntheticStage)

beforeGroup {
pipeline.stageById(message.stageId).apply {
status = RUNNING
singleTaskStage.plan(this)
tasks.first().status = FAILED_CONTINUE
}

whenever(repository.retrieve(PIPELINE, message.executionId)) doReturn pipeline
}

on("receiving the message") {
subject.handle(message)
}

afterGroup(::resetMocks)

it("starts executing the next sibling") {
verify(queue).push(StartStage(pipeline.stageByRef("1<2")))
}
}
}

setOf(TERMINAL, CANCELED).forEach { taskStatus ->
Expand Down

0 comments on commit ae0e2cf

Please sign in to comment.