Skip to content

Commit

Permalink
feat(rollback): Support rollbacks for red/black when deploy fails (#2067)
Browse files Browse the repository at this point in the history

This PR also ensures that the previous server group is 100% enabled
when rolling back a rolling red/black (RRB) deployment.
  • Loading branch information
ajordens authored Mar 21, 2018
1 parent 43afdc2 commit 481f43f
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ public void taskGraph(@Nonnull Stage stage, @Nonnull TaskNode.Builder builder) {
public void afterStages(@Nonnull Stage parent, @Nonnull StageGraphBuilder graph) {
StageData stageData = parent.mapTo(StageData.class);

Map<String, String> rollbackTypes = (Map<String, String>) parent.getOutputs().get("rollbackTypes");
Map<String, Object> parentOutputs = parent.getOutputs();
Map<String, String> rollbackTypes = (Map<String, String>) parentOutputs.get("rollbackTypes");
Map<String, Map<String, Object>> rollbackContexts = (Map<String, Map<String, Object>>) parentOutputs.get("rollbackContexts");

// filter out any regions that do _not_ have a rollback target
List<String> regionsToRollback = stageData.regions
Expand All @@ -72,10 +74,13 @@ public void afterStages(@Nonnull Stage parent, @Nonnull StageGraphBuilder graph)
"rollbackType",
((Map) parent.getOutputs().get("rollbackTypes")).get(region)
);
context.put(
"rollbackContext",
((Map) parent.getOutputs().get("rollbackContexts")).get(region)
);

Map<String, Object> rollbackContext = rollbackContexts.get(region);
if (stageData.additionalRollbackContext != null) {
rollbackContext.putAll(stageData.additionalRollbackContext);
}

context.put("rollbackContext", rollbackContext);
context.put("type", rollbackServerGroupStage.getType());
context.put("region", region);
context.put("credentials", stageData.credentials);
Expand Down Expand Up @@ -131,5 +136,7 @@ static class StageData {

public List<String> regions;
public Long waitTimeBetweenRegions;

public Map<String, Object> additionalRollbackContext;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup

import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.kato.pipeline.strategy.Strategy

import javax.annotation.Nonnull
import com.fasterxml.jackson.annotation.JsonProperty
import com.netflix.spinnaker.moniker.Moniker
Expand Down Expand Up @@ -87,17 +90,35 @@ class CreateServerGroupStage extends AbstractDeployStrategyStage {
return
}

// TODO: should this append after any existing stages in the graph?
graph.add {
it.type = rollbackClusterStage.type
it.name = "Rollback ${stageData.cluster}"
it.context = [
"credentials" : stageData.credentials,
"cloudProvider" : stageData.cloudProvider,
"regions" : [stageData.region],
"serverGroup" : stageData.serverGroup,
"stageTimeoutMs": MINUTES.toMillis(30) // timebox a rollback to 30 minutes
]
def strategySupportsRollback = false
def additionalRollbackContext = [:]

def strategy = Strategy.fromStrategy(stageData.strategy)
if (strategy == Strategy.ROLLING_RED_BLACK) {
// rollback is always supported regardless of where the failure occurred
strategySupportsRollback = true
additionalRollbackContext.enableAndDisableOnly = true
} else if (strategy == Strategy.RED_BLACK) {
// rollback is only supported if the failure occurred launching the new server group
// no rollback should be attempted if the failure occurs while tearing down the old server group
strategySupportsRollback = stage.tasks.any { it.status == ExecutionStatus.TERMINAL }
additionalRollbackContext.disableOnly = true
}

if (strategySupportsRollback) {
// TODO: should this append after any existing stages in the graph?
graph.add {
it.type = rollbackClusterStage.type
it.name = "Rollback ${stageData.cluster}"
it.context = [
"credentials" : stageData.credentials,
"cloudProvider" : stageData.cloudProvider,
"regions" : [stageData.region],
"serverGroup" : stageData.serverGroup,
"stageTimeoutMs" : MINUTES.toMillis(30), // timebox a rollback to 30 minutes
"additionalRollbackContext": additionalRollbackContext
]
}
}
}

Expand All @@ -108,6 +129,7 @@ class CreateServerGroupStage extends AbstractDeployStrategyStage {
String cloudProvider
Moniker moniker

String strategy
Rollback rollback

@JsonProperty("deploy.server.groups")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import com.netflix.spinnaker.orca.clouddriver.pipeline.providers.aws.CaptureSour
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.DisableServerGroupStage
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.EnableServerGroupStage
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.ResizeServerGroupStage
import com.netflix.spinnaker.orca.kato.pipeline.strategy.Strategy
import com.netflix.spinnaker.orca.kato.pipeline.support.ResizeStrategy
import com.netflix.spinnaker.orca.pipeline.model.Stage
import com.netflix.spinnaker.orca.pipeline.model.SyntheticStageOwner
Expand All @@ -33,6 +32,8 @@ class ExplicitRollback implements Rollback {
String rollbackServerGroupName
String restoreServerGroupName
Integer targetHealthyRollbackPercentage
Boolean disableOnly
Boolean enableAndDisableOnly

@Autowired
@JsonIgnore
Expand Down Expand Up @@ -63,24 +64,29 @@ class ExplicitRollback implements Rollback {
parentStage.execution, disableServerGroupStage.type, "disable", disableServerGroupContext, parentStage, SyntheticStageOwner.STAGE_AFTER
)

def parentDeployStage = getParentDeployStage(parentStage)
def parentDeployStageStrategy = parentDeployStage?.context?.strategy as String
if (Strategy.fromStrategy(parentDeployStageStrategy) == Strategy.ROLLING_RED_BLACK) {
// no need to do anything but disable the newly deployed (and failing!) server group when dealing with a
// rolling red/black deployment
if (disableOnly) {
// no need to do anything but disable the newly deployed (and failing!) server group
return [
disableServerGroupStage
]
}

def stages = []
Map enableServerGroupContext = new HashMap(parentStage.context)
enableServerGroupContext.targetHealthyDeployPercentage = targetHealthyRollbackPercentage
enableServerGroupContext.serverGroupName = restoreServerGroupName
stages << newStage(
def enableServerGroupStage = newStage(
parentStage.execution, enableServerGroupStage.type, "enable", enableServerGroupContext, parentStage, SyntheticStageOwner.STAGE_AFTER
)

if (enableAndDisableOnly) {
// ensure previous server group is 100% enabled before disabling the new server group
return [
enableServerGroupStage,
disableServerGroupStage
]
}

def stages = [enableServerGroupStage]
if (!parentStage.getContext().containsKey("sourceServerGroupCapacitySnapshot")) {
// capacity has been previously captured (likely as part of a failed deploy), no need to do again!
stages << buildCaptureSourceServerGroupCapacityStage(parentStage, parentStage.mapTo(ResizeStrategy.Source))
Expand Down Expand Up @@ -150,12 +156,4 @@ class ExplicitRollback implements Rollback {
SyntheticStageOwner.STAGE_AFTER
)
}

/**
 * Climbs the chain of parent stages, starting at {@code parentStage} itself, and returns the
 * first stage whose context contains a "strategy" key.
 *
 * @param parentStage the stage to start searching from (inclusive)
 * @return the first stage in the chain whose context has a "strategy" key; otherwise the
 *         falsy value (e.g. {@code null}) reached once the chain is exhausted
 */
static Stage getParentDeployStage(Stage parentStage) {
  Stage current = parentStage
  while (current) {
    if (current.context.containsKey("strategy")) {
      // found the nearest stage carrying a strategy
      return current
    }
    current = current.parent
  }

  return current
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup

import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.pipeline.model.Task

import java.util.concurrent.TimeUnit
import com.netflix.spinnaker.orca.clouddriver.pipeline.cluster.RollbackClusterStage
import com.netflix.spinnaker.orca.pipeline.graph.StageGraphBuilder
Expand All @@ -38,7 +41,11 @@ class CreateServerGroupStageSpec extends Specification {
"deploy.server.groups": deployServerGroups,
"application" : "myapplication",
"account" : "test",
"cloudProvider" : "aws"
"cloudProvider" : "aws",
"strategy" : strategy
]
tasks = [
new Task(status: failedTask ? ExecutionStatus.TERMINAL : ExecutionStatus.SUCCEEDED)
]
}

Expand All @@ -57,18 +64,25 @@ class CreateServerGroupStageSpec extends Specification {
onFailureStageContexts == expectedOnFailureStageContexts

where:
shouldRollbackOnFailure | deployServerGroups || expectedOnFailureStageContexts
false | null || []
true | null || []
false | ["us-west-1": ["myapplication-stack-v001"]] || []
true | ["us-west-1": ["myapplication-stack-v001"]] || [
[
regions : ["us-west-1"],
serverGroup : "myapplication-stack-v001",
credentials : "test",
cloudProvider : "aws",
stageTimeoutMs: TimeUnit.MINUTES.toMillis(30)
]
shouldRollbackOnFailure | strategy | deployServerGroups | failedTask || expectedOnFailureStageContexts
false | "rollingredblack" | null | false || []
true | "rollingredblack" | null | false || []
false | "rollingredblack" | ["us-west-1": ["myapplication-stack-v001"]] | false || []
true | "redblack" | ["us-west-1": ["myapplication-stack-v001"]] | false || [] // only rollback if task has failed
true | "highlander" | ["us-west-1": ["myapplication-stack-v001"]] | false || [] // highlander is not supported
true | "rollingredblack" | ["us-west-1": ["myapplication-stack-v001"]] | false || [expectedContext([enableAndDisableOnly: true])]
true | "redblack" | ["us-west-1": ["myapplication-stack-v001"]] | true || [expectedContext([disableOnly: true])]
}


/**
 * Builds the stage context map this spec compares against, embedding the supplied
 * {@code additionalRollbackContext} under the "additionalRollbackContext" key.
 *
 * @param additionalRollbackContext extra rollback settings to include as-is in the result
 * @return a map with the fixed region, server group, credentials, cloud provider and
 *         30-minute timeout entries, plus the supplied additional rollback context
 */
Map expectedContext(Map<String, Object> additionalRollbackContext) {
  long thirtyMinutesMs = TimeUnit.MINUTES.toMillis(30)

  // keys are inserted in the same order as the original map literal
  Map expected = new LinkedHashMap()
  expected.put("regions", ["us-west-1"])
  expected.put("serverGroup", "myapplication-stack-v001")
  expected.put("credentials", "test")
  expected.put("cloudProvider", "aws")
  expected.put("stageTimeoutMs", thirtyMinutesMs)
  expected.put("additionalRollbackContext", additionalRollbackContext)
  return expected
}
}

0 comments on commit 481f43f

Please sign in to comment.