Skip to content

Commit

Permalink
fix(RRB): Add dedicated pin stage (#2968)
Browse files Browse the repository at this point in the history
For RRB we start by pinning the source SG so that it doesn't scale down during deploy (and end with unpinning it).
However, if for whatever reason some of the instances are taken out of discovery/target group, we will time out
waiting for capacity match on this operation.
Since these pin/unpin operations are purely "cautionary" and don't require us to actually match the capacity,
don't wait for capacity match, so that we don't fail for this non-critical reason.
  • Loading branch information
marchello2000 committed Jun 10, 2019
1 parent 2235205 commit 50360e3
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package com.netflix.spinnaker.orca.clouddriver.pipeline.providers.aws

import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.ResizeServerGroupStage
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.PinServerGroupStage
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.strategies.AbstractDeployStrategyStage
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.strategies.DeployStagePreProcessor
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.support.TargetServerGroup
Expand All @@ -39,7 +39,7 @@ class AwsDeployStagePreProcessor implements DeployStagePreProcessor {
ApplySourceServerGroupCapacityStage applySourceServerGroupSnapshotStage

@Autowired
ResizeServerGroupStage resizeServerGroupStage
PinServerGroupStage pinServerGroupStage

@Autowired
TargetServerGroupResolver targetServerGroupResolver
Expand Down Expand Up @@ -101,7 +101,7 @@ class AwsDeployStagePreProcessor implements DeployStagePreProcessor {

stageDefinitions << new StageDefinition(
name: "Pin ${resizeContext.serverGroupName}",
stageDefinitionBuilder: resizeServerGroupStage,
stageDefinitionBuilder: pinServerGroupStage,
context: resizeContext
)
}
Expand Down Expand Up @@ -213,7 +213,7 @@ class AwsDeployStagePreProcessor implements DeployStagePreProcessor {

return new StageDefinition(
name: "Unpin ${resizeContext.serverGroupName} (deployFailed=${deployFailed})".toString(),
stageDefinitionBuilder: resizeServerGroupStage,
stageDefinitionBuilder: pinServerGroupStage,
context: resizeContext
)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2019 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup

import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.support.TargetServerGroupLinearStageSupport
import com.netflix.spinnaker.orca.clouddriver.tasks.DetermineHealthProvidersTask
import com.netflix.spinnaker.orca.clouddriver.tasks.MonitorKatoTask
import com.netflix.spinnaker.orca.clouddriver.tasks.servergroup.ResizeServerGroupTask
import com.netflix.spinnaker.orca.clouddriver.tasks.servergroup.ServerGroupCacheForceRefreshTask
import com.netflix.spinnaker.orca.pipeline.TaskNode
import com.netflix.spinnaker.orca.pipeline.model.Stage
import groovy.util.logging.Slf4j
import org.springframework.stereotype.Component

/**
 * Stage that sets the capacity (min/max/desired) of a server group without waiting for the
 * capacity to match, unlike ResizeServerGroup.
 * It also does not enable or disable scaling processes; it only sets the min/max/desired
 * sizes on an ASG.
 *
 * Pinning is meant to keep the ASG from scaling down (or, less usefully, up). The expectation
 * is that the right number of instances is already up, so there is no need to wait for a
 * capacity match, and re-enabling/disabling the scaling processes can be skipped for this
 * operation as well.
 * This stage is used with rolling red black.
 */
@Component
@Slf4j
class PinServerGroupStage extends TargetServerGroupLinearStageSupport {
  public static final String TYPE = getType(PinServerGroupStage.class)

  /**
   * Registers this stage's tasks on the supplied builder, in order:
   * "resizeServerGroup", "monitorServerGroup", then "forceCacheRefresh".
   * No task that waits for a capacity match is added.
   *
   * @param stage the stage being built (not read here)
   * @param builder the task graph builder the tasks are appended to
   */
  @Override
  protected void taskGraphInternal(Stage stage, TaskNode.Builder builder) {
    builder.withTask("resizeServerGroup", ResizeServerGroupTask.class)
      .withTask("monitorServerGroup", MonitorKatoTask.class)
      .withTask("forceCacheRefresh", ServerGroupCacheForceRefreshTask.class)
  }
}

0 comments on commit 50360e3

Please sign in to comment.