Skip to content

Commit

Permalink
feat(provider/aws): Fix lifecycle hooks race condition (#4082)
Browse files Browse the repository at this point in the history
  • Loading branch information
costimuraru authored and robzienert committed Oct 10, 2019
1 parent a51aa94 commit 7db3aca
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import com.amazonaws.services.autoscaling.model.Tag
import com.amazonaws.services.autoscaling.model.UpdateAutoScalingGroupRequest
import com.amazonaws.services.ec2.model.DescribeSubnetsResult
import com.amazonaws.services.ec2.model.Subnet
import com.netflix.spinnaker.clouddriver.aws.model.AmazonAsgLifecycleHook
import com.netflix.spinnaker.clouddriver.aws.model.AmazonBlockDevice
import com.netflix.spinnaker.clouddriver.aws.model.AutoScalingProcessType
import com.netflix.spinnaker.clouddriver.aws.model.SubnetData
Expand Down Expand Up @@ -95,6 +96,7 @@ class AutoScalingWorker {
private List<String> availabilityZones
private List<AmazonBlockDevice> blockDevices
private Map<String, String> tags
private List<AmazonAsgLifecycleHook> lifecycleHooks

private int minInstances
private int maxInstances
Expand Down Expand Up @@ -267,11 +269,23 @@ class AutoScalingWorker {
throw ex
}

if (lifecycleHooks != null && !lifecycleHooks.isEmpty()) {
Exception e = retrySupport.retry({ ->
task.updateStatus AWS_PHASE, "Creating lifecycle hooks for: $asgName"
regionScopedProvider.asgLifecycleHookWorker.attach(task, lifecycleHooks, asgName)
}, 10, 1000, false)

if (e != null) {
task.updateStatus AWS_PHASE, "Unable to attach lifecycle hooks to ASG ($asgName): ${e.message}"
}
}

if (suspendedProcesses) {
retrySupport.retry({ ->
autoScaling.suspendProcesses(new SuspendProcessesRequest(autoScalingGroupName: asgName, scalingProcesses: suspendedProcesses))
}, 10, 1000, false)
}

if (enabledMetrics && instanceMonitoring) {
task.updateStatus AWS_PHASE, "Enabling metrics collection for: $asgName"
retrySupport.retry({ ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ class BasicAmazonDeployHandler implements DeployHandler<BasicAmazonDeployDescrip
regionScopedProvider: regionScopedProvider,
base64UserData: description.base64UserData,
legacyUdf: description.legacyUdf,
tags: applyAppStackDetailTags(deployDefaults, description).tags
tags: applyAppStackDetailTags(deployDefaults, description).tags,
lifecycleHooks: getLifecycleHooks(account, description)
)

def asgName = autoScalingWorker.deploy()
Expand All @@ -315,8 +316,6 @@ class BasicAmazonDeployHandler implements DeployHandler<BasicAmazonDeployDescrip
)
}

createLifecycleHooks(task, regionScopedProvider, account, description, asgName)

description.events << new CreateServerGroupEvent(
AmazonCloudProvider.ID, account.accountId, region, asgName
)
Expand Down Expand Up @@ -455,24 +454,6 @@ class BasicAmazonDeployHandler implements DeployHandler<BasicAmazonDeployDescrip
asgReferenceCopier.copyScheduledActionsForAsg(task, sourceAsgName, targetAsgName)
}

@VisibleForTesting
@PackageScope
/**
 * Resolves the lifecycle hooks configured on the deploy description and
 * attaches them to the newly created ASG. Attachment is best-effort: any
 * failure is recorded on the task as a status update rather than failing
 * the deploy.
 */
void createLifecycleHooks(Task task,
                          RegionScopedProviderFactory.RegionScopedProvider targetRegionScopedProvider,
                          NetflixAmazonCredentials targetCredentials,
                          BasicAmazonDeployDescription description,
                          String targetAsgName) {
  try {
    // Merge account-level defaults with the hooks declared on the description.
    List<AmazonAsgLifecycleHook> hooks = getLifecycleHooks(targetCredentials, description)
    if (!hooks.isEmpty()) {
      targetRegionScopedProvider.asgLifecycleHookWorker.attach(task, hooks, targetAsgName)
    }
  } catch (Exception e) {
    // Deliberately swallow: a hook-attachment failure should not abort the deploy.
    task.updateStatus(BASE_PHASE, "Unable to attach lifecycle hooks to ASG ($targetAsgName): ${e.message}")
  }
}

@VisibleForTesting
@PackageScope
static List<AmazonAsgLifecycleHook> getLifecycleHooks(NetflixAmazonCredentials credentials, BasicAmazonDeployDescription description) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import com.amazonaws.services.autoscaling.model.DescribeAutoScalingGroupsResult
import com.amazonaws.services.ec2.AmazonEC2
import com.amazonaws.services.ec2.model.Subnet
import com.netflix.spinnaker.clouddriver.aws.TestCredential
import com.netflix.spinnaker.clouddriver.aws.model.AmazonAsgLifecycleHook
import com.netflix.spinnaker.clouddriver.aws.services.AsgService
import com.netflix.spinnaker.clouddriver.aws.services.RegionScopedProviderFactory
import com.netflix.spinnaker.clouddriver.data.task.DefaultTask
Expand All @@ -38,6 +39,9 @@ import spock.lang.Unroll
import java.time.Instant
import java.time.temporal.ChronoUnit

import static com.netflix.spinnaker.clouddriver.aws.model.AmazonAsgLifecycleHook.DefaultResult.CONTINUE
import static com.netflix.spinnaker.clouddriver.aws.model.AmazonAsgLifecycleHook.Transition.EC2InstanceLaunching

class AutoScalingWorkerUnitSpec extends Specification {

@Autowired
Expand All @@ -48,6 +52,7 @@ class AutoScalingWorkerUnitSpec extends Specification {
def autoScaling = Mock(AmazonAutoScaling)
def clusterProvider = Mock(ClusterProvider)
def amazonEC2 = Mock(AmazonEC2)
def asgLifecycleHookWorker = Mock(AsgLifecycleHookWorker)
def awsServerGroupNameResolver = new AWSServerGroupNameResolver('test', 'us-east-1', asgService, [clusterProvider])
def credential = TestCredential.named('foo')
def regionScopedProvider = Stub(RegionScopedProviderFactory.RegionScopedProvider) {
Expand All @@ -56,6 +61,7 @@ class AutoScalingWorkerUnitSpec extends Specification {
getAsgService() >> asgService
getAWSServerGroupNameResolver() >> awsServerGroupNameResolver
getAmazonEC2() >> amazonEC2
getAsgLifecycleHookWorker() >> asgLifecycleHookWorker
}

def setup() {
Expand Down Expand Up @@ -141,6 +147,42 @@ class AutoScalingWorkerUnitSpec extends Specification {
0 * autoScaling.enableMetricsCollection(_)
}

// Regression test for the lifecycle-hook race condition: hooks must be
// attached after the ASG is created but BEFORE it is scaled out, so that
// instances cannot launch without their hooks in place. The consecutive
// `then:` blocks below assert this strict interaction ordering.
void "creates lifecycle hooks before scaling out asg"() {
setup:
// Two distinct hooks (randomized names) to verify the full list is attached.
def hooks = [getHook(), getHook()]
def autoScalingWorker = new AutoScalingWorker(
enabledMetrics: [],
instanceMonitoring: true,
regionScopedProvider: regionScopedProvider,
credentials: credential,
application: "myasg",
region: "us-east-1",
lifecycleHooks: hooks
)

when:
autoScalingWorker.deploy()

then:
// Step 1: the ASG is created first.
1 * autoScaling.createAutoScalingGroup(_)
then:
// Step 2: the hooks are attached to the freshly created ASG.
1 * regionScopedProvider.getAsgLifecycleHookWorker().attach(_, hooks, "myasg-v000")
then: "validate that scale out happens after lifecycle hooks are attached"
// Step 3: only then is the group scaled out.
1 * autoScaling.updateAutoScalingGroup(*_)
}

/**
 * Builds a sample EC2InstanceLaunching lifecycle hook for use as a test
 * fixture. The name carries a random suffix so repeated calls produce
 * distinct hooks.
 */
def getHook() {
  def suffix = new Random().nextInt()
  return new AmazonAsgLifecycleHook(
    name: "hook-name-" + suffix,
    roleARN: "role-rn",
    notificationTargetARN: "target-arn",
    notificationMetadata: null,
    lifecycleTransition: EC2InstanceLaunching,
    heartbeatTimeout: 300,
    defaultResult: CONTINUE
  )
}

void "does not enable metrics collection when instanceMonitoring is set to false"() {
setup:
def autoScalingWorker = new AutoScalingWorker(
Expand Down

0 comments on commit 7db3aca

Please sign in to comment.