diff --git a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollback.groovy b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollback.groovy index 1961cc28ac..3a52983440 100644 --- a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollback.groovy +++ b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollback.groovy @@ -17,6 +17,7 @@ package com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.rollback import com.fasterxml.jackson.annotation.JsonIgnore +import com.netflix.spinnaker.kork.core.RetrySupport import com.netflix.spinnaker.orca.clouddriver.pipeline.providers.aws.ApplySourceServerGroupCapacityStage import com.netflix.spinnaker.orca.clouddriver.pipeline.providers.aws.CaptureSourceServerGroupCapacityStage import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.DisableServerGroupStage @@ -28,10 +29,15 @@ import com.netflix.spinnaker.orca.kato.pipeline.support.ResizeStrategy import com.netflix.spinnaker.orca.pipeline.WaitStage import com.netflix.spinnaker.orca.pipeline.model.Stage import com.netflix.spinnaker.orca.pipeline.model.SyntheticStageOwner +import groovy.transform.TimedInterrupt import groovy.util.logging.Slf4j import org.springframework.beans.factory.annotation.Autowired import javax.annotation.Nullable +import java.util.concurrent.Callable +import java.util.concurrent.ExecutorService +import java.util.concurrent.Executors +import java.util.concurrent.TimeUnit import static com.netflix.spinnaker.orca.pipeline.StageDefinitionBuilder.newStage @@ -44,6 +50,9 @@ class ExplicitRollback implements Rollback { Boolean disableOnly Boolean enableAndDisableOnly + @JsonIgnore + private final ExecutorService executor = java.util.concurrent.Executors.newSingleThreadExecutor() + @Autowired @JsonIgnore EnableServerGroupStage enableServerGroupStage @@ -72,6 +81,10 @@ class ExplicitRollback implements Rollback { @JsonIgnore OortHelper oortHelper + @Autowired + @JsonIgnore + RetrySupport retrySupport + @JsonIgnore @Override List buildStages(Stage parentStage) { @@ -139,12 +152,17 @@ class ExplicitRollback implements Rollback { def fromContext = parentStage.mapTo(ResizeStrategy.Source) try { - // TODO: throw some retries in there? - return oortHelper.getTargetServerGroup( - fromContext.credentials, - serverGroupName, - fromContext.location - ).orElse(null) + // we use an executor+future to timebox how long we can spend in this remote call + // because we are in a StartStage message and need to response quickly + // this is a bit of a hack, the proper long term fix would be to encapsulate this remote call in a task + executor.submit((Callable) { + return oortHelper.getTargetServerGroup( + fromContext.credentials, + serverGroupName, + fromContext.location + ) + }).get(5, TimeUnit.SECONDS) + .get() // not sure what would cause the Optional to not be present but we would catch and log it } catch(Exception e) { log.error('Skipping resize stage because there was an error looking up {}', serverGroupName, e) return null @@ -182,6 +200,7 @@ class ExplicitRollback implements Rollback { action : ResizeStrategy.ResizeAction.scale_exact.toString(), capacity : newRestoreCapacity.asMap(), asgName : restoreServerGroupName, + serverGroupName : restoreServerGroupName, targetLocation : restoreServerGroup.getLocation(), account : restoreServerGroup.credentials, cloudProvider : restoreServerGroup.cloudProvider, @@ -190,7 +209,7 @@ class ExplicitRollback implements Rollback { ] return newStage(parentStage.execution, resizeServerGroupStage.type, - "Resize Restore Server Group to (min: ${newRestoreCapacity.min}, max: ${newRestoreCapacity.max}, desired: ${newRestoreCapacity.desired})", + "Resize Server Group: ${restoreServerGroupName} to (min: ${newRestoreCapacity.min}, max: ${newRestoreCapacity.max}, desired: ${newRestoreCapacity.desired})", resizeServerGroupContext, parentStage, SyntheticStageOwner.STAGE_AFTER) } diff --git a/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollbackSpec.groovy b/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollbackSpec.groovy index c41abcce3b..7dde321b83 100644 --- a/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollbackSpec.groovy +++ b/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/pipeline/servergroup/rollback/ExplicitRollbackSpec.groovy @@ -158,6 +158,7 @@ class ExplicitRollbackSpec extends Specification { afterStages[2].context == stage.context + [ action : "scale_exact", asgName : restoreServerGroupName, + serverGroupName : restoreServerGroupName, pinMinimumCapacity: true, targetHealthyDeployPercentage: 95, targetLocation: Location.region("us-west-1"),