Skip to content

Commit

Permalink
add instance connection draining for NLBs
Browse files Browse the repository at this point in the history
  • Loading branch information
heybronson committed Dec 20, 2021
1 parent cc26c87 commit fa956e3
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 10 deletions.
1 change: 1 addition & 0 deletions upup/pkg/fi/cloudup/awsup/BUILD.bazel

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

85 changes: 75 additions & 10 deletions upup/pkg/fi/cloudup/awsup/aws_cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/aws/aws-sdk-go/service/eventbridge/eventbridgeiface"
"github.com/aws/aws-sdk-go/service/sqs"
"github.com/aws/aws-sdk-go/service/sqs/sqsiface"
"golang.org/x/sync/errgroup"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/arn"
Expand Down Expand Up @@ -547,7 +548,7 @@ func deleteInstance(c AWSCloud, i *cloudinstances.CloudInstance) error {
}

if i.CloudInstanceGroup.InstanceGroup.Spec.Manager != kops.InstanceManagerKarpenter {
err := deregisterInstanceFromClassicLoadBalancer(c, i)
err := deregisterInstance(c, i)
if err != nil {
return fmt.Errorf("failed to deregister instance from loadBalancer before terminating: %v", err)
}
Expand All @@ -570,9 +571,8 @@ func deleteInstance(c AWSCloud, i *cloudinstances.CloudInstance) error {
return nil
}

// deregisterInstanceFromClassicLoadBalancer ensures that connectionDraining completes for the associated loadBalancer to ensure no dropped connections.
// if instance is associated with an NLB, this method no-ops.
func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.CloudInstance) error {
// deregisterInstance ensures that the instance is fully drained/removed from all associated loadBalancers and targetGroups before termination.
func deregisterInstance(c AWSCloud, i *cloudinstances.CloudInstance) error {
asg := i.CloudInstanceGroup.Raw.(*autoscaling.Group)

asgDetails, err := c.Autoscaling().DescribeAutoScalingGroups(&autoscaling.DescribeAutoScalingGroupsInput{
Expand All @@ -587,17 +587,41 @@ func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.Clo
}

// there will always be only one ASG in the DescribeAutoScalingGroups response.
loadBalancerNames := asgDetails.AutoScalingGroups[0].LoadBalancerNames
loadBalancerNames := aws.StringValueSlice(asgDetails.AutoScalingGroups[0].LoadBalancerNames)
targetGroupArns := aws.StringValueSlice(asgDetails.AutoScalingGroups[0].TargetGroupARNs)

klog.Infof("Deregistering instance from classic loadBalancers: %v", aws.StringValueSlice(loadBalancerNames))
eg, _ := errgroup.WithContext(aws.BackgroundContext())

if len(loadBalancerNames) != 0 {
eg.Go(func() error {
return deregisterInstanceFromClassicLoadBalancer(c, loadBalancerNames, i.ID)
})
}

if len(targetGroupArns) != 0 {
eg.Go(func() error {
return deregisterInstanceFromTargetGroups(c, targetGroupArns, i.ID)
})
}

if err := eg.Wait(); err != nil {
return fmt.Errorf("failed to deregister instance from load balancers: %v", err)
}

return nil
}

// deregisterInstanceFromClassicLoadBalancer ensures that connectionDraining completes for the associated classic loadBalancer to ensure no dropped connections.
func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, loadBalancerNames []string, instanceId string) error {
klog.Infof("Deregistering instance from classic loadBalancers: %v", loadBalancerNames)

for {
instanceDraining := false
for _, loadBalancerName := range loadBalancerNames {
response, err := c.ELB().DescribeInstanceHealth(&elb.DescribeInstanceHealthInput{
LoadBalancerName: loadBalancerName,
LoadBalancerName: aws.String(loadBalancerName),
Instances: []*elb.Instance{{
InstanceId: aws.String(i.ID),
InstanceId: aws.String(instanceId),
}},
})
if err != nil {
Expand All @@ -612,9 +636,9 @@ func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.Clo
// there will be only one instance in the DescribeInstanceHealth response.
if aws.StringValue(response.InstanceStates[0].State) == instanceInServiceState {
c.ELB().DeregisterInstancesFromLoadBalancer(&elb.DeregisterInstancesFromLoadBalancerInput{
LoadBalancerName: loadBalancerName,
LoadBalancerName: aws.String(loadBalancerName),
Instances: []*elb.Instance{{
InstanceId: aws.String(i.ID),
InstanceId: aws.String(instanceId),
}},
})
instanceDraining = true
Expand All @@ -627,7 +651,48 @@ func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.Clo

time.Sleep(5 * time.Second)
}
return nil
}

// deregisterInstanceFromTargetGroups deregisters the instance from each of the given targetGroups
// and polls until every targetGroup reports the instance as unused, so that connections are
// drained from the instance before it is terminated.
//
// It returns an error if describing the target health or requesting the deregistration fails.
// There is no timeout: the targetGroups are re-checked every 5 seconds until none of them
// still reports the instance in a state other than unused.
func deregisterInstanceFromTargetGroups(c AWSCloud, targetGroupArns []string, instanceId string) error {
	klog.Infof("Deregistering instance from targetGroups: %v", targetGroupArns)

	for {
		instanceDraining := false
		for _, targetGroupArn := range targetGroupArns {
			targets := []*elbv2.TargetDescription{{
				Id: aws.String(instanceId),
			}}

			response, err := c.ELBV2().DescribeTargetHealth(&elbv2.DescribeTargetHealthInput{
				TargetGroupArn: aws.String(targetGroupArn),
				Targets:        targets,
			})
			if err != nil {
				return fmt.Errorf("error describing target health: %w", err)
			}

			// DescribeTargetHealth is expected to return exactly one description for the requested
			// target, even if the target is not registered. Guard anyway so an unexpected
			// response cannot panic; with no health information there is nothing to wait for.
			descriptions := response.TargetHealthDescriptions
			if len(descriptions) == 0 || descriptions[0] == nil || descriptions[0].TargetHealth == nil {
				continue
			}

			// every state other than unused means the instance may still be serving traffic.
			switch aws.StringValue(descriptions[0].TargetHealth.State) {
			case elbv2.TargetHealthStateEnumUnused:
				// fully removed from this targetGroup.
			case elbv2.TargetHealthStateEnumDraining:
				// deregistration is already in progress; just keep waiting.
				instanceDraining = true
			default:
				// surface the failure instead of silently retrying forever.
				if _, err := c.ELBV2().DeregisterTargets(&elbv2.DeregisterTargetsInput{
					TargetGroupArn: aws.String(targetGroupArn),
					Targets:        targets,
				}); err != nil {
					return fmt.Errorf("error deregistering instance from target group %q: %w", targetGroupArn, err)
				}
				instanceDraining = true
			}
		}

		if !instanceDraining {
			break
		}

		time.Sleep(5 * time.Second)
	}
	return nil
}

Expand Down

0 comments on commit fa956e3

Please sign in to comment.