Merge pull request #89894 from johanneswuerbach/automated-cherry-pick-of-#84181-#85675-upstream-release-1.16

Automated cherry pick of
#84181: Lower AWS DescribeVolume frequency
#85675: Fix AWS eventual consistency of AttachDisk
k8s-ci-robot committed Apr 28, 2020
2 parents bafb40a + caba491 commit 7f72444
Showing 1 changed file with 61 additions and 19 deletions.

staging/src/k8s.io/legacy-cloud-providers/aws/aws.go
@@ -219,13 +219,15 @@ const nodeWithImpairedVolumes = "NodeWithImpairedVolumes"
 const (
     // volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
     volumeAttachmentStatusConsecutiveErrorLimit = 10
-    // most attach/detach operations on AWS finish within 1-4 seconds
-    // By using 1 second starting interval with a backoff of 1.8
-    // we get - [1, 1.8, 3.24, 5.832000000000001, 10.4976]
-    // in total we wait for 2601 seconds
-    volumeAttachmentStatusInitialDelay = 1 * time.Second
-    volumeAttachmentStatusFactor       = 1.8
-    volumeAttachmentStatusSteps        = 13
+
+    // Attach typically takes 2-5 seconds (average is 2). Asking before 2 seconds is just waste of API quota.
+    volumeAttachmentStatusInitialDelay = 2 * time.Second
+    // Detach typically takes 5-10 seconds (average is 6). Asking before 5 seconds is just waste of API quota.
+    volumeDetachmentStatusInitialDelay = 5 * time.Second
+    // After the initial delay, poll attach/detach with exponential backoff (2046 seconds total)
+    volumeAttachmentStatusPollDelay = 2 * time.Second
+    volumeAttachmentStatusFactor    = 2
+    volumeAttachmentStatusSteps     = 11
 
     // createTag* is configuration of exponential backoff for CreateTag call. We
     // retry mainly because if we create an object, we cannot tag it until it is
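For reference, the arithmetic behind the "2046 seconds total" comment: wait.ExponentialBackoff runs the condition up to Steps times and sleeps between attempts, so the new constants yield sleeps of 2, 4, ..., 1024 seconds. A minimal standalone sketch (my illustration, not code from this commit) that tallies that schedule:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Values copied from the new constants in the diff above.
	duration := 2 * time.Second // volumeAttachmentStatusPollDelay
	factor := 2.0               // volumeAttachmentStatusFactor
	steps := 11                 // volumeAttachmentStatusSteps

	// wait.ExponentialBackoff polls up to `steps` times, sleeping between
	// attempts, so there are steps-1 sleeps in the worst case.
	total := time.Duration(0)
	for i := 0; i < steps-1; i++ {
		total += duration
		duration = time.Duration(float64(duration) * factor)
	}
	fmt.Println(total) // 34m6s, i.e. the "2046 seconds total" in the comment
}
```

The old schedule (1s start, factor 1.8, 13 steps) summed to roughly 2601 seconds but started polling after only one second; the new one instead defers the first DescribeVolumes call until the operation plausibly finished.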
@@ -2069,17 +2071,24 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
 
 // waitForAttachmentStatus polls until the attachment status is the expected value
 // On success, it returns the last attachment state.
-func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) {
+func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string) (*ec2.VolumeAttachment, error) {
     backoff := wait.Backoff{
-        Duration: volumeAttachmentStatusInitialDelay,
+        Duration: volumeAttachmentStatusPollDelay,
         Factor:   volumeAttachmentStatusFactor,
         Steps:    volumeAttachmentStatusSteps,
     }
 
-    // Because of rate limiting, we often see errors from describeVolume
+    // Because of rate limiting, we often see errors from describeVolume.
+    // Or AWS eventual consistency returns unexpected data.
     // So we tolerate a limited number of failures.
-    // But once we see more than 10 errors in a row, we return the error
-    describeErrorCount := 0
+    // But once we see more than 10 errors in a row, we return the error.
+    errorCount := 0
+
+    // Attach/detach usually takes time. It does not make sense to start
+    // polling DescribeVolumes before some initial delay to let AWS
+    // process the request.
+    time.Sleep(getInitialAttachDetachDelay(status))
 
     var attachment *ec2.VolumeAttachment
 
     err := wait.ExponentialBackoff(backoff, func() (bool, error) {
@@ -2102,8 +2111,8 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
                 return false, err
             }
         }
-        describeErrorCount++
-        if describeErrorCount > volumeAttachmentStatusConsecutiveErrorLimit {
+        errorCount++
+        if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
             // report the error
             return false, err
         }
@@ -2112,8 +2121,6 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
             return false, nil
         }
 
-        describeErrorCount = 0
-
         if len(info.Attachments) > 1 {
             // Shouldn't happen; log so we know if it is
             klog.Warningf("Found multiple attachments for volume %q: %v", d.awsID, info)
@@ -2135,15 +2142,42 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
         if attachmentStatus == "" {
             attachmentStatus = "detached"
         }
+        if attachment != nil {
+            // AWS eventual consistency can go back in time.
+            // For example, we're waiting for a volume to be attached as /dev/xvdba, but AWS can tell us it's
+            // attached as /dev/xvdbb, where it was attached before and it was already detached.
+            // Retry couple of times, hoping AWS starts reporting the right status.
+            device := aws.StringValue(attachment.Device)
+            if expectedDevice != "" && device != "" && device != expectedDevice {
+                klog.Warningf("Expected device %s %s for volume %s, but found device %s %s", expectedDevice, status, d.name, device, attachmentStatus)
+                errorCount++
+                if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
+                    // report the error
+                    return false, fmt.Errorf("attachment of disk %q failed: requested device %q but found %q", d.name, expectedDevice, device)
+                }
+                return false, nil
+            }
+            instanceID := aws.StringValue(attachment.InstanceId)
+            if expectedInstance != "" && instanceID != "" && instanceID != expectedInstance {
+                klog.Warningf("Expected instance %s/%s for volume %s, but found instance %s/%s", expectedInstance, status, d.name, instanceID, attachmentStatus)
+                errorCount++
+                if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
+                    // report the error
+                    return false, fmt.Errorf("attachment of disk %q failed: requested instance %q but found %q", d.name, expectedInstance, instanceID)
+                }
+                return false, nil
+            }
+        }
 
         if attachmentStatus == status {
             // Attachment is in requested state, finish waiting
             return true, nil
         }
         // continue waiting
+        errorCount = 0
         klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
         return false, nil
     })
 
     return attachment, err
 }
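The retry pattern above is easy to miss in the diff: DescribeVolumes failures and eventual-consistency mismatches share one errorCount, only more than volumeAttachmentStatusConsecutiveErrorLimit consecutive bad reads abort the wait, and any consistent read resets the streak (the errorCount = 0 in the "continue waiting" branch). A self-contained sketch of that tolerance pattern, assuming only k8s.io/apimachinery and using nanosecond-scale delays so it runs instantly (my illustration, not code from this commit):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	const errorLimit = 10 // volumeAttachmentStatusConsecutiveErrorLimit
	staleReads := 3       // pretend AWS reports a stale attachment three times
	errorCount := 0

	backoff := wait.Backoff{Duration: 1, Factor: 2, Steps: 11} // nanoseconds, demo only
	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
		if staleReads > 0 {
			staleReads--
			errorCount++ // tolerate the bad read, but remember the streak
			if errorCount > errorLimit {
				return false, fmt.Errorf("giving up after %d consecutive bad reads", errorCount)
			}
			return false, nil // retry
		}
		errorCount = 0 // a consistent read resets the streak
		return true, nil
	})
	fmt.Println(err) // <nil>: the three stale reads were absorbed by the retry budget
}
```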
@@ -2280,7 +2314,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
         klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse)
     }
 
-    attachment, err := disk.waitForAttachmentStatus("attached")
+    attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device)
 
     if err != nil {
         if err == wait.ErrWaitTimeout {
@@ -2300,6 +2334,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
         return "", fmt.Errorf("unexpected state: attachment nil after attached %q to %q", diskName, nodeName)
     }
     if ec2Device != aws.StringValue(attachment.Device) {
+        // Already checked in waitForAttachmentStatus(), but just to be sure...
         return "", fmt.Errorf("disk attachment of %q to %q failed: requested device %q but found %q", diskName, nodeName, ec2Device, aws.StringValue(attachment.Device))
     }
     if awsInstance.awsID != aws.StringValue(attachment.InstanceId) {
@@ -2357,7 +2392,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
         return "", errors.New("no response from DetachVolume")
     }
 
-    attachment, err := diskInfo.disk.waitForAttachmentStatus("detached")
+    attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "")
     if err != nil {
         return "", err
     }
@@ -4619,3 +4654,10 @@ func setNodeDisk(
     }
     volumeMap[volumeID] = check
 }
+
+func getInitialAttachDetachDelay(status string) time.Duration {
+    if status == "detached" {
+        return volumeDetachmentStatusInitialDelay
+    }
+    return volumeAttachmentStatusInitialDelay
+}
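Finally, the status string is what selects the initial sleep before the first poll. Restated as a runnable snippet, with the constants and helper copied from the diff so it compiles on its own (demo only, not part of the commit):

```go
package main

import (
	"fmt"
	"time"
)

// Copied from the diff above so this snippet is self-contained.
const (
	volumeAttachmentStatusInitialDelay = 2 * time.Second
	volumeDetachmentStatusInitialDelay = 5 * time.Second
)

func getInitialAttachDetachDelay(status string) time.Duration {
	if status == "detached" {
		return volumeDetachmentStatusInitialDelay
	}
	return volumeAttachmentStatusInitialDelay
}

func main() {
	fmt.Println(getInitialAttachDetachDelay("attached")) // 2s: attach is polled sooner
	fmt.Println(getInitialAttachDetachDelay("detached")) // 5s: detach takes longer on AWS
}
```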
