Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #59601: AWS: Check error code returned from describeVolume #59756

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 51 additions & 8 deletions pkg/cloudprovider/providers/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,20 @@ func newAWSDisk(aws *Cloud, name KubernetesVolumeID) (*awsDisk, error) {
return disk, nil
}

// Helper function for describeVolume callers. Tries to retype given error to AWS error
// and returns true in case the AWS error is "InvalidVolume.NotFound", false otherwise
func isAWSErrorVolumeNotFound(err error) bool {
if err != nil {
if awsError, ok := err.(awserr.Error); ok {
// https://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
if awsError.Code() == "InvalidVolume.NotFound" {
return true
}
}
}
return false
}

// Gets the full information about this volume from the EC2 API
func (d *awsDisk) describeVolume() (*ec2.Volume, error) {
volumeID := d.awsID
Expand All @@ -1670,13 +1684,13 @@ func (d *awsDisk) describeVolume() (*ec2.Volume, error) {

volumes, err := d.ec2.DescribeVolumes(request)
if err != nil {
return nil, fmt.Errorf("error querying ec2 for volume %q: %q", volumeID, err)
return nil, err
}
if len(volumes) == 0 {
return nil, fmt.Errorf("no volumes found for volume %q", volumeID)
return nil, fmt.Errorf("no volumes found")
}
if len(volumes) > 1 {
return nil, fmt.Errorf("multiple volumes found for volume %q", volumeID)
return nil, fmt.Errorf("multiple volumes found")
}
return volumes[0], nil
}
Expand Down Expand Up @@ -1780,12 +1794,29 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
info, err := d.describeVolume()
if err != nil {
// The VolumeNotFound error is special -- we don't need to wait for it to repeat
if isAWSErrorVolumeNotFound(err) {
if status == "detached" {
// The disk doesn't exist, assume it's detached, log warning and stop waiting
glog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID)
stateStr := "detached"
attachment = &ec2.VolumeAttachment{
State: &stateStr,
}
return true, nil
}
if status == "attached" {
// The disk doesn't exist, complain, give up waiting and report error
glog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID)
return false, err
}
}
describeErrorCount++
if describeErrorCount > volumeAttachmentStatusConsecutiveErrorLimit {
// report the error
return false, err
} else {
glog.Warningf("Ignoring error from describe volume; will retry: %q", err)
glog.Warningf("Ignoring error from describe volume for volume %q; will retry: %q", d.awsID, err)
return false, nil
}
} else {
Expand Down Expand Up @@ -2013,7 +2044,13 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName,
func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) (string, error) {
diskInfo, attached, err := c.checkIfAttachedToNode(diskName, nodeName)
if err != nil {
return "", err
if isAWSErrorVolumeNotFound(err) {
// Someone deleted the volume being detached; complain, but do nothing else and return success
glog.Warningf("DetachDisk %s called for node %s but volume does not exist; assuming the volume is detached", diskName, nodeName)
return "", nil
} else {
return "", err
}
}

if !attached && diskInfo.ec2Instance != nil {
Expand Down Expand Up @@ -2308,9 +2345,15 @@ func (c *Cloud) GetDiskPath(volumeName KubernetesVolumeID) (string, error) {

// DiskIsAttached implements Volumes.DiskIsAttached
func (c *Cloud) DiskIsAttached(diskName KubernetesVolumeID, nodeName types.NodeName) (bool, error) {
diskInfo, attached, err := c.checkIfAttachedToNode(diskName, nodeName)
if diskInfo == nil {
return true, err
_, attached, err := c.checkIfAttachedToNode(diskName, nodeName)
if err != nil {
if isAWSErrorVolumeNotFound(err) {
// The disk doesn't exist, can't be attached
glog.Warningf("DiskIsAttached called for volume %s on node %s but the volume does not exist", diskName, nodeName)
return false, nil
} else {
return true, err
}
}

return attached, nil
Expand Down
5 changes: 2 additions & 3 deletions pkg/cloudprovider/providers/aws/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,9 @@ func (c *Cloud) checkIfAttachedToNode(diskName KubernetesVolumeID, nodeName type
info, err := disk.describeVolume()

if err != nil {
describeError := fmt.Errorf("Error describing volume %s with %v", diskName, err)
glog.Warning(describeError)
glog.Warning("Error describing volume %s with %v", diskName, err)
awsDiskInfo.volumeState = "unknown"
return awsDiskInfo, false, describeError
return awsDiskInfo, false, err
}

awsDiskInfo.volumeState = aws.StringValue(info.State)
Expand Down