New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix FibreChannel volume plugin corrupting filesystem on detach #97013
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -27,6 +27,7 @@ import ( | |||
|
||||
"k8s.io/klog/v2" | ||||
"k8s.io/mount-utils" | ||||
utilexec "k8s.io/utils/exec" | ||||
|
||||
"k8s.io/kubernetes/pkg/volume" | ||||
volumeutil "k8s.io/kubernetes/pkg/volume/util" | ||||
|
@@ -111,6 +112,16 @@ func findDiskWWIDs(wwid string, io ioHandler, deviceUtil volumeutil.DeviceUtil) | |||
return "", "" | ||||
} | ||||
|
||||
// Flushes any outstanding I/O to the device | ||||
func flushDevice(deviceName string, exec utilexec.Interface) { | ||||
out, err := exec.Command("blockdev", "--flushbufs", deviceName).CombinedOutput() | ||||
if err != nil { | ||||
// Ignore the error and continue deleting the device. There is will be no retry on error. | ||||
klog.Warningf("Failed to flush device %s: %s\n%s", deviceName, err, string(out)) | ||||
} | ||||
klog.V(4).Infof("Flushed device %s", deviceName) | ||||
} | ||||
|
||||
// Removes a scsi device based upon /dev/sdX name | ||||
func removeFromScsiSubsystem(deviceName string, io ioHandler) { | ||||
fileName := "/sys/block/" + deviceName + "/device/delete" | ||||
|
@@ -257,14 +268,17 @@ func (util *fcUtil) DetachDisk(c fcDiskUnmounter, devicePath string) error { | |||
// Find slave | ||||
if strings.HasPrefix(dstPath, "/dev/dm-") { | ||||
devices = c.deviceUtil.FindSlaveDevicesOnMultipath(dstPath) | ||||
if err := util.deleteMultipathDevice(c.exec, dstPath); err != nil { | ||||
return err | ||||
} | ||||
} else { | ||||
// Add single devicepath to devices | ||||
devices = append(devices, dstPath) | ||||
} | ||||
klog.V(4).Infof("fc: DetachDisk devicePath: %v, dstPath: %v, devices: %v", devicePath, dstPath, devices) | ||||
var lastErr error | ||||
for _, device := range devices { | ||||
err := util.detachFCDisk(c.io, device) | ||||
err := util.detachFCDisk(c.io, c.exec, device) | ||||
if err != nil { | ||||
klog.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err) | ||||
lastErr = fmt.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err) | ||||
|
@@ -278,11 +292,12 @@ func (util *fcUtil) DetachDisk(c fcDiskUnmounter, devicePath string) error { | |||
} | ||||
|
||||
// detachFCDisk removes scsi device file such as /dev/sdX from the node. | ||||
func (util *fcUtil) detachFCDisk(io ioHandler, devicePath string) error { | ||||
func (util *fcUtil) detachFCDisk(io ioHandler, exec utilexec.Interface, devicePath string) error { | ||||
// Remove scsi device from the node. | ||||
if !strings.HasPrefix(devicePath, "/dev/") { | ||||
return fmt.Errorf("fc detach disk: invalid device name: %s", devicePath) | ||||
} | ||||
flushDevice(devicePath, exec) | ||||
arr := strings.Split(devicePath, "/") | ||||
dev := arr[len(arr)-1] | ||||
removeFromScsiSubsystem(dev, io) | ||||
|
@@ -354,13 +369,16 @@ func (util *fcUtil) DetachBlockFCDisk(c fcDiskUnmapper, mapPath, devicePath stri | |||
if len(dm) != 0 { | ||||
// Find all devices which are managed by multipath | ||||
devices = c.deviceUtil.FindSlaveDevicesOnMultipath(dm) | ||||
if err := util.deleteMultipathDevice(c.exec, dm); err != nil { | ||||
return err | ||||
} | ||||
} else { | ||||
// Add single device path to devices | ||||
devices = append(devices, dstPath) | ||||
} | ||||
var lastErr error | ||||
for _, device := range devices { | ||||
err = util.detachFCDisk(c.io, device) | ||||
err = util.detachFCDisk(c.io, c.exec, device) | ||||
if err != nil { | ||||
klog.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err) | ||||
lastErr = fmt.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err) | ||||
|
@@ -373,6 +391,15 @@ func (util *fcUtil) DetachBlockFCDisk(c fcDiskUnmapper, mapPath, devicePath stri | |||
return nil | ||||
} | ||||
|
||||
func (util *fcUtil) deleteMultipathDevice(exec utilexec.Interface, dmDevice string) error { | ||||
out, err := exec.Command("multipath", "-f", dmDevice).CombinedOutput() | ||||
if err != nil { | ||||
return fmt.Errorf("failed to flush multipath device %s: %s\n%s", dmDevice, err, string(out)) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just want to confirm whether in certain cases, it should continue instead of return error (e.g., the error means device no longer exist?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! That's why iscsi volume plugin ignores the error code... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the device is missing:
The iscsi volume plugin actually ignores the error code from "multipath -f" and continues deleting the device paths, which may result in volume corruption if the multipath device still exists. Now the question is what is actually correct - try to remove the individual paths to fully detach the devices and risk corruption, OR do not remove the paths at all? The paths should just sit in /dev, not used by anything. My gut feeling is to copy the iscsi approach and ignore the exit code. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. according to manpage So if There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. nit: return fmt.Errorf("failed to flush multipath device %s: %v\n%s", dmDevice, err, string(out)) There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. No, |
||||
} | ||||
klog.V(4).Infof("Flushed multipath device: %s", dmDevice) | ||||
return nil | ||||
} | ||||
|
||||
func checkPathExists(path string) (bool, error) { | ||||
if pathExists, pathErr := mount.PathExists(path); pathErr != nil { | ||||
return pathExists, fmt.Errorf("Error checking if path exists: %v", pathErr) | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same link - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/storage_administration_guide/removing_devices#:~:text=Run%20multipath%20%2Dl%20command%20to,to%20remove%20the%20multipath%20device. recommends that for raw block devices we should call
blockdev --flushbufs device
before removing the device. Should we do that as well? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added as a separate commit.
BTW, we should refactor FC and iSCSI to use the same flush / device deletion. But that's for a separate PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, let's file that as a separate issue, I think.