Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix FibreChannel volume plugin corrupting filesystem on detach #97013

Merged
merged 2 commits into from Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 5 additions & 2 deletions pkg/volume/fc/attacher.go
Expand Up @@ -132,6 +132,7 @@ func (attacher *fcAttacher) MountDevice(spec *volume.Spec, devicePath string, de
type fcDetacher struct {
mounter mount.Interface
manager diskManager
host volume.VolumeHost
}

var _ volume.Detacher = &fcDetacher{}
Expand All @@ -142,6 +143,7 @@ func (plugin *fcPlugin) NewDetacher() (volume.Detacher, error) {
return &fcDetacher{
mounter: plugin.host.GetMounter(plugin.GetPluginName()),
manager: &fcUtil{},
host: plugin.host,
}, nil
}

Expand Down Expand Up @@ -170,7 +172,7 @@ func (detacher *fcDetacher) UnmountDevice(deviceMountPath string) error {
if devName == "" {
return nil
}
unMounter := volumeSpecToUnmounter(detacher.mounter)
unMounter := volumeSpecToUnmounter(detacher.mounter, detacher.host)
err = detacher.manager.DetachDisk(*unMounter, devName)
if err != nil {
return fmt.Errorf("fc: failed to detach disk: %s\nError: %v", devName, err)
Expand Down Expand Up @@ -230,12 +232,13 @@ func volumeSpecToMounter(spec *volume.Spec, host volume.VolumeHost) (*fcDiskMoun
}, nil
}

func volumeSpecToUnmounter(mounter mount.Interface) *fcDiskUnmounter {
func volumeSpecToUnmounter(mounter mount.Interface, host volume.VolumeHost) *fcDiskUnmounter {
return &fcDiskUnmounter{
fcDisk: &fcDisk{
io: &osIOHandler{},
},
mounter: mounter,
deviceUtil: volumeutil.NewDeviceHandler(volumeutil.NewIOHandler()),
exec: host.GetExec(fcPluginName),
}
}
12 changes: 8 additions & 4 deletions pkg/volume/fc/fc.go
Expand Up @@ -189,10 +189,10 @@ func (plugin *fcPlugin) newBlockVolumeMapperInternal(spec *volume.Spec, podUID t

func (plugin *fcPlugin) NewUnmounter(volName string, podUID types.UID) (volume.Unmounter, error) {
// Inject real implementations here, test through the internal function.
return plugin.newUnmounterInternal(volName, podUID, &fcUtil{}, plugin.host.GetMounter(plugin.GetPluginName()))
return plugin.newUnmounterInternal(volName, podUID, &fcUtil{}, plugin.host.GetMounter(plugin.GetPluginName()), plugin.host.GetExec(plugin.GetPluginName()))
}

func (plugin *fcPlugin) newUnmounterInternal(volName string, podUID types.UID, manager diskManager, mounter mount.Interface) (volume.Unmounter, error) {
func (plugin *fcPlugin) newUnmounterInternal(volName string, podUID types.UID, manager diskManager, mounter mount.Interface, exec utilexec.Interface) (volume.Unmounter, error) {
return &fcDiskUnmounter{
fcDisk: &fcDisk{
podUID: podUID,
Expand All @@ -203,14 +203,15 @@ func (plugin *fcPlugin) newUnmounterInternal(volName string, podUID types.UID, m
},
mounter: mounter,
deviceUtil: util.NewDeviceHandler(util.NewIOHandler()),
exec: exec,
}, nil
}

func (plugin *fcPlugin) NewBlockVolumeUnmapper(volName string, podUID types.UID) (volume.BlockVolumeUnmapper, error) {
return plugin.newUnmapperInternal(volName, podUID, &fcUtil{})
return plugin.newUnmapperInternal(volName, podUID, &fcUtil{}, plugin.host.GetExec(plugin.GetPluginName()))
}

func (plugin *fcPlugin) newUnmapperInternal(volName string, podUID types.UID, manager diskManager) (volume.BlockVolumeUnmapper, error) {
func (plugin *fcPlugin) newUnmapperInternal(volName string, podUID types.UID, manager diskManager, exec utilexec.Interface) (volume.BlockVolumeUnmapper, error) {
return &fcDiskUnmapper{
fcDisk: &fcDisk{
podUID: podUID,
Expand All @@ -219,6 +220,7 @@ func (plugin *fcPlugin) newUnmapperInternal(volName string, podUID types.UID, ma
plugin: plugin,
io: &osIOHandler{},
},
exec: exec,
deviceUtil: util.NewDeviceHandler(util.NewIOHandler()),
}, nil
}
Expand Down Expand Up @@ -373,6 +375,7 @@ type fcDiskUnmounter struct {
*fcDisk
mounter mount.Interface
deviceUtil util.DeviceUtil
exec utilexec.Interface
}

var _ volume.Unmounter = &fcDiskUnmounter{}
Expand Down Expand Up @@ -400,6 +403,7 @@ var _ volume.BlockVolumeMapper = &fcDiskMapper{}
type fcDiskUnmapper struct {
*fcDisk
deviceUtil util.DeviceUtil
exec utilexec.Interface
}

var _ volume.BlockVolumeUnmapper = &fcDiskUnmapper{}
Expand Down
2 changes: 1 addition & 1 deletion pkg/volume/fc/fc_test.go
Expand Up @@ -190,7 +190,7 @@ func doTestPlugin(t *testing.T, spec *volume.Spec) {

fakeManager2 := newFakeDiskManager()
defer fakeManager2.Cleanup()
unmounter, err := plug.(*fcPlugin).newUnmounterInternal("vol1", types.UID("poduid"), fakeManager2, fakeMounter)
unmounter, err := plug.(*fcPlugin).newUnmounterInternal("vol1", types.UID("poduid"), fakeManager2, fakeMounter, fakeExec)
if err != nil {
t.Errorf("Failed to make a new Unmounter: %v", err)
}
Expand Down
33 changes: 30 additions & 3 deletions pkg/volume/fc/fc_util.go
Expand Up @@ -27,6 +27,7 @@ import (

"k8s.io/klog/v2"
"k8s.io/mount-utils"
utilexec "k8s.io/utils/exec"

"k8s.io/kubernetes/pkg/volume"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
Expand Down Expand Up @@ -111,6 +112,16 @@ func findDiskWWIDs(wwid string, io ioHandler, deviceUtil volumeutil.DeviceUtil)
return "", ""
}

// Flushes any outstanding I/O to the device
func flushDevice(deviceName string, exec utilexec.Interface) {
out, err := exec.Command("blockdev", "--flushbufs", deviceName).CombinedOutput()
if err != nil {
// Ignore the error and continue deleting the device. There is will be no retry on error.
klog.Warningf("Failed to flush device %s: %s\n%s", deviceName, err, string(out))
}
klog.V(4).Infof("Flushed device %s", deviceName)
}

// Removes a scsi device based upon /dev/sdX name
func removeFromScsiSubsystem(deviceName string, io ioHandler) {
fileName := "/sys/block/" + deviceName + "/device/delete"
Expand Down Expand Up @@ -257,14 +268,17 @@ func (util *fcUtil) DetachDisk(c fcDiskUnmounter, devicePath string) error {
// Find slave
if strings.HasPrefix(dstPath, "/dev/dm-") {
devices = c.deviceUtil.FindSlaveDevicesOnMultipath(dstPath)
if err := util.deleteMultipathDevice(c.exec, dstPath); err != nil {
return err
}
} else {
// Add single devicepath to devices
devices = append(devices, dstPath)
}
klog.V(4).Infof("fc: DetachDisk devicePath: %v, dstPath: %v, devices: %v", devicePath, dstPath, devices)
var lastErr error
for _, device := range devices {
err := util.detachFCDisk(c.io, device)
err := util.detachFCDisk(c.io, c.exec, device)
if err != nil {
klog.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err)
lastErr = fmt.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err)
Expand All @@ -278,11 +292,12 @@ func (util *fcUtil) DetachDisk(c fcDiskUnmounter, devicePath string) error {
}

// detachFCDisk removes scsi device file such as /dev/sdX from the node.
func (util *fcUtil) detachFCDisk(io ioHandler, devicePath string) error {
func (util *fcUtil) detachFCDisk(io ioHandler, exec utilexec.Interface, devicePath string) error {
// Remove scsi device from the node.
if !strings.HasPrefix(devicePath, "/dev/") {
return fmt.Errorf("fc detach disk: invalid device name: %s", devicePath)
}
flushDevice(devicePath, exec)
arr := strings.Split(devicePath, "/")
dev := arr[len(arr)-1]
removeFromScsiSubsystem(dev, io)
Expand Down Expand Up @@ -354,13 +369,16 @@ func (util *fcUtil) DetachBlockFCDisk(c fcDiskUnmapper, mapPath, devicePath stri
if len(dm) != 0 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same link - https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/storage_administration_guide/removing_devices#:~:text=Run%20multipath%20%2Dl%20command%20to,to%20remove%20the%20multipath%20device. recommends that for raw block devices we should call blockdev --flushbufs device before removing the device. Should we do that as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added as a separate commit.
BTW, we should refactor FC and iSCSI to use the same flush / device deletion. But that's for a separate PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah lets file that as a separate issue I think.

// Find all devices which are managed by multipath
devices = c.deviceUtil.FindSlaveDevicesOnMultipath(dm)
if err := util.deleteMultipathDevice(c.exec, dm); err != nil {
return err
}
} else {
// Add single device path to devices
devices = append(devices, dstPath)
}
var lastErr error
for _, device := range devices {
err = util.detachFCDisk(c.io, device)
err = util.detachFCDisk(c.io, c.exec, device)
if err != nil {
klog.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err)
lastErr = fmt.Errorf("fc: detachFCDisk failed. device: %v err: %v", device, err)
Expand All @@ -373,6 +391,15 @@ func (util *fcUtil) DetachBlockFCDisk(c fcDiskUnmapper, mapPath, devicePath stri
return nil
}

func (util *fcUtil) deleteMultipathDevice(exec utilexec.Interface, dmDevice string) error {
out, err := exec.Command("multipath", "-f", dmDevice).CombinedOutput()
if err != nil {
return fmt.Errorf("failed to flush multipath device %s: %s\n%s", dmDevice, err, string(out))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just want to confirm whether in certain cases, it should continue instead of return error (e.g., the error means device no longer exist?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! That's why iscsi volume plugin ignores the error code...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the device is missing:

  • Filesystem volumes: operation_generator.go will stop with "The path isn't device path or doesn't exist. Skip checking device path: /dev/dm-0" and it won't call volume plugin at all. Therefore, if the multipath device is removed, but the individual paths are still there, nothing will remove them.

  • Block volumes: if the device is missing, the next DetachBlockFCDisk will stop in checkPathExists, if not earlier in operation_generator.go (did not test it).

    if pathExists, pathErr := checkPathExists(devicePath); !pathExists || pathErr != nil {

iscsi volume plugin actually ignores the error code from "multipath -f", continuing deleting the device paths. Which may result in volume corruption, if the multipath device still exists.. Now is the question what is actually correct - try to remove the individual paths to fully detach the devices and risk corruption OR do not remove the paths at all? The paths should just sit in /dev, not used by anything.

My gut feeling is to copy iscsi approach and ignore the exit code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

according to manpage
-f Flush (remove) a multipath device map specified as parameter, if unused.

So if multipath -f fails, there is a good chance the device is still being used and removing it could cause data corruption. In this sense, the implementation in iscsi is indeed risky.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit:

return fmt.Errorf("failed to flush multipath device %s: %v\n%s", dmDevice, err, string(out))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, %s is fine for errror type.

}
klog.V(4).Infof("Flushed multipath device: %s", dmDevice)
return nil
}

func checkPathExists(path string) (bool, error) {
if pathExists, pathErr := mount.PathExists(path); pathErr != nil {
return pathExists, fmt.Errorf("Error checking if path exists: %v", pathErr)
Expand Down