Fixed races in Cinder volume attach/detach.
Add a mutex to guard SetUpAt() and TearDownAt() calls - they must not run in
parallel. There is a race between these calls when two pods use the same
volume: one of them is dying and the other one is starting.

TearDownAt() checks that a volume is no longer needed by any pod and detaches
the volume. It does so by counting how many times the volume is mounted
(the GetMountRefs() call below).

When SetUpAt() of the starting pod has already attached the volume but has not
mounted it yet, TearDownAt() of the dying pod will detach it - GetMountRefs()
does not account for this volume.

These two threads run in parallel:

 dying pod.TearDownAt("myVolume")          starting pod.SetUpAt("myVolume")
   |                                       |
   |                                       AttachDisk("myVolume")
   refs, err := mount.GetMountRefs()       |
   Unmount("myDir")                        |
   if refs == 1 {                          |
   |  |                                    Mount("myVolume", "myDir")
   |  |                                    |
   |  DetachDisk("myVolume")               |
   |                                       start containers - OOPS! The volume is detached!
   |
   finish the pod cleanup
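
The fix below serializes these two paths per volume with the keymutex package
(NewKeyMutex / LockKey / UnlockKey, as used in the diff). A minimal sketch of the
pattern, not the actual plugin code: the volume ID is passed in directly for
brevity, and attachAndMount / unmountAndMaybeDetach are hypothetical stand-ins
for the real SetUpAt/TearDownAt bodies.

package main

import "k8s.io/kubernetes/pkg/util/keymutex"

type plugin struct {
    // One lock per volume ID: operations on different volumes still run in
    // parallel, operations on the same volume are serialized.
    volumeLocks keymutex.KeyMutex
}

func (p *plugin) SetUpAt(volumeID, dir string) error {
    p.volumeLocks.LockKey(volumeID)
    defer p.volumeLocks.UnlockKey(volumeID)
    // Attach and bind-mount while holding the lock, so a concurrent
    // TearDownAt can never observe "attached but not yet mounted".
    return attachAndMount(volumeID, dir)
}

func (p *plugin) TearDownAt(volumeID, dir string) error {
    p.volumeLocks.LockKey(volumeID)
    defer p.volumeLocks.UnlockKey(volumeID)
    // Count references and detach only while holding the same lock.
    return unmountAndMaybeDetach(volumeID, dir)
}

// Placeholders for the real attach/mount/unmount/detach logic.
func attachAndMount(volumeID, dir string) error        { return nil }
func unmountAndMaybeDetach(volumeID, dir string) error { return nil }

func main() {
    p := &plugin{volumeLocks: keymutex.NewKeyMutex()}
    _ = p.SetUpAt("myVolume", "myDir")
    _ = p.TearDownAt("myVolume", "myDir")
}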


Also, add some logs to the Cinder plugin for easier debugging in the future,
add a test, and update the fake mounter to know about bind mounts.
jsafrane committed Feb 2, 2016
1 parent f7cab1f commit 220163f
Showing 3 changed files with 269 additions and 15 deletions.
58 changes: 56 additions & 2 deletions pkg/util/mount/fake.go
@@ -16,10 +16,19 @@ limitations under the License.

package mount

import (
"sync"

"github.com/golang/glog"
)

// FakeMounter implements mount.Interface for tests.
type FakeMounter struct {
MountPoints []MountPoint
Log []FakeAction
// Some tests run things in parallel; make sure the mounter does not produce
// any of Go's DATA RACE warnings.
mutex sync.Mutex
}

var _ Interface = &FakeMounter{}
@@ -37,36 +46,81 @@ type FakeAction struct {
}

func (f *FakeMounter) ResetLog() {
f.mutex.Lock()
defer f.mutex.Unlock()

f.Log = []FakeAction{}
}

func (f *FakeMounter) Mount(source string, target string, fstype string, options []string) error {
f.mutex.Lock()
defer f.mutex.Unlock()

// find 'bind' option
for _, option := range options {
if option == "bind" {
// This is a bind-mount. In order to mimic linux behaviour, we must
// use the original device of the bind-mount as the real source.
// E.g. when /dev/sda is mounted like this:
// $ mount /dev/sda /mnt/test
// $ mount -o bind /mnt/test /mnt/bound
// then /proc/mounts contains:
// /dev/sda /mnt/test
// /dev/sda /mnt/bound
// (and not /mnt/test /mnt/bound)
// I.e. we must use /dev/sda as source instead of /mnt/test in the
// bind mount.
for _, mnt := range f.MountPoints {
if source == mnt.Path {
source = mnt.Device
break
}
}
break
}
}

f.MountPoints = append(f.MountPoints, MountPoint{Device: source, Path: target, Type: fstype})
glog.V(5).Infof("Fake mounter: mouted %s to %s", source, target)
f.Log = append(f.Log, FakeAction{Action: FakeActionMount, Target: target, Source: source, FSType: fstype})
return nil
}

func (f *FakeMounter) Unmount(target string) error {
f.mutex.Lock()
defer f.mutex.Unlock()

newMountpoints := []MountPoint{}
for _, mp := range f.MountPoints {
if mp.Path == target {
glog.V(5).Infof("Fake mounter: unmounted %s from %s", mp.Device, target)
// Don't copy it to newMountpoints
continue
}
newMountpoints = append(newMountpoints, MountPoint{Device: mp.Device, Path: mp.Path, Type: mp.Type})
}
f.MountPoints = newMountpoints
f.Log = append(f.Log, FakeAction{Action: FakeActionUnmount, Target: target})
return nil
}

func (f *FakeMounter) List() ([]MountPoint, error) {
f.mutex.Lock()
defer f.mutex.Unlock()

return f.MountPoints, nil
}

func (f *FakeMounter) IsLikelyNotMountPoint(file string) (bool, error) {
f.mutex.Lock()
defer f.mutex.Unlock()

for _, mp := range f.MountPoints {
if mp.Path == file {
glog.V(5).Infof("isLikelyMountPoint for %s: monted %s, false", file, mp.Path)
return false, nil
}
}
glog.V(5).Infof("isLikelyMountPoint for %s: true", file)
return true, nil
}
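
A hypothetical test, not part of this commit, showing how the fake mounter above
now resolves bind mounts: after a bind mount, the recorded mount point carries
the original device as its source, mirroring what /proc/mounts would show on
Linux.

package mount

import "testing"

// Hypothetical test (illustration only): exercises the bind-mount handling
// added to FakeMounter.Mount above.
func TestFakeMounterBindMount(t *testing.T) {
    fake := &FakeMounter{}

    // Regular mount: /dev/sda -> /mnt/test.
    if err := fake.Mount("/dev/sda", "/mnt/test", "ext4", nil); err != nil {
        t.Fatalf("mount failed: %v", err)
    }
    // Bind mount: /mnt/test -> /mnt/bound. The fake should record /dev/sda
    // as the device, the same way the kernel reports it in /proc/mounts.
    if err := fake.Mount("/mnt/test", "/mnt/bound", "", []string{"bind"}); err != nil {
        t.Fatalf("bind mount failed: %v", err)
    }

    mounts, err := fake.List()
    if err != nil {
        t.Fatalf("List failed: %v", err)
    }
    for _, mp := range mounts {
        if mp.Path == "/mnt/bound" && mp.Device != "/dev/sda" {
            t.Errorf("expected /mnt/bound to have device /dev/sda, got %s", mp.Device)
        }
    }
}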
53 changes: 48 additions & 5 deletions pkg/volume/cinder/cinder.go
@@ -28,18 +28,21 @@ import (
"k8s.io/kubernetes/pkg/cloudprovider/providers/openstack"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/exec"
"k8s.io/kubernetes/pkg/util/keymutex"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/util/strings"
"k8s.io/kubernetes/pkg/volume"
)

// This is the primary entrypoint for volume plugins.
func ProbeVolumePlugins() []volume.VolumePlugin {
return []volume.VolumePlugin{&cinderPlugin{nil}}
return []volume.VolumePlugin{&cinderPlugin{}}
}

type cinderPlugin struct {
host volume.VolumeHost
// Guarding SetUp and TearDown operations
volumeLocks keymutex.KeyMutex
}

var _ volume.VolumePlugin = &cinderPlugin{}
@@ -53,6 +56,7 @@ const (

func (plugin *cinderPlugin) Init(host volume.VolumeHost) error {
plugin.host = host
plugin.volumeLocks = keymutex.NewKeyMutex()
return nil
}

@@ -228,19 +232,27 @@ func (b *cinderVolumeBuilder) SetUp(fsGroup *int64) error {

// SetUp attaches the disk and bind mounts to the volume path.
func (b *cinderVolumeBuilder) SetUpAt(dir string, fsGroup *int64) error {
glog.V(5).Infof("Cinder SetUp %s to %s", b.pdName, dir)

b.plugin.volumeLocks.LockKey(b.pdName)
defer b.plugin.volumeLocks.UnlockKey(b.pdName)

// TODO: handle failed mounts here.
notmnt, err := b.mounter.IsLikelyNotMountPoint(dir)
glog.V(4).Infof("PersistentDisk set up: %s %v %v", dir, !notmnt, err)
if err != nil && !os.IsNotExist(err) {
glog.V(4).Infof("IsLikelyNotMountPoint failed: %v", err)
return err
}
if !notmnt {
glog.V(4).Infof("Something is already mounted to target %s", dir)
return nil
}
globalPDPath := makeGlobalPDName(b.plugin.host, b.pdName)
if err := b.manager.AttachDisk(b, globalPDPath); err != nil {
glog.V(4).Infof("AttachDisk failed: %v", err)
return err
}
glog.V(3).Infof("Cinder volume %s attached", b.pdName)

options := []string{"bind"}
if b.readOnly {
@@ -249,13 +261,15 @@ func (b *cinderVolumeBuilder) SetUpAt(dir string, fsGroup *int64) error {

if err := os.MkdirAll(dir, 0750); err != nil {
// TODO: we should really eject the attach/detach out into its own control loop.
glog.V(4).Infof("Could not create directory %s: %v", dir, err)
detachDiskLogError(b.cinderVolume)
return err
}

// Perform a bind mount to the full path to allow duplicate mounts of the same PD.
err = b.mounter.Mount(globalPDPath, dir, "", options)
if err != nil {
glog.V(4).Infof("Mount failed: %v", err)
notmnt, mntErr := b.mounter.IsLikelyNotMountPoint(dir)
if mntErr != nil {
glog.Errorf("IsLikelyNotMountPoint check failed: %v", mntErr)
@@ -286,6 +300,7 @@ func (b *cinderVolumeBuilder) SetUpAt(dir string, fsGroup *int64) error {
if !b.readOnly {
volume.SetVolumeOwnership(b, fsGroup)
}
glog.V(3).Infof("Cinder volume %s mounted to %s", b.pdName, dir)

return nil
}
@@ -312,37 +327,65 @@ func (c *cinderVolumeCleaner) TearDown() error {
// Unmounts the bind mount, and detaches the disk only if the PD
// resource was the last reference to that disk on the kubelet.
func (c *cinderVolumeCleaner) TearDownAt(dir string) error {
glog.V(5).Infof("Cinder TearDown of %s", dir)
notmnt, err := c.mounter.IsLikelyNotMountPoint(dir)
if err != nil {
glog.V(4).Infof("IsLikelyNotMountPoint check failed: %v", err)
return err
}
if notmnt {
glog.V(4).Infof("Nothing is mounted to %s, ignoring", dir)
return os.Remove(dir)
}

// Find Cinder volumeID to lock the right volume
// TODO: refactor VolumePlugin.NewCleaner to get full volume.Spec just like
// NewBuilder. We could then find volumeID there without probing MountRefs.
refs, err := mount.GetMountRefs(c.mounter, dir)
if err != nil {
glog.V(4).Infof("GetMountRefs failed: %v", err)
return err
}
if len(refs) == 0 {
glog.V(4).Infof("Directory %s is not mounted", dir)
return fmt.Errorf("directory %s is not mounted", dir)
}
c.pdName = path.Base(refs[0])
glog.V(4).Infof("Found volume %s mounted to %s", c.pdName, dir)

// lock the volume (and thus wait for any concurrent SetUpAt to finish)
c.plugin.volumeLocks.LockKey(c.pdName)
defer c.plugin.volumeLocks.UnlockKey(c.pdName)

// Reload the list of references; a concurrent SetUpAt may have finished in the meantime
refs, err = mount.GetMountRefs(c.mounter, dir)
if err != nil {
glog.V(4).Infof("GetMountRefs failed: %v", err)
return err
}
if err := c.mounter.Unmount(dir); err != nil {
glog.V(4).Infof("Unmount failed: %v", err)
return err
}
glog.Infof("successfully unmounted: %s\n", dir)
glog.V(3).Infof("Successfully unmounted: %s\n", dir)

// If refCount is 1, then all bind mounts have been removed, and the
// remaining reference is the global mount. It is safe to detach.
if len(refs) == 1 {
if err := c.manager.DetachDisk(c); err != nil {
glog.V(4).Infof("DetachDisk failed: %v", err)
return err
}
glog.V(3).Infof("Volume %s detached", c.pdName)
}
notmnt, mntErr := c.mounter.IsLikelyNotMountPoint(dir)
if mntErr != nil {
glog.Errorf("IsLikelyNotMountPoint check failed: %v", mntErr)
return err
}
if notmnt {
if err := os.Remove(dir); err != nil {
glog.V(4).Infof("Failed to remove directory after unmount: %v", err)
return err
}
}
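
For context on the len(refs) == 1 check and path.Base(refs[0]) above: every pod
using the volume bind-mounts the single global mount directory, whose last path
element is the Cinder volume ID, so GetMountRefs returns the global mount plus
any other pods' directories. An illustrative helper, not part of this commit,
that mirrors that decision; the directory names in the example are assumptions
about the kubelet layout.

package main

import (
    "fmt"
    "path"
)

// volumeIDAndDetach mirrors what TearDownAt derives from mount.GetMountRefs:
// the Cinder volume ID (taken from the first reference, assumed to be the
// global mount directory) and whether it is safe to detach the disk.
func volumeIDAndDetach(refs []string) (volumeID string, detach bool) {
    if len(refs) == 0 {
        return "", false // the directory is not backed by a known device
    }
    volumeID = path.Base(refs[0])
    // A single remaining reference means only the global mount is left,
    // so no other pod is using the disk and it can be detached.
    return volumeID, len(refs) == 1
}

func main() {
    // Another pod still uses the volume: global mount + that pod's directory
    // (paths are illustrative).
    fmt.Println(volumeIDAndDetach([]string{
        "/var/lib/kubelet/plugins/kubernetes.io/cinder/mounts/vol-0001",
        "/var/lib/kubelet/pods/1234/volumes/kubernetes.io~cinder/myVolume",
    }))
    // Only the global mount is left: safe to detach.
    fmt.Println(volumeIDAndDetach([]string{
        "/var/lib/kubelet/plugins/kubernetes.io/cinder/mounts/vol-0001",
    }))
}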