Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent detach before node status update #27837

Merged
merged 1 commit into from
Jun 22, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 15 additions & 4 deletions pkg/controller/volume/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ func (rc *reconciler) reconciliationLoopFunc() func() {
if !rc.desiredStateOfWorld.VolumeExists(
attachedVolume.VolumeName, attachedVolume.NodeName) {
// Volume exists in actual state of world but not desired

// Mark desire to detach
timeElapsed, err := rc.actualStateOfWorld.MarkDesireToDetach(attachedVolume.VolumeName, attachedVolume.NodeName)
if err != nil {
glog.Errorf("Unexpected error actualStateOfWorld.MarkDesireToDetach(): %v", err)
}

// Update Node Status to indicate volume is no longer safe to mount.
err = rc.nodeStatusUpdater.UpdateNodeStatuses()
if err != nil {
// Skip detaching this volume if unable to update node status
glog.Infof("UpdateNodeStatuses failed with: %v", err)
continue
}

if !attachedVolume.MountedByNode {
glog.V(5).Infof("Attempting to start DetachVolume for volume %q from node %q", attachedVolume.VolumeName, attachedVolume.NodeName)
err := rc.attacherDetacher.DetachVolume(attachedVolume.AttachedVolume, rc.actualStateOfWorld)
Expand All @@ -112,10 +127,6 @@ func (rc *reconciler) reconciliationLoopFunc() func() {
}
} else {
// If volume is not safe to detach (is mounted) wait a max amount of time before detaching any way.
timeElapsed, err := rc.actualStateOfWorld.MarkDesireToDetach(attachedVolume.VolumeName, attachedVolume.NodeName)
if err != nil {
glog.Errorf("Unexpected error actualStateOfWorld.MarkDesireToDetach(): %v", err)
}
if timeElapsed > rc.maxWaitForUnmountDuration {
glog.V(5).Infof("Attempting to start DetachVolume for volume %q from node %q. Volume is not safe to detach, but maxWaitForUnmountDuration expired.", attachedVolume.VolumeName, attachedVolume.NodeName)
err := rc.attacherDetacher.DetachVolume(attachedVolume.AttachedVolume, rc.actualStateOfWorld)
Expand Down
81 changes: 69 additions & 12 deletions pkg/controller/volume/reconciler/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,7 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
asw := cache.NewActualStateOfWorld(volumePluginMgr)
fakeKubeClient := controllervolumetesting.CreateTestClient()
ad := operationexecutor.NewOperationExecutor(fakeKubeClient, volumePluginMgr)
nodeInformer := informers.CreateSharedNodeIndexInformer(
fakeKubeClient, resyncPeriod)
nsu := statusupdater.NewNodeStatusUpdater(
fakeKubeClient, nodeInformer, asw)
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, dsw, asw, ad, nsu)
podName := "pod-uid"
Expand Down Expand Up @@ -121,10 +118,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te
asw := cache.NewActualStateOfWorld(volumePluginMgr)
fakeKubeClient := controllervolumetesting.CreateTestClient()
ad := operationexecutor.NewOperationExecutor(fakeKubeClient, volumePluginMgr)
nodeInformer := informers.CreateSharedNodeIndexInformer(
fakeKubeClient, resyncPeriod)
nsu := statusupdater.NewNodeStatusUpdater(
fakeKubeClient, nodeInformer, asw)
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, dsw, asw, ad, nsu)
podName := "pod-uid"
Expand Down Expand Up @@ -188,10 +182,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
asw := cache.NewActualStateOfWorld(volumePluginMgr)
fakeKubeClient := controllervolumetesting.CreateTestClient()
ad := operationexecutor.NewOperationExecutor(fakeKubeClient, volumePluginMgr)
nodeInformer := informers.CreateSharedNodeIndexInformer(
fakeKubeClient, resyncPeriod)
nsu := statusupdater.NewNodeStatusUpdater(
fakeKubeClient, nodeInformer, asw)
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, dsw, asw, ad, nsu)
podName := "pod-uid"
Expand Down Expand Up @@ -241,6 +232,72 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
}

// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// Has node update fail
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Marks the node/volume as unmounted.
// Deletes the node/volume/pod tuple from desiredStateOfWorld cache.
// Verifies there are NO detach call and no (new) attach calls.
func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdateStatusFail(t *testing.T) {
// Arrange
volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
asw := cache.NewActualStateOfWorld(volumePluginMgr)
fakeKubeClient := controllervolumetesting.CreateTestClient()
ad := operationexecutor.NewOperationExecutor(fakeKubeClient, volumePluginMgr)
nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := api.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
nodeName := "node-name"
dsw.AddNode(nodeName)
volumeExists := dsw.VolumeExists(volumeName, nodeName)
if volumeExists {
t.Fatalf(
"Volume %q/node %q should not exist, but it does.",
volumeName,
nodeName)
}

generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName), controllervolumetesting.NewPod(podName, podName), volumeSpec, nodeName)
if podAddErr != nil {
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
}

// Act
go reconciler.Run(wait.NeverStop)

// Assert
waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

// Act
dsw.DeletePod(types.UniquePodName(podName), generatedVolumeName, nodeName)
volumeExists = dsw.VolumeExists(generatedVolumeName, nodeName)
if volumeExists {
t.Fatalf(
"Deleted pod %q from volume %q/node %q. Volume should also be deleted but it still exists.",
podName,
generatedVolumeName,
nodeName)
}
asw.SetVolumeMountedByNode(generatedVolumeName, nodeName, true /* mounted */)
asw.SetVolumeMountedByNode(generatedVolumeName, nodeName, false /* mounted */)

// Assert
verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
}

func waitForNewAttacherCallCount(
t *testing.T,
expectedCallCount int,
Expand Down
39 changes: 39 additions & 0 deletions pkg/controller/volume/statusupdater/fake_node_status_updater.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package statusupdater

import (
"fmt"
)

func NewFakeNodeStatusUpdater(returnError bool) NodeStatusUpdater {
return &fakeNodeStatusUpdater{
returnError: returnError,
}
}

type fakeNodeStatusUpdater struct {
returnError bool
}

func (fnsu *fakeNodeStatusUpdater) UpdateNodeStatuses() error {
if fnsu.returnError {
return fmt.Errorf("fake error on update node status")
}

return nil
}