Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #122211: Fix device uncertain errors on reboot #122398

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions pkg/kubelet/volumemanager/cache/actual_state_of_world.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,13 @@ func (asw *actualStateOfWorld) IsVolumeReconstructed(volumeName v1.UniqueVolumeN
return foundPod
}

func (asw *actualStateOfWorld) IsVolumeDeviceReconstructed(volumeName v1.UniqueVolumeName) bool {
asw.RLock()
defer asw.RUnlock()
_, ok := asw.foundDuringReconstruction[volumeName]
return ok
}

func (asw *actualStateOfWorld) CheckAndMarkVolumeAsUncertainViaReconstruction(opts operationexecutor.MarkVolumeOpts) (bool, error) {
asw.Lock()
defer asw.Unlock()
Expand Down Expand Up @@ -766,6 +773,7 @@ func (asw *actualStateOfWorld) SetDeviceMountState(
volumeObj.seLinuxMountContext = &seLinuxMountContext
}
}

asw.attachedVolumes[volumeName] = volumeObj
return nil
}
Expand Down
9 changes: 6 additions & 3 deletions pkg/kubelet/volumemanager/reconciler/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2245,15 +2245,18 @@ func getInlineFakePod(podName, podUUID, outerName, innerName string) *v1.Pod {
return pod
}

func getReconciler(kubeletDir string, t *testing.T, volumePaths []string) (Reconciler, *volumetesting.FakeVolumePlugin) {
func getReconciler(kubeletDir string, t *testing.T, volumePaths []string, kubeClient *fake.Clientset) (Reconciler, *volumetesting.FakeVolumePlugin) {
node := getFakeNode()
volumePluginMgr, fakePlugin := volumetesting.GetTestKubeletVolumePluginMgrWithNodeAndRoot(t, node, kubeletDir)
tmpKubeletPodDir := filepath.Join(kubeletDir, "pods")
seLinuxTranslator := util.NewFakeSELinuxLabelTranslator()

dsw := cache.NewDesiredStateOfWorld(volumePluginMgr, seLinuxTranslator)
asw := cache.NewActualStateOfWorld(nodeName, volumePluginMgr)
kubeClient := createTestClient()
if kubeClient == nil {
kubeClient = createTestClient()
}

fakeRecorder := &record.FakeRecorder{}
fakeHandler := volumetesting.NewBlockVolumePathHandler()
oex := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
Expand Down Expand Up @@ -2424,7 +2427,7 @@ func TestSyncStates(t *testing.T) {
os.MkdirAll(vp, 0755)
}

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, nil /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)

for _, tpodInfo := range tc.podInfos {
Expand Down
99 changes: 84 additions & 15 deletions pkg/kubelet/volumemanager/reconciler/reconstruct_new_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"testing"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
Expand Down Expand Up @@ -105,7 +106,7 @@ func TestReconstructVolumes(t *testing.T) {
os.MkdirAll(vp, 0755)
}

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, nil /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)

// Act
Expand Down Expand Up @@ -203,7 +204,7 @@ func TestCleanOrphanVolumes(t *testing.T) {

mountPaths := []string{}

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, nil /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)
rcInstance.volumesFailedReconstruction = tc.volumesFailedReconstruction

Expand Down Expand Up @@ -265,21 +266,31 @@ func TestReconstructVolumesMount(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SELinuxMountReadWriteOncePod, true)()

tests := []struct {
name string
volumePath string
expectMount bool
name string
volumePath string
expectMount bool
volumeMode v1.PersistentVolumeMode
deviceMountPath string
}{
{
name: "reconstructed volume is mounted",
volumePath: path.Join("pod1uid", "volumes", "fake-plugin", "volumename"),

expectMount: true,
volumeMode: v1.PersistentVolumeFilesystem,
},
{
name: "reconstructed volume fails to mount",
// FailOnSetupVolumeName: MountDevice succeeds, SetUp fails
volumePath: path.Join("pod1uid", "volumes", "fake-plugin", volumetesting.FailOnSetupVolumeName),
expectMount: false,
volumeMode: v1.PersistentVolumeFilesystem,
},
{
name: "reconstructed volume device map fails",
volumePath: filepath.Join("pod1uid", "volumeDevices", "fake-plugin", volumetesting.FailMountDeviceVolumeName),
volumeMode: v1.PersistentVolumeBlock,
deviceMountPath: filepath.Join("plugins", "fake-plugin", "volumeDevices", "pluginDependentPath"),
},
}
for _, tc := range tests {
Expand All @@ -299,7 +310,16 @@ func TestReconstructVolumesMount(t *testing.T) {
mountPaths := []string{vp}
os.MkdirAll(vp, 0755)

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
// Arrange 2 - populate DSW
outerName := filepath.Base(tc.volumePath)
pod, pv, pvc := getPodPVCAndPV(tc.volumeMode, "pod1", outerName, "pvc1")
volumeSpec := &volume.Spec{PersistentVolume: pv}
kubeClient := createtestClientWithPVPVC(pv, pvc, v1.AttachedVolume{
Name: v1.UniqueVolumeName(fmt.Sprintf("fake-plugin/%s", outerName)),
DevicePath: "fake/path",
})

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, kubeClient /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)

// Act 1 - reconstruction
Expand All @@ -315,10 +335,6 @@ func TestReconstructVolumesMount(t *testing.T) {
t.Errorf("expected 1 uncertain volume in asw, got %+v", allPods)
}

// Arrange 2 - populate DSW
outerName := filepath.Base(tc.volumePath)
pod := getInlineFakePod("pod1", "pod1uid", outerName, outerName)
volumeSpec := &volume.Spec{Volume: &pod.Spec.Volumes[0]}
podName := util.GetUniquePodName(pod)
volumeName, err := rcInstance.desiredStateOfWorld.AddPodToVolume(
podName, pod, volumeSpec, volumeSpec.Name(), "" /* volumeGidValue */, nil /* SELinuxContext */)
Expand All @@ -341,12 +357,15 @@ func TestReconstructVolumesMount(t *testing.T) {
// MountDevice was attempted
var lastErr error
err = retryWithExponentialBackOff(testOperationBackOffDuration, func() (bool, error) {
// MountDevice should always be called and succeed
if err := volumetesting.VerifyMountDeviceCallCount(1, fakePlugin); err != nil {
lastErr = err
return false, nil
if tc.volumeMode == v1.PersistentVolumeFilesystem {
if err := volumetesting.VerifyMountDeviceCallCount(1, fakePlugin); err != nil {
lastErr = err
return false, nil
}
return true, nil
} else {
return true, nil
}
return true, nil
})
if err != nil {
t.Errorf("Error waiting for volumes to get mounted: %s: %s", err, lastErr)
Expand Down Expand Up @@ -376,10 +395,60 @@ func TestReconstructVolumesMount(t *testing.T) {
if len(allPods) != 1 {
t.Errorf("expected 1 mounted or uncertain volumes after reconcile, got %+v", allPods)
}
if tc.deviceMountPath != "" {
expectedDeviceMountPath := filepath.Join(tmpKubeletDir, tc.deviceMountPath)
deviceMountPath := allPods[0].DeviceMountPath
if expectedDeviceMountPath != deviceMountPath {
t.Errorf("expected deviceMountPath to be %s, got %s", expectedDeviceMountPath, deviceMountPath)
}
}

}

// Unmount was *not* attempted in any case
verifyTearDownCalls(fakePlugin, 0)
})
}
}

func getPodPVCAndPV(volumeMode v1.PersistentVolumeMode, podName, pvName, pvcName string) (*v1.Pod, *v1.PersistentVolume, *v1.PersistentVolumeClaim) {
pv := &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: pvName,
UID: "pvuid",
},
Spec: v1.PersistentVolumeSpec{
ClaimRef: &v1.ObjectReference{Name: pvcName},
VolumeMode: &volumeMode,
},
}
pvc := &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: pvcName,
UID: "pvcuid",
},
Spec: v1.PersistentVolumeClaimSpec{
VolumeName: pvName,
VolumeMode: &volumeMode,
},
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
UID: "pod1uid",
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
Name: "volume-name",
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: pvc.Name,
},
},
},
},
},
}
return pod, pv, pvc
}
4 changes: 2 additions & 2 deletions pkg/volume/testing/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -857,8 +857,8 @@ func (fv *FakeVolume) GetSetUpDeviceCallCount() int {

// Block volume support
func (fv *FakeVolume) GetGlobalMapPath(spec *volume.Spec) (string, error) {
fv.RLock()
defer fv.RUnlock()
fv.Lock()
defer fv.Unlock()
fv.GlobalMapPathCallCount++
return fv.getGlobalMapPath()
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/volume/util/operationexecutor/operation_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ type ActualStateOfWorldMounterUpdater interface {
// IsVolumeReconstructed returns true if volume currently added to actual state of the world
// was found during reconstruction.
IsVolumeReconstructed(volumeName v1.UniqueVolumeName, podName volumetypes.UniquePodName) bool

// IsVolumeDeviceReconstructed returns true if volume device identified by volumeName has been
// found during reconstruction.
IsVolumeDeviceReconstructed(volumeName v1.UniqueVolumeName) bool
}

// ActualStateOfWorldAttacherUpdater defines a set of operations updating the
Expand Down
6 changes: 6 additions & 0 deletions pkg/volume/util/operationexecutor/operation_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,12 @@ func (og *operationGenerator) checkForFailedMount(volumeToMount VolumeToMount, m
func (og *operationGenerator) markDeviceErrorState(volumeToMount VolumeToMount, devicePath, deviceMountPath string, mountError error, actualStateOfWorld ActualStateOfWorldMounterUpdater) {
if volumetypes.IsOperationFinishedError(mountError) &&
actualStateOfWorld.GetDeviceMountState(volumeToMount.VolumeName) == DeviceMountUncertain {

if actualStateOfWorld.IsVolumeDeviceReconstructed(volumeToMount.VolumeName) {
klog.V(2).InfoS("MountVolume.markDeviceErrorState leaving volume uncertain", "volumeName", volumeToMount.VolumeName)
return
}

// Only devices which were uncertain can be marked as unmounted
markDeviceUnmountError := actualStateOfWorld.MarkDeviceAsUnmounted(volumeToMount.VolumeName)
if markDeviceUnmountError != nil {
Expand Down