Skip to content

Commit

Permalink
UPSTREAM: 122211: Fix device uncertain errors on reboot
Browse files Browse the repository at this point in the history
Fixes OCPBUGS-23896
  • Loading branch information
gnufied authored and bertinatto committed Jan 4, 2024
1 parent 4ad3e1b commit 10a02b8
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 20 deletions.
8 changes: 8 additions & 0 deletions pkg/kubelet/volumemanager/cache/actual_state_of_world.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,13 @@ func (asw *actualStateOfWorld) IsVolumeReconstructed(volumeName v1.UniqueVolumeN
return foundPod
}

func (asw *actualStateOfWorld) IsVolumeDeviceReconstructed(volumeName v1.UniqueVolumeName) bool {
asw.RLock()
defer asw.RUnlock()
_, ok := asw.foundDuringReconstruction[volumeName]
return ok
}

func (asw *actualStateOfWorld) CheckAndMarkVolumeAsUncertainViaReconstruction(opts operationexecutor.MarkVolumeOpts) (bool, error) {
asw.Lock()
defer asw.Unlock()
Expand Down Expand Up @@ -766,6 +773,7 @@ func (asw *actualStateOfWorld) SetDeviceMountState(
volumeObj.seLinuxMountContext = &seLinuxMountContext
}
}

asw.attachedVolumes[volumeName] = volumeObj
return nil
}
Expand Down
9 changes: 6 additions & 3 deletions pkg/kubelet/volumemanager/reconciler/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2245,15 +2245,18 @@ func getInlineFakePod(podName, podUUID, outerName, innerName string) *v1.Pod {
return pod
}

func getReconciler(kubeletDir string, t *testing.T, volumePaths []string) (Reconciler, *volumetesting.FakeVolumePlugin) {
func getReconciler(kubeletDir string, t *testing.T, volumePaths []string, kubeClient *fake.Clientset) (Reconciler, *volumetesting.FakeVolumePlugin) {
node := getFakeNode()
volumePluginMgr, fakePlugin := volumetesting.GetTestKubeletVolumePluginMgrWithNodeAndRoot(t, node, kubeletDir)
tmpKubeletPodDir := filepath.Join(kubeletDir, "pods")
seLinuxTranslator := util.NewFakeSELinuxLabelTranslator()

dsw := cache.NewDesiredStateOfWorld(volumePluginMgr, seLinuxTranslator)
asw := cache.NewActualStateOfWorld(nodeName, volumePluginMgr)
kubeClient := createTestClient()
if kubeClient == nil {
kubeClient = createTestClient()
}

fakeRecorder := &record.FakeRecorder{}
fakeHandler := volumetesting.NewBlockVolumePathHandler()
oex := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
Expand Down Expand Up @@ -2424,7 +2427,7 @@ func TestSyncStates(t *testing.T) {
os.MkdirAll(vp, 0755)
}

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, nil /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)

for _, tpodInfo := range tc.podInfos {
Expand Down
99 changes: 84 additions & 15 deletions pkg/kubelet/volumemanager/reconciler/reconstruct_new_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"testing"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
Expand Down Expand Up @@ -105,7 +106,7 @@ func TestReconstructVolumes(t *testing.T) {
os.MkdirAll(vp, 0755)
}

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, nil /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)

// Act
Expand Down Expand Up @@ -203,7 +204,7 @@ func TestCleanOrphanVolumes(t *testing.T) {

mountPaths := []string{}

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, nil /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)
rcInstance.volumesFailedReconstruction = tc.volumesFailedReconstruction

Expand Down Expand Up @@ -265,21 +266,31 @@ func TestReconstructVolumesMount(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SELinuxMountReadWriteOncePod, true)()

tests := []struct {
name string
volumePath string
expectMount bool
name string
volumePath string
expectMount bool
volumeMode v1.PersistentVolumeMode
deviceMountPath string
}{
{
name: "reconstructed volume is mounted",
volumePath: path.Join("pod1uid", "volumes", "fake-plugin", "volumename"),

expectMount: true,
volumeMode: v1.PersistentVolumeFilesystem,
},
{
name: "reconstructed volume fails to mount",
// FailOnSetupVolumeName: MountDevice succeeds, SetUp fails
volumePath: path.Join("pod1uid", "volumes", "fake-plugin", volumetesting.FailOnSetupVolumeName),
expectMount: false,
volumeMode: v1.PersistentVolumeFilesystem,
},
{
name: "reconstructed volume device map fails",
volumePath: filepath.Join("pod1uid", "volumeDevices", "fake-plugin", volumetesting.FailMountDeviceVolumeName),
volumeMode: v1.PersistentVolumeBlock,
deviceMountPath: filepath.Join("plugins", "fake-plugin", "volumeDevices", "pluginDependentPath"),
},
}
for _, tc := range tests {
Expand All @@ -299,7 +310,16 @@ func TestReconstructVolumesMount(t *testing.T) {
mountPaths := []string{vp}
os.MkdirAll(vp, 0755)

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths)
// Arrange 2 - populate DSW
outerName := filepath.Base(tc.volumePath)
pod, pv, pvc := getPodPVCAndPV(tc.volumeMode, "pod1", outerName, "pvc1")
volumeSpec := &volume.Spec{PersistentVolume: pv}
kubeClient := createtestClientWithPVPVC(pv, pvc, v1.AttachedVolume{
Name: v1.UniqueVolumeName(fmt.Sprintf("fake-plugin/%s", outerName)),
DevicePath: "fake/path",
})

rc, fakePlugin := getReconciler(tmpKubeletDir, t, mountPaths, kubeClient /*custom kubeclient*/)
rcInstance, _ := rc.(*reconciler)

// Act 1 - reconstruction
Expand All @@ -315,10 +335,6 @@ func TestReconstructVolumesMount(t *testing.T) {
t.Errorf("expected 1 uncertain volume in asw, got %+v", allPods)
}

// Arrange 2 - populate DSW
outerName := filepath.Base(tc.volumePath)
pod := getInlineFakePod("pod1", "pod1uid", outerName, outerName)
volumeSpec := &volume.Spec{Volume: &pod.Spec.Volumes[0]}
podName := util.GetUniquePodName(pod)
volumeName, err := rcInstance.desiredStateOfWorld.AddPodToVolume(
podName, pod, volumeSpec, volumeSpec.Name(), "" /* volumeGidValue */, nil /* SELinuxContext */)
Expand All @@ -341,12 +357,15 @@ func TestReconstructVolumesMount(t *testing.T) {
// MountDevice was attempted
var lastErr error
err = retryWithExponentialBackOff(testOperationBackOffDuration, func() (bool, error) {
// MountDevice should always be called and succeed
if err := volumetesting.VerifyMountDeviceCallCount(1, fakePlugin); err != nil {
lastErr = err
return false, nil
if tc.volumeMode == v1.PersistentVolumeFilesystem {
if err := volumetesting.VerifyMountDeviceCallCount(1, fakePlugin); err != nil {
lastErr = err
return false, nil
}
return true, nil
} else {
return true, nil
}
return true, nil
})
if err != nil {
t.Errorf("Error waiting for volumes to get mounted: %s: %s", err, lastErr)
Expand Down Expand Up @@ -376,10 +395,60 @@ func TestReconstructVolumesMount(t *testing.T) {
if len(allPods) != 1 {
t.Errorf("expected 1 mounted or uncertain volumes after reconcile, got %+v", allPods)
}
if tc.deviceMountPath != "" {
expectedDeviceMountPath := filepath.Join(tmpKubeletDir, tc.deviceMountPath)
deviceMountPath := allPods[0].DeviceMountPath
if expectedDeviceMountPath != deviceMountPath {
t.Errorf("expected deviceMountPath to be %s, got %s", expectedDeviceMountPath, deviceMountPath)
}
}

}

// Unmount was *not* attempted in any case
verifyTearDownCalls(fakePlugin, 0)
})
}
}

func getPodPVCAndPV(volumeMode v1.PersistentVolumeMode, podName, pvName, pvcName string) (*v1.Pod, *v1.PersistentVolume, *v1.PersistentVolumeClaim) {
pv := &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: pvName,
UID: "pvuid",
},
Spec: v1.PersistentVolumeSpec{
ClaimRef: &v1.ObjectReference{Name: pvcName},
VolumeMode: &volumeMode,
},
}
pvc := &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: pvcName,
UID: "pvcuid",
},
Spec: v1.PersistentVolumeClaimSpec{
VolumeName: pvName,
VolumeMode: &volumeMode,
},
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
UID: "pod1uid",
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
Name: "volume-name",
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: pvc.Name,
},
},
},
},
},
}
return pod, pv, pvc
}
4 changes: 2 additions & 2 deletions pkg/volume/testing/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -857,8 +857,8 @@ func (fv *FakeVolume) GetSetUpDeviceCallCount() int {

// Block volume support
func (fv *FakeVolume) GetGlobalMapPath(spec *volume.Spec) (string, error) {
fv.RLock()
defer fv.RUnlock()
fv.Lock()
defer fv.Unlock()
fv.GlobalMapPathCallCount++
return fv.getGlobalMapPath()
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/volume/util/operationexecutor/operation_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ type ActualStateOfWorldMounterUpdater interface {
// IsVolumeReconstructed returns true if volume currently added to actual state of the world
// was found during reconstruction.
IsVolumeReconstructed(volumeName v1.UniqueVolumeName, podName volumetypes.UniquePodName) bool

// IsVolumeDeviceReconstructed returns true if volume device identified by volumeName has been
// found during reconstruction.
IsVolumeDeviceReconstructed(volumeName v1.UniqueVolumeName) bool
}

// ActualStateOfWorldAttacherUpdater defines a set of operations updating the
Expand Down
6 changes: 6 additions & 0 deletions pkg/volume/util/operationexecutor/operation_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,12 @@ func (og *operationGenerator) checkForFailedMount(volumeToMount VolumeToMount, m
func (og *operationGenerator) markDeviceErrorState(volumeToMount VolumeToMount, devicePath, deviceMountPath string, mountError error, actualStateOfWorld ActualStateOfWorldMounterUpdater) {
if volumetypes.IsOperationFinishedError(mountError) &&
actualStateOfWorld.GetDeviceMountState(volumeToMount.VolumeName) == DeviceMountUncertain {

if actualStateOfWorld.IsVolumeDeviceReconstructed(volumeToMount.VolumeName) {
klog.V(2).InfoS("MountVolume.markDeviceErrorState leaving volume uncertain", "volumeName", volumeToMount.VolumeName)
return
}

// Only devices which were uncertain can be marked as unmounted
markDeviceUnmountError := actualStateOfWorld.MarkDeviceAsUnmounted(volumeToMount.VolumeName)
if markDeviceUnmountError != nil {
Expand Down

0 comments on commit 10a02b8

Please sign in to comment.