diff --git a/test/e2e/storage/csi_mock/base.go b/test/e2e/storage/csi_mock/base.go index 0a4a72b58979a..0dd0ca3edc62a 100644 --- a/test/e2e/storage/csi_mock/base.go +++ b/test/e2e/storage/csi_mock/base.go @@ -89,14 +89,15 @@ type testParameters struct { enableResizing bool // enable resizing for both CSI mock driver and storageClass. enableNodeExpansion bool // enable node expansion for CSI mock driver // just disable resizing on driver it overrides enableResizing flag for CSI mock driver - disableResizingOnDriver bool - enableSnapshot bool - enableVolumeMountGroup bool // enable the VOLUME_MOUNT_GROUP node capability in the CSI mock driver. - hooks *drivers.Hooks - tokenRequests []storagev1.TokenRequest - requiresRepublish *bool - fsGroupPolicy *storagev1.FSGroupPolicy - enableSELinuxMount *bool + disableResizingOnDriver bool + enableSnapshot bool + enableVolumeMountGroup bool // enable the VOLUME_MOUNT_GROUP node capability in the CSI mock driver. + hooks *drivers.Hooks + tokenRequests []storagev1.TokenRequest + requiresRepublish *bool + fsGroupPolicy *storagev1.FSGroupPolicy + enableSELinuxMount *bool + enableRecoverExpansionFailure bool } type mockDriverSetup struct { @@ -148,20 +149,21 @@ func (m *mockDriverSetup) init(tp testParameters) { var err error driverOpts := drivers.CSIMockDriverOpts{ - RegisterDriver: tp.registerDriver, - PodInfo: tp.podInfo, - StorageCapacity: tp.storageCapacity, - EnableTopology: tp.enableTopology, - AttachLimit: tp.attachLimit, - DisableAttach: tp.disableAttach, - EnableResizing: tp.enableResizing, - EnableNodeExpansion: tp.enableNodeExpansion, - EnableSnapshot: tp.enableSnapshot, - EnableVolumeMountGroup: tp.enableVolumeMountGroup, - TokenRequests: tp.tokenRequests, - RequiresRepublish: tp.requiresRepublish, - FSGroupPolicy: tp.fsGroupPolicy, - EnableSELinuxMount: tp.enableSELinuxMount, + RegisterDriver: tp.registerDriver, + PodInfo: tp.podInfo, + StorageCapacity: tp.storageCapacity, + EnableTopology: tp.enableTopology, + 
AttachLimit: tp.attachLimit, + DisableAttach: tp.disableAttach, + EnableResizing: tp.enableResizing, + EnableNodeExpansion: tp.enableNodeExpansion, + EnableSnapshot: tp.enableSnapshot, + EnableVolumeMountGroup: tp.enableVolumeMountGroup, + TokenRequests: tp.tokenRequests, + RequiresRepublish: tp.requiresRepublish, + FSGroupPolicy: tp.fsGroupPolicy, + EnableSELinuxMount: tp.enableSELinuxMount, + EnableRecoverExpansionFailure: tp.enableRecoverExpansionFailure, } // At the moment, only tests which need hooks are diff --git a/test/e2e/storage/csi_mock/csi_volume_expansion.go b/test/e2e/storage/csi_mock/csi_volume_expansion.go index a36bf78f4b45b..a33f79232e446 100644 --- a/test/e2e/storage/csi_mock/csi_volume_expansion.go +++ b/test/e2e/storage/csi_mock/csi_volume_expansion.go @@ -18,18 +18,55 @@ package csi_mock import ( "context" + "fmt" + "time" + csipbv1 "github.com/container-storage-interface/spec/lib/go/csi" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + "k8s.io/kubernetes/test/e2e/storage/drivers" "k8s.io/kubernetes/test/e2e/storage/testsuites" "k8s.io/kubernetes/test/e2e/storage/utils" admissionapi "k8s.io/pod-security-admission/api" ) +type expansionStatus int + +const ( + expansionSuccess = iota + expansionFailed + expansionFailedOnController + expansionFailedOnNode +) + +const ( + resizePollInterval = 2 * time.Second +) + +var ( + maxControllerSizeLimit = resource.MustParse("10Gi") + + maxNodeExpansionLimit = resource.MustParse("8Gi") +) + +type recoveryTest struct { + name string + pvcRequestSize string + allocatedResource string + simulatedCSIDriverError expansionStatus + expectedResizeStatus 
v1.PersistentVolumeClaimResizeStatus + recoverySize resource.Quantity +} + var _ = utils.SIGDescribe("CSI Mock volume expansion", func() { f := framework.NewDefaultFramework("csi-mock-volumes-expansion") f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged @@ -210,4 +247,224 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() { }) } }) + + ginkgo.Context("Expansion with recovery[Feature:RecoverVolumeExpansionFailure]", func() { + tests := []recoveryTest{ + { + name: "should record target size in allocated resources", + pvcRequestSize: "4Gi", + allocatedResource: "4Gi", + simulatedCSIDriverError: expansionSuccess, + expectedResizeStatus: v1.PersistentVolumeClaimNoExpansionInProgress, + }, + { + name: "should allow recovery if controller expansion fails with final error", + pvcRequestSize: "11Gi", // expansion to 11Gi will cause expansion to fail on controller + allocatedResource: "11Gi", + simulatedCSIDriverError: expansionFailedOnController, + expectedResizeStatus: v1.PersistentVolumeClaimControllerExpansionFailed, + recoverySize: resource.MustParse("4Gi"), + }, + { + name: "recovery should not be possible in partially expanded volumes", + pvcRequestSize: "9Gi", // expansion to 9Gi will cause expansion to fail on node + allocatedResource: "9Gi", + simulatedCSIDriverError: expansionFailedOnNode, + expectedResizeStatus: v1.PersistentVolumeClaimNodeExpansionFailed, + recoverySize: resource.MustParse("5Gi"), + }, + } + + for _, t := range tests { + test := t + ginkgo.It(test.name, func(ctx context.Context) { + var err error + params := testParameters{enableResizing: true, enableNodeExpansion: true, enableRecoverExpansionFailure: true} + + if test.simulatedCSIDriverError != expansionSuccess { + params.hooks = createExpansionHook(test.simulatedCSIDriverError) + } + + m.init(params) + ginkgo.DeferCleanup(m.cleanup) + + sc, pvc, pod := m.createPod(pvcReference) + gomega.Expect(pod).NotTo(gomega.BeNil(), "while creating pod for resizing") + + if 
!*sc.AllowVolumeExpansion { + framework.Fail("failed creating sc with allowed expansion") + } + + err = e2epod.WaitForPodNameRunningInNamespace(m.cs, pod.Name, pod.Namespace) + framework.ExpectNoError(err, "Failed to start pod1: %v", err) + + ginkgo.By("Expanding current pvc") + newSize := resource.MustParse(test.pvcRequestSize) + newPVC, err := testsuites.ExpandPVCSize(pvc, newSize, m.cs) + framework.ExpectNoError(err, "While updating pvc for more size") + pvc = newPVC + gomega.Expect(pvc).NotTo(gomega.BeNil()) + + pvcSize := pvc.Spec.Resources.Requests[v1.ResourceStorage] + if pvcSize.Cmp(newSize) != 0 { + framework.Failf("error updating pvc size %q", pvc.Name) + } + + if test.simulatedCSIDriverError == expansionSuccess { + validateExpansionSuccess(pvc, m, test, test.allocatedResource) + } else { + validateRecoveryBehaviour(pvc, m, test) + } + }) + } + + }) }) + +func validateRecoveryBehaviour(pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, test recoveryTest) { + var err error + ginkgo.By("Waiting for resizer to set allocated resource") + err = waitForAllocatedResource(pvc, m, test.allocatedResource) + framework.ExpectNoError(err, "While waiting for allocated resource to be updated") + + ginkgo.By("Waiting for resizer to set resize status") + err = waitForResizeStatus(pvc, m.cs, test.expectedResizeStatus) + framework.ExpectNoError(err, "While waiting for resize status to be set") + + ginkgo.By("Recover pvc size") + newPVC, err := testsuites.ExpandPVCSize(pvc, test.recoverySize, m.cs) + framework.ExpectNoError(err, "While updating pvc for more size") + pvc = newPVC + gomega.Expect(pvc).NotTo(gomega.BeNil()) + + pvcSize := pvc.Spec.Resources.Requests[v1.ResourceStorage] + if pvcSize.Cmp(test.recoverySize) != 0 { + framework.Failf("error updating pvc size %q", pvc.Name) + } + + // if expansion failed on controller with final error, then recovery should be possible + if test.simulatedCSIDriverError == expansionFailedOnController { + validateExpansionSuccess(pvc, 
m, test, test.recoverySize.String()) + return + } + + // if expansion succeeded on controller but failed on the node + if test.simulatedCSIDriverError == expansionFailedOnNode { + ginkgo.By("Wait for expansion to fail on node again") + err = waitForResizeStatus(pvc, m.cs, v1.PersistentVolumeClaimNodeExpansionFailed) + framework.ExpectNoError(err, "While waiting for resize status to be set to expansion-failed-on-node") + + ginkgo.By("verify allocated resources after recovery") + pvc, err = m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "while fetching pvc") + actualAllocatedResource := pvc.Status.AllocatedResources.Storage() + + if actualAllocatedResource.Equal(test.recoverySize) { + framework.Failf("unexpected allocated resource size %s after node expansion failure", actualAllocatedResource.String()) + } + + if !actualAllocatedResource.Equal(resource.MustParse(test.allocatedResource)) { + framework.Failf("expected allocated resources to be %s got %s", test.allocatedResource, actualAllocatedResource.String()) + } + } +} + +func validateExpansionSuccess(pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, test recoveryTest, expectedAllocatedSize string) { + var err error + ginkgo.By("Waiting for persistent volume resize to finish") + err = testsuites.WaitForControllerVolumeResize(pvc, m.cs, csiResizeWaitPeriod) + framework.ExpectNoError(err, "While waiting for PV resize to finish") + + ginkgo.By("Waiting for PVC resize to finish") + pvc, err = testsuites.WaitForFSResize(pvc, m.cs) + framework.ExpectNoError(err, "while waiting for PVC to finish") + + pvcConditions := pvc.Status.Conditions + framework.ExpectEqual(len(pvcConditions), 0, "pvc should not have conditions") + allocatedResource := pvc.Status.AllocatedResources.Storage() + gomega.Expect(allocatedResource).NotTo(gomega.BeNil()) + expectedAllocatedResource := resource.MustParse(expectedAllocatedSize) + if 
allocatedResource.Cmp(expectedAllocatedResource) != 0 { + framework.Failf("expected allocated Resources to be %s got %s", expectedAllocatedResource.String(), allocatedResource.String()) + } + + resizeStatus := pvc.Status.ResizeStatus + gomega.Expect(resizeStatus).NotTo(gomega.BeNil(), "resize status should not be nil") + framework.ExpectEqual(*resizeStatus, v1.PersistentVolumeClaimNoExpansionInProgress, "resize status should be empty") +} + +func waitForResizeStatus(pvc *v1.PersistentVolumeClaim, c clientset.Interface, expectedStates ...v1.PersistentVolumeClaimResizeStatus) error { + var actualResizeStatus *v1.PersistentVolumeClaimResizeStatus + waitErr := wait.PollImmediate(resizePollInterval, csiResizeWaitPeriod, func() (bool, error) { + var err error + updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + + if err != nil { + return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %v", pvc.Name, err) + } + + actualResizeStatus = updatedPVC.Status.ResizeStatus + if actualResizeStatus != nil { + for _, s := range expectedStates { + if s == *actualResizeStatus { + return true, nil + } + } + } + return false, nil + }) + if waitErr != nil { + return fmt.Errorf("error while waiting for resize status to sync to %+v, actualStatus %s: %v", expectedStates, *actualResizeStatus, waitErr) + } + return nil +} + +func waitForAllocatedResource(pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, expectedSize string) error { + expectedQuantity := resource.MustParse(expectedSize) + waitErr := wait.PollImmediate(resizePollInterval, csiResizeWaitPeriod, func() (bool, error) { + var err error + updatedPVC, err := m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + + if err != nil { + return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %v", pvc.Name, err) + } + actualAllocatedSize := 
updatedPVC.Status.AllocatedResources.Storage() + if actualAllocatedSize != nil && actualAllocatedSize.Equal(expectedQuantity) { + return true, nil + } + return false, nil + + }) + if waitErr != nil { + return fmt.Errorf("error while waiting for allocatedSize to sync to %s: %v", expectedSize, waitErr) + } + return nil +} + +func createExpansionHook(expectedExpansionStatus expansionStatus) *drivers.Hooks { + return &drivers.Hooks{ + Pre: func(ctx context.Context, method string, request interface{}) (reply interface{}, err error) { + switch expectedExpansionStatus { + case expansionFailedOnController: + expansionRequest, ok := request.(*csipbv1.ControllerExpandVolumeRequest) + if ok { + requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI) + if requestedSize.Cmp(maxControllerSizeLimit) > 0 { + return nil, status.Error(codes.InvalidArgument, "invalid expansion request") + } + } + case expansionFailedOnNode: + expansionRequest, ok := request.(*csipbv1.NodeExpandVolumeRequest) + if ok { + requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI) + if requestedSize.Cmp(maxNodeExpansionLimit) > 0 { + return nil, status.Error(codes.InvalidArgument, "invalid node expansion request") + } + + } + } + + return nil, nil + }, + } +} diff --git a/test/e2e/storage/drivers/csi.go b/test/e2e/storage/drivers/csi.go index 538086358211f..b134a34a65b88 100644 --- a/test/e2e/storage/drivers/csi.go +++ b/test/e2e/storage/drivers/csi.go @@ -292,23 +292,24 @@ func (h *hostpathCSIDriver) PrepareTest(f *framework.Framework) *storageframewor // mockCSI type mockCSIDriver struct { - driverInfo storageframework.DriverInfo - manifests []string - podInfo *bool - storageCapacity *bool - attachable bool - attachLimit int - enableTopology bool - enableNodeExpansion bool - hooks Hooks - tokenRequests []storagev1.TokenRequest - requiresRepublish *bool - fsGroupPolicy *storagev1.FSGroupPolicy - 
enableVolumeMountGroup bool - embedded bool - calls MockCSICalls - embeddedCSIDriver *mockdriver.CSIDriver - enableSELinuxMount *bool + driverInfo storageframework.DriverInfo + manifests []string + podInfo *bool + storageCapacity *bool + attachable bool + attachLimit int + enableTopology bool + enableNodeExpansion bool + hooks Hooks + tokenRequests []storagev1.TokenRequest + requiresRepublish *bool + fsGroupPolicy *storagev1.FSGroupPolicy + enableVolumeMountGroup bool + embedded bool + calls MockCSICalls + embeddedCSIDriver *mockdriver.CSIDriver + enableSELinuxMount *bool + enableRecoverExpansionFailure bool // Additional values set during PrepareTest clientSet clientset.Interface @@ -342,20 +343,21 @@ type MockCSITestDriver interface { // CSIMockDriverOpts defines options used for csi driver type CSIMockDriverOpts struct { - RegisterDriver bool - DisableAttach bool - PodInfo *bool - StorageCapacity *bool - AttachLimit int - EnableTopology bool - EnableResizing bool - EnableNodeExpansion bool - EnableSnapshot bool - EnableVolumeMountGroup bool - TokenRequests []storagev1.TokenRequest - RequiresRepublish *bool - FSGroupPolicy *storagev1.FSGroupPolicy - EnableSELinuxMount *bool + RegisterDriver bool + DisableAttach bool + PodInfo *bool + StorageCapacity *bool + AttachLimit int + EnableTopology bool + EnableResizing bool + EnableNodeExpansion bool + EnableSnapshot bool + EnableVolumeMountGroup bool + TokenRequests []storagev1.TokenRequest + RequiresRepublish *bool + FSGroupPolicy *storagev1.FSGroupPolicy + EnableSELinuxMount *bool + EnableRecoverExpansionFailure bool // Embedded defines whether the CSI mock driver runs // inside the cluster (false, the default) or just a proxy @@ -497,20 +499,21 @@ func InitMockCSIDriver(driverOpts CSIMockDriverOpts) MockCSITestDriver { storageframework.CapMultiplePVsSameID: true, }, }, - manifests: driverManifests, - podInfo: driverOpts.PodInfo, - storageCapacity: driverOpts.StorageCapacity, - enableTopology: 
driverOpts.EnableTopology, - attachable: !driverOpts.DisableAttach, - attachLimit: driverOpts.AttachLimit, - enableNodeExpansion: driverOpts.EnableNodeExpansion, - tokenRequests: driverOpts.TokenRequests, - requiresRepublish: driverOpts.RequiresRepublish, - fsGroupPolicy: driverOpts.FSGroupPolicy, - enableVolumeMountGroup: driverOpts.EnableVolumeMountGroup, - enableSELinuxMount: driverOpts.EnableSELinuxMount, - embedded: driverOpts.Embedded, - hooks: driverOpts.Hooks, + manifests: driverManifests, + podInfo: driverOpts.PodInfo, + storageCapacity: driverOpts.StorageCapacity, + enableTopology: driverOpts.EnableTopology, + attachable: !driverOpts.DisableAttach, + attachLimit: driverOpts.AttachLimit, + enableNodeExpansion: driverOpts.EnableNodeExpansion, + tokenRequests: driverOpts.TokenRequests, + requiresRepublish: driverOpts.RequiresRepublish, + fsGroupPolicy: driverOpts.FSGroupPolicy, + enableVolumeMountGroup: driverOpts.EnableVolumeMountGroup, + enableSELinuxMount: driverOpts.EnableSELinuxMount, + enableRecoverExpansionFailure: driverOpts.EnableRecoverExpansionFailure, + embedded: driverOpts.Embedded, + hooks: driverOpts.Hooks, } } @@ -660,6 +663,11 @@ func (m *mockCSIDriver) PrepareTest(f *framework.Framework) *storageframework.Pe RequiresRepublish: m.requiresRepublish, FSGroupPolicy: m.fsGroupPolicy, SELinuxMount: m.enableSELinuxMount, + Features: map[string][]string{}, + } + + if m.enableRecoverExpansionFailure { + o.Features["csi-resizer"] = []string{"RecoverVolumeExpansionFailure=true"} } err = utils.CreateFromManifests(f, m.driverNamespace, func(item interface{}) error { if err := utils.PatchCSIDeployment(config.Framework, o, item); err != nil { diff --git a/test/e2e/storage/utils/deployment.go b/test/e2e/storage/utils/deployment.go index 6e03e4070f899..f36905cf569e0 100644 --- a/test/e2e/storage/utils/deployment.go +++ b/test/e2e/storage/utils/deployment.go @@ -17,6 +17,7 @@ limitations under the License. 
package utils import ( + "fmt" "path" "strings" @@ -94,6 +95,11 @@ func PatchCSIDeployment(f *e2eframework.Framework, o PatchCSIOptions, object int container.VolumeMounts[e].MountPath = substKubeletRootDir(container.VolumeMounts[e].MountPath) } + if len(o.Features) > 0 && len(o.Features[container.Name]) > 0 { + featuregateString := strings.Join(o.Features[container.Name], ",") + container.Args = append(container.Args, fmt.Sprintf("--feature-gates=%s", featuregateString)) + } + // Overwrite driver name resp. provider name // by appending a parameter with the right // value. @@ -218,4 +224,10 @@ type PatchCSIOptions struct { // field *if* the driver deploys a CSIDriver object. Ignored // otherwise. SELinuxMount *bool + // If not empty, the values are used to set the --feature-gates argument + // of specific sidecar containers. + // Features is a map keyed by the sidecar container name, for example: + // -- key: csi-resizer + // -- value: []string{"RecoverVolumeExpansionFailure=true"} + Features map[string][]string }