diff --git a/cluster/centos/config-default.sh b/cluster/centos/config-default.sh index 90d13bee12b0..eca05cb3cc9d 100755 --- a/cluster/centos/config-default.sh +++ b/cluster/centos/config-default.sh @@ -120,7 +120,7 @@ export FLANNEL_NET=${FLANNEL_NET:-"172.16.0.0/16"} # Admission Controllers to invoke prior to persisting objects in cluster # If we included ResourceQuota, we should keep it at the end of the list to prevent incrementing quota usage prematurely. -export ADMISSION_CONTROL=${ADMISSION_CONTROL:-"Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeClaimResize,DefaultTolerationSeconds,Priority,ResourceQuota"} +export ADMISSION_CONTROL=${ADMISSION_CONTROL:-"Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeClaimResize,DefaultTolerationSeconds,Priority,PVCProtection,ResourceQuota"} # Extra options to set on the Docker command line. # This is useful for setting --insecure-registry for local registries. diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index f1dc45feecbf..7b2bddf77358 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -294,7 +294,7 @@ if [[ -n "${GCE_GLBC_IMAGE:-}" ]]; then fi # Admission Controllers to invoke prior to persisting objects in cluster -ADMISSION_CONTROL=Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,PersistentVolumeClaimResize,DefaultTolerationSeconds,NodeRestriction,Priority +ADMISSION_CONTROL=Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,PersistentVolumeClaimResize,DefaultTolerationSeconds,NodeRestriction,Priority,PVCProtection if [[ "${ENABLE_POD_SECURITY_POLICY:-}" == "true" ]]; then ADMISSION_CONTROL="${ADMISSION_CONTROL},PodSecurityPolicy" diff --git a/cluster/libvirt-coreos/util.sh b/cluster/libvirt-coreos/util.sh index 9fe2170ea4d0..545d98504535 100644 --- a/cluster/libvirt-coreos/util.sh +++ b/cluster/libvirt-coreos/util.sh @@ -27,7 +27,7 @@ source "$KUBE_ROOT/cluster/common.sh" export LIBVIRT_DEFAULT_URI=qemu:///system export SERVICE_ACCOUNT_LOOKUP=${SERVICE_ACCOUNT_LOOKUP:-true} -export ADMISSION_CONTROL=${ADMISSION_CONTROL:-Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,PersistentVolumeClaimResize,DefaultTolerationSeconds,ResourceQuota} +export ADMISSION_CONTROL=${ADMISSION_CONTROL:-Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,PersistentVolumeClaimResize,DefaultTolerationSeconds,PVCProtection,ResourceQuota} readonly POOL=kubernetes readonly POOL_PATH=/var/lib/libvirt/images/kubernetes diff --git a/cluster/vagrant/config-default.sh b/cluster/vagrant/config-default.sh index 7eea6e8e77e5..63b49146db2e 100755 --- a/cluster/vagrant/config-default.sh +++ b/cluster/vagrant/config-default.sh @@ -56,7 +56,7 @@ MASTER_PASSWD="${MASTER_PASSWD:-vagrant}" # Admission Controllers to invoke prior to persisting objects in cluster # If we included ResourceQuota, we should keep it at the end of the list to prevent incrementing quota usage prematurely. -ADMISSION_CONTROL=Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,ResourceQuota +ADMISSION_CONTROL=Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,PVCProtection,ResourceQuota # Optional: Enable node logging. ENABLE_NODE_LOGGING=false @@ -120,4 +120,3 @@ E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} # Default fallback NETWORK_IF_NAME, will be used in case when no 'VAGRANT-BEGIN' comments were defined in network-script export DEFAULT_NETWORK_IF_NAME="eth0" - diff --git a/cmd/kube-apiserver/app/options/BUILD b/cmd/kube-apiserver/app/options/BUILD index 2ad00991da8b..7c46be368bf8 100644 --- a/cmd/kube-apiserver/app/options/BUILD +++ b/cmd/kube-apiserver/app/options/BUILD @@ -40,6 +40,7 @@ go_library( "//plugin/pkg/admission/noderestriction:go_default_library", "//plugin/pkg/admission/persistentvolume/label:go_default_library", "//plugin/pkg/admission/persistentvolume/resize:go_default_library", + "//plugin/pkg/admission/persistentvolumeclaim/pvcprotection:go_default_library", "//plugin/pkg/admission/podnodeselector:go_default_library", "//plugin/pkg/admission/podpreset:go_default_library", "//plugin/pkg/admission/podtolerationrestriction:go_default_library", diff --git a/cmd/kube-apiserver/app/options/plugins.go b/cmd/kube-apiserver/app/options/plugins.go index 30ed306485d2..a0d2502e7f58 100644 --- a/cmd/kube-apiserver/app/options/plugins.go +++ b/cmd/kube-apiserver/app/options/plugins.go @@ -42,6 +42,7 @@ import ( "k8s.io/kubernetes/plugin/pkg/admission/noderestriction" "k8s.io/kubernetes/plugin/pkg/admission/persistentvolume/label" "k8s.io/kubernetes/plugin/pkg/admission/persistentvolume/resize" + "k8s.io/kubernetes/plugin/pkg/admission/persistentvolumeclaim/pvcprotection" "k8s.io/kubernetes/plugin/pkg/admission/podnodeselector" "k8s.io/kubernetes/plugin/pkg/admission/podpreset" "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction" @@ -81,4 +82,5 @@ func RegisterAllAdmissionPlugins(plugins *admission.Plugins) { serviceaccount.Register(plugins) setdefault.Register(plugins) resize.Register(plugins) + pvcprotection.Register(plugins) } diff --git a/cmd/kube-controller-manager/app/BUILD b/cmd/kube-controller-manager/app/BUILD index 4fc8f2fe8491..4c374b31a4e2 100644 --- a/cmd/kube-controller-manager/app/BUILD +++ b/cmd/kube-controller-manager/app/BUILD @@ -79,6 +79,7 @@ go_library( "//pkg/controller/volume/attachdetach:go_default_library", "//pkg/controller/volume/expand:go_default_library", "//pkg/controller/volume/persistentvolume:go_default_library", + "//pkg/controller/volume/pvcprotection:go_default_library", "//pkg/features:go_default_library", "//pkg/quota/generic:go_default_library", "//pkg/quota/install:go_default_library", diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 9add80e01a00..d08e9658a8df 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -359,6 +359,7 @@ func NewControllerInitializers() map[string]InitFunc { controllers["attachdetach"] = startAttachDetachController controllers["persistentvolume-expander"] = startVolumeExpandController controllers["clusterrole-aggregation"] = startClusterRoleAggregrationController + controllers["pvc-protection"] = startPVCProtectionController return controllers } diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 7c4a1268a12b..819d18e0f013 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -53,6 +53,7 @@ import ( "k8s.io/kubernetes/pkg/controller/volume/attachdetach" "k8s.io/kubernetes/pkg/controller/volume/expand" persistentvolumecontroller "k8s.io/kubernetes/pkg/controller/volume/persistentvolume" + "k8s.io/kubernetes/pkg/controller/volume/pvcprotection" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/quota/generic" quotainstall "k8s.io/kubernetes/pkg/quota/install" @@ -376,3 +377,15 @@ func startGarbageCollectorController(ctx ControllerContext) (bool, error) { return true, nil } + +func startPVCProtectionController(ctx ControllerContext) (bool, error) { + if utilfeature.DefaultFeatureGate.Enabled(features.PVCProtection) { + go pvcprotection.NewPVCProtectionController( + ctx.InformerFactory.Core().V1().PersistentVolumeClaims(), + ctx.InformerFactory.Core().V1().Pods(), + ctx.ClientBuilder.ClientOrDie("pvc-protection-controller"), + ).Run(1, ctx.Stop) + return true, nil + } + return false, nil +} diff --git a/pkg/controller/BUILD b/pkg/controller/BUILD index 287ff8d32bd1..fe5a3adc085a 100644 --- a/pkg/controller/BUILD +++ b/pkg/controller/BUILD @@ -133,6 +133,7 @@ filegroup( "//pkg/controller/volume/events:all-srcs", "//pkg/controller/volume/expand:all-srcs", "//pkg/controller/volume/persistentvolume:all-srcs", + "//pkg/controller/volume/pvcprotection:all-srcs", ], tags = ["automanaged"], ) diff --git a/pkg/controller/volume/pvcprotection/BUILD b/pkg/controller/volume/pvcprotection/BUILD new file mode 100644 index 000000000000..5c713a259d2b --- /dev/null +++ b/pkg/controller/volume/pvcprotection/BUILD @@ -0,0 +1,61 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "go_default_library", + srcs = ["pvc_protection_controller.go"], + importpath = "k8s.io/kubernetes/pkg/controller/volume/pvcprotection", + visibility = ["//visibility:public"], + deps = [ + "//pkg/controller:go_default_library", + "//pkg/util/metrics:go_default_library", + "//pkg/volume/util:go_default_library", + "//pkg/volume/util/volumehelper:go_default_library", + "//vendor/github.com/golang/glog:go_default_library", + "//vendor/k8s.io/api/core/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library", + "//vendor/k8s.io/client-go/informers/core/v1:go_default_library", + "//vendor/k8s.io/client-go/kubernetes:go_default_library", + "//vendor/k8s.io/client-go/listers/core/v1:go_default_library", + "//vendor/k8s.io/client-go/tools/cache:go_default_library", + "//vendor/k8s.io/client-go/util/workqueue:go_default_library", + ], +) + +go_test( + name = "go_default_test", + srcs = ["pvc_protection_controller_test.go"], + importpath = "k8s.io/kubernetes/pkg/controller/volume/pvcprotection", + library = ":go_default_library", + deps = [ + "//pkg/controller:go_default_library", + "//pkg/volume/util:go_default_library", + "//vendor/github.com/davecgh/go-spew/spew:go_default_library", + "//vendor/github.com/golang/glog:go_default_library", + "//vendor/k8s.io/api/core/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/api/meta:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", + "//vendor/k8s.io/client-go/informers:go_default_library", + "//vendor/k8s.io/client-go/kubernetes/fake:go_default_library", + "//vendor/k8s.io/client-go/testing:go_default_library", + ], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/controller/volume/pvcprotection/pvc_protection_controller.go b/pkg/controller/volume/pvcprotection/pvc_protection_controller.go new file mode 100644 index 000000000000..40bf3e5de5c1 --- /dev/null +++ b/pkg/controller/volume/pvcprotection/pvc_protection_controller.go @@ -0,0 +1,284 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pvcprotection + +import ( + "fmt" + "time" + + "github.com/golang/glog" + "k8s.io/api/core/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + clientset "k8s.io/client-go/kubernetes" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/kubernetes/pkg/controller" + "k8s.io/kubernetes/pkg/util/metrics" + volumeutil "k8s.io/kubernetes/pkg/volume/util" + "k8s.io/kubernetes/pkg/volume/util/volumehelper" +) + +// Controller is controller that removes PVCProtectionFinalizer +// from PVCs that are used by no pods. +type Controller struct { + client clientset.Interface + + pvcLister corelisters.PersistentVolumeClaimLister + pvcListerSynced cache.InformerSynced + + podLister corelisters.PodLister + podListerSynced cache.InformerSynced + + queue workqueue.RateLimitingInterface +} + +// NewPVCProtectionController returns a new *{VCProtectionController. +func NewPVCProtectionController(pvcInformer coreinformers.PersistentVolumeClaimInformer, podInformer coreinformers.PodInformer, cl clientset.Interface) *Controller { + e := &Controller{ + client: cl, + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "pvcprotection"), + } + if cl != nil && cl.CoreV1().RESTClient().GetRateLimiter() != nil { + metrics.RegisterMetricAndTrackRateLimiterUsage("persistentvolumeclaim_protection_controller", cl.CoreV1().RESTClient().GetRateLimiter()) + } + + e.pvcLister = pvcInformer.Lister() + e.pvcListerSynced = pvcInformer.Informer().HasSynced + pvcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: e.pvcAddedUpdated, + UpdateFunc: func(old, new interface{}) { + e.pvcAddedUpdated(new) + }, + }) + + e.podLister = podInformer.Lister() + e.podListerSynced = podInformer.Informer().HasSynced + podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + e.podAddedDeletedUpdated(obj, false) + }, + DeleteFunc: func(obj interface{}) { + e.podAddedDeletedUpdated(obj, true) + }, + UpdateFunc: func(old, new interface{}) { + e.podAddedDeletedUpdated(new, false) + }, + }) + + return e +} + +// Run runs the controller goroutines. +func (c *Controller) Run(workers int, stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + defer c.queue.ShutDown() + + glog.Infof("Starting PVC protection controller") + defer glog.Infof("Shutting down PVC protection controller") + + if !controller.WaitForCacheSync("PVC protection", stopCh, c.pvcListerSynced, c.podListerSynced) { + return + } + + for i := 0; i < workers; i++ { + go wait.Until(c.runWorker, time.Second, stopCh) + } + + <-stopCh +} + +func (c *Controller) runWorker() { + for c.processNextWorkItem() { + } +} + +// processNextWorkItem deals with one pvcKey off the queue. It returns false when it's time to quit. +func (c *Controller) processNextWorkItem() bool { + pvcKey, quit := c.queue.Get() + if quit { + return false + } + defer c.queue.Done(pvcKey) + + pvcNamespace, pvcName, err := cache.SplitMetaNamespaceKey(pvcKey.(string)) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Error parsing PVC key %q: %v", pvcKey, err)) + return true + } + + err = c.processPVC(pvcNamespace, pvcName) + if err == nil { + c.queue.Forget(pvcKey) + return true + } + + utilruntime.HandleError(fmt.Errorf("PVC %v failed with : %v", pvcKey, err)) + c.queue.AddRateLimited(pvcKey) + + return true +} + +func (c *Controller) processPVC(pvcNamespace, pvcName string) error { + glog.V(4).Infof("Processing PVC %s/%s", pvcNamespace, pvcName) + startTime := time.Now() + defer func() { + glog.V(4).Infof("Finished processing PVC %s/%s (%v)", pvcNamespace, pvcName, time.Now().Sub(startTime)) + }() + + pvc, err := c.pvcLister.PersistentVolumeClaims(pvcNamespace).Get(pvcName) + if apierrs.IsNotFound(err) { + glog.V(4).Infof("PVC %s/%s not found, ignoring", pvcNamespace, pvcName) + return nil + } + if err != nil { + return err + } + + if volumeutil.IsPVCBeingDeleted(pvc) && volumeutil.IsProtectionFinalizerPresent(pvc) { + // PVC should be deleted. Check if it's used and remove finalizer if + // it's not. + isUsed, err := c.isBeingUsed(pvc) + if err != nil { + return err + } + if !isUsed { + return c.removeFinalizer(pvc) + } + } + + if !volumeutil.IsPVCBeingDeleted(pvc) && !volumeutil.IsProtectionFinalizerPresent(pvc) { + // PVC is not being deleted -> it should have the finalizer. The + // finalizer should be added by admission plugin, this is just to add + // the finalizer to old PVCs that were created before the admission + // plugin was enabled. + return c.addFinalizer(pvc) + } + return nil +} + +func (c *Controller) addFinalizer(pvc *v1.PersistentVolumeClaim) error { + claimClone := pvc.DeepCopy() + volumeutil.AddProtectionFinalizer(claimClone) + _, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(claimClone) + if err != nil { + glog.V(3).Infof("Error adding protection finalizer to PVC %s/%s: %v", pvc.Namespace, pvc.Name) + return err + } + glog.V(3).Infof("Added protection finalizer to PVC %s/%s", pvc.Namespace, pvc.Name) + return nil +} + +func (c *Controller) removeFinalizer(pvc *v1.PersistentVolumeClaim) error { + claimClone := pvc.DeepCopy() + volumeutil.RemoveProtectionFinalizer(claimClone) + _, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(claimClone) + if err != nil { + glog.V(3).Infof("Error removing protection finalizer from PVC %s/%s: %v", pvc.Namespace, pvc.Name, err) + return err + } + glog.V(3).Infof("Removed protection finalizer from PVC %s/%s", pvc.Namespace, pvc.Name) + return nil +} + +func (c *Controller) isBeingUsed(pvc *v1.PersistentVolumeClaim) (bool, error) { + pods, err := c.podLister.Pods(pvc.Namespace).List(labels.Everything()) + if err != nil { + return false, err + } + for _, pod := range pods { + if pod.Spec.NodeName == "" { + // This pod is not scheduled. We have a predicated in scheduler that + // prevents scheduling pods with deletion timestamp, so we can be + // pretty sure it won't be scheduled in parallel to this check. + // Therefore this pod does not block the PVC from deletion. + glog.V(4).Infof("Skipping unscheduled pod %s when checking PVC %s/%s", pod.Name, pvc.Namespace, pvc.Name) + continue + } + if volumehelper.IsPodTerminated(pod, pod.Status) { + // This pod is being unmounted/detached or is already + // unmounted/detached. It does not block the PVC from deletion. + continue + } + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim == nil { + continue + } + if volume.PersistentVolumeClaim.ClaimName == pvc.Name { + glog.V(2).Infof("Keeping PVC %s/%s, it is used by pod %s/%s", pvc.Namespace, pvc.Name, pod.Namespace, pod.Name) + return true, nil + } + } + } + + glog.V(3).Infof("PVC %s/%s is unused", pvc.Namespace, pvc.Name) + return false, nil +} + +// pvcAddedUpdated reacts to pvc added/updated/deleted events +func (c *Controller) pvcAddedUpdated(obj interface{}) { + pvc, ok := obj.(*v1.PersistentVolumeClaim) + if !ok { + utilruntime.HandleError(fmt.Errorf("PVC informer returned non-PVC object: %#v", obj)) + return + } + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(pvc) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Couldn't get key for Persistent Volume Claim %#v: %v", pvc, err)) + return + } + glog.V(4).Infof("Got event on PVC %s", key) + + if (!volumeutil.IsPVCBeingDeleted(pvc) && !volumeutil.IsProtectionFinalizerPresent(pvc)) || (volumeutil.IsPVCBeingDeleted(pvc) && volumeutil.IsProtectionFinalizerPresent(pvc)) { + c.queue.Add(key) + } +} + +// podAddedDeletedUpdated reacts to Pod events +func (c *Controller) podAddedDeletedUpdated(obj interface{}, deleted bool) { + pod, ok := obj.(*v1.Pod) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + utilruntime.HandleError(fmt.Errorf("Couldn't get object from tombstone %#v", obj)) + return + } + pod, ok = tombstone.Obj.(*v1.Pod) + if !ok { + utilruntime.HandleError(fmt.Errorf("Tombstone contained object that is not a Pod %#v", obj)) + return + } + } + + // Filter out pods that can't help us to remove a finalizer on PVC + if !deleted && !volumehelper.IsPodTerminated(pod, pod.Status) && pod.Spec.NodeName != "" { + return + } + + glog.V(4).Infof("Got event on pod %s/%s", pod.Namespace, pod.Name) + + // Enqueue all PVCs that the pod uses + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + c.queue.Add(pod.Namespace + "/" + volume.PersistentVolumeClaim.ClaimName) + } + } +} diff --git a/pkg/controller/volume/pvcprotection/pvc_protection_controller_test.go b/pkg/controller/volume/pvcprotection/pvc_protection_controller_test.go new file mode 100644 index 000000000000..0d7a2f9302c4 --- /dev/null +++ b/pkg/controller/volume/pvcprotection/pvc_protection_controller_test.go @@ -0,0 +1,397 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pvcprotection + +import ( + "errors" + "reflect" + "testing" + "time" + + "github.com/davecgh/go-spew/spew" + "github.com/golang/glog" + + "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + clienttesting "k8s.io/client-go/testing" + "k8s.io/kubernetes/pkg/controller" + volumeutil "k8s.io/kubernetes/pkg/volume/util" +) + +type reaction struct { + verb string + resource string + reactorfn clienttesting.ReactionFunc +} + +const ( + defaultNS = "default" + defaultPVCName = "pvc1" + defaultPodName = "pod1" + defaultNodeName = "node1" +) + +func pod() *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: defaultPodName, + Namespace: defaultNS, + }, + Spec: v1.PodSpec{ + NodeName: defaultNodeName, + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + } +} + +func unscheduled(pod *v1.Pod) *v1.Pod { + pod.Spec.NodeName = "" + return pod +} + +func withPVC(pvcName string, pod *v1.Pod) *v1.Pod { + volume := v1.Volume{ + Name: pvcName, + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + } + pod.Spec.Volumes = append(pod.Spec.Volumes, volume) + return pod +} + +func withEmptyDir(pod *v1.Pod) *v1.Pod { + volume := v1.Volume{ + Name: "emptyDir", + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + } + pod.Spec.Volumes = append(pod.Spec.Volumes, volume) + return pod +} + +func withStatus(phase v1.PodPhase, pod *v1.Pod) *v1.Pod { + pod.Status.Phase = phase + return pod +} + +func pvc() *v1.PersistentVolumeClaim { + return &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: defaultPVCName, + Namespace: defaultNS, + }, + } +} + +func withProtectionFinalizer(pvc *v1.PersistentVolumeClaim) *v1.PersistentVolumeClaim { + pvc.Finalizers = append(pvc.Finalizers, volumeutil.PVCProtectionFinalizer) + return pvc +} + +func deleted(pvc *v1.PersistentVolumeClaim) *v1.PersistentVolumeClaim { + pvc.DeletionTimestamp = &metav1.Time{} + return pvc +} + +func generateUpdateErrorFunc(t *testing.T, failures int) clienttesting.ReactionFunc { + i := 0 + return func(action clienttesting.Action) (bool, runtime.Object, error) { + i++ + if i <= failures { + // Update fails + update, ok := action.(clienttesting.UpdateAction) + + if !ok { + t.Fatalf("Reactor got non-update action: %+v", action) + } + acc, _ := meta.Accessor(update.GetObject()) + return true, nil, apierrors.NewForbidden(update.GetResource().GroupResource(), acc.GetName(), errors.New("Mock error")) + } + // Update succeeds + return false, nil, nil + } +} + +func TestPVCProtectionController(t *testing.T) { + pvcVer := schema.GroupVersionResource{ + Group: v1.GroupName, + Version: "v1", + Resource: "persistentvolumeclaims", + } + + tests := []struct { + name string + // Object to insert into fake kubeclient before the test starts. + initialObjects []runtime.Object + // Optional client reactors. + reactors []reaction + // PVC event to simulate. This PVC will be automatically added to + // initalObjects. + updatedPVC *v1.PersistentVolumeClaim + // Pod event to simulate. This Pod will be automatically added to + // initalObjects. + updatedPod *v1.Pod + // Pod event to similate. This Pod is *not* added to + // initalObjects. + deletedPod *v1.Pod + // List of expected kubeclient actions that should happen during the + // test. + expectedActions []clienttesting.Action + }{ + // + // PVC events + // + { + name: "PVC without finalizer -> finalizer is added", + updatedPVC: pvc(), + expectedActions: []clienttesting.Action{ + clienttesting.NewUpdateAction(pvcVer, defaultNS, withProtectionFinalizer(pvc())), + }, + }, + { + name: "PVC with finalizer -> no action", + updatedPVC: withProtectionFinalizer(pvc()), + expectedActions: []clienttesting.Action{}, + }, + { + name: "saving PVC finalizer fails -> controller retries", + updatedPVC: pvc(), + reactors: []reaction{ + { + verb: "update", + resource: "persistentvolumeclaims", + reactorfn: generateUpdateErrorFunc(t, 2 /* update fails twice*/), + }, + }, + expectedActions: []clienttesting.Action{ + // This fails + clienttesting.NewUpdateAction(pvcVer, defaultNS, withProtectionFinalizer(pvc())), + // This fails too + clienttesting.NewUpdateAction(pvcVer, defaultNS, withProtectionFinalizer(pvc())), + // This succeeds + clienttesting.NewUpdateAction(pvcVer, defaultNS, withProtectionFinalizer(pvc())), + }, + }, + { + name: "deleted PVC with finalizer -> finalizer is removed", + updatedPVC: deleted(withProtectionFinalizer(pvc())), + expectedActions: []clienttesting.Action{ + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + }, + }, + { + name: "finalizer removal fails -> controller retries", + updatedPVC: deleted(withProtectionFinalizer(pvc())), + reactors: []reaction{ + { + verb: "update", + resource: "persistentvolumeclaims", + reactorfn: generateUpdateErrorFunc(t, 2 /* update fails twice*/), + }, + }, + expectedActions: []clienttesting.Action{ + // Fails + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + // Fails too + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + // Succeeds + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + }, + }, + { + name: "deleted PVC with finalizer + pods with the PVC exists -> finalizer is not removed", + initialObjects: []runtime.Object{ + withPVC(defaultPVCName, pod()), + }, + updatedPVC: deleted(withProtectionFinalizer(pvc())), + expectedActions: []clienttesting.Action{}, + }, + { + name: "deleted PVC with finalizer + pods with unrelated PVC and EmptyDir exists -> finalizer is removed", + initialObjects: []runtime.Object{ + withEmptyDir(withPVC("unrelatedPVC", pod())), + }, + updatedPVC: deleted(withProtectionFinalizer(pvc())), + expectedActions: []clienttesting.Action{ + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + }, + }, + { + name: "deleted PVC with finalizer + pods with the PVC andis finished -> finalizer is removed", + initialObjects: []runtime.Object{ + withStatus(v1.PodFailed, withPVC(defaultPVCName, pod())), + }, + updatedPVC: deleted(withProtectionFinalizer(pvc())), + expectedActions: []clienttesting.Action{ + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + }, + }, + // + // Pod events + // + { + name: "updated running Pod -> no action", + initialObjects: []runtime.Object{ + deleted(withProtectionFinalizer(pvc())), + }, + updatedPod: withStatus(v1.PodRunning, withPVC(defaultPVCName, pod())), + expectedActions: []clienttesting.Action{}, + }, + { + name: "updated finished Pod -> finalizer is removed", + initialObjects: []runtime.Object{ + deleted(withProtectionFinalizer(pvc())), + }, + updatedPod: withStatus(v1.PodSucceeded, withPVC(defaultPVCName, pod())), + expectedActions: []clienttesting.Action{ + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + }, + }, + { + name: "updated unscheduled Pod -> finalizer is removed", + initialObjects: []runtime.Object{ + deleted(withProtectionFinalizer(pvc())), + }, + updatedPod: unscheduled(withPVC(defaultPVCName, pod())), + expectedActions: []clienttesting.Action{ + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + }, + }, + { + name: "deleted running Pod -> finalizer is removed", + initialObjects: []runtime.Object{ + deleted(withProtectionFinalizer(pvc())), + }, + deletedPod: withStatus(v1.PodRunning, withPVC(defaultPVCName, pod())), + expectedActions: []clienttesting.Action{ + clienttesting.NewUpdateAction(pvcVer, defaultNS, deleted(pvc())), + }, + }, + } + + for _, test := range tests { + // Create client with initial data + objs := test.initialObjects + if test.updatedPVC != nil { + objs = append(objs, test.updatedPVC) + } + if test.updatedPod != nil { + objs = append(objs, test.updatedPod) + } + client := fake.NewSimpleClientset(objs...) + + // Create informers + informers := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc()) + pvcInformer := informers.Core().V1().PersistentVolumeClaims() + podInformer := informers.Core().V1().Pods() + + // Populate the informers with initial objects so the controller can + // Get() and List() it. + for _, obj := range objs { + switch obj.(type) { + case *v1.PersistentVolumeClaim: + pvcInformer.Informer().GetStore().Add(obj) + case *v1.Pod: + podInformer.Informer().GetStore().Add(obj) + default: + t.Fatalf("Unknown initalObject type: %+v", obj) + } + } + + // Add reactor to inject test errors. + for _, reactor := range test.reactors { + client.Fake.PrependReactor(reactor.verb, reactor.resource, reactor.reactorfn) + } + + // Create the controller + ctrl := NewPVCProtectionController(pvcInformer, podInformer, client) + + // Start the test by simulating an event + if test.updatedPVC != nil { + ctrl.pvcAddedUpdated(test.updatedPVC) + } + if test.updatedPod != nil { + ctrl.podAddedDeletedUpdated(test.updatedPod, false) + } + if test.deletedPod != nil { + ctrl.podAddedDeletedUpdated(test.deletedPod, true) + } + + // Process the controller queue until we get expected results + timeout := time.Now().Add(10 * time.Second) + lastReportedActionCount := 0 + for { + if time.Now().After(timeout) { + t.Errorf("Test %q: timed out", test.name) + break + } + if ctrl.queue.Len() > 0 { + glog.V(5).Infof("Test %q: %d events queue, processing one", test.name, ctrl.queue.Len()) + ctrl.processNextWorkItem() + } + if ctrl.queue.Len() > 0 { + // There is still some work in the queue, process it now + continue + } + currentActionCount := len(client.Actions()) + if currentActionCount < len(test.expectedActions) { + // Do not log evey wait, only when the action count changes. + if lastReportedActionCount < currentActionCount { + glog.V(5).Infof("Test %q: got %d actions out of %d, waiting for the rest", test.name, currentActionCount, len(test.expectedActions)) + lastReportedActionCount = currentActionCount + } + // The test expected more to happen, wait for the actions. + // Most probably it's exponential backoff + time.Sleep(10 * time.Millisecond) + continue + } + break + } + actions := client.Actions() + for i, action := range actions { + if len(test.expectedActions) < i+1 { + t.Errorf("Test %q: %d unexpected actions: %+v", test.name, len(actions)-len(test.expectedActions), spew.Sdump(actions[i:])) + break + } + + expectedAction := test.expectedActions[i] + if !reflect.DeepEqual(expectedAction, action) { + t.Errorf("Test %q: action %d\nExpected:\n%s\ngot:\n%s", test.name, i, spew.Sdump(expectedAction), spew.Sdump(action)) + } + } + + if len(test.expectedActions) > len(actions) { + t.Errorf("Test %q: %d additional expected actions", test.name, len(test.expectedActions)-len(actions)) + for _, a := range test.expectedActions[len(actions):] { + t.Logf(" %+v", a) + } + } + + } +} diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index d9bd748dc985..1ed30a62fdaf 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -200,6 +200,12 @@ const ( // // Enable Block volume support in containers. BlockVolume utilfeature.Feature = "BlockVolume" + + // owner: @pospispa + // + // alpha: v1.9 + // Postpone deletion of a persistent volume claim in case it is used by a pod + PVCProtection utilfeature.Feature = "PVCProtection" ) func init() { @@ -237,6 +243,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS CSIPersistentVolume: {Default: false, PreRelease: utilfeature.Alpha}, CustomPodDNS: {Default: false, PreRelease: utilfeature.Alpha}, BlockVolume: {Default: false, PreRelease: utilfeature.Alpha}, + PVCProtection: {Default: false, PreRelease: utilfeature.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/volumemanager/populator/BUILD b/pkg/kubelet/volumemanager/populator/BUILD index f8938c1ce3d7..a3bd36cc0577 100644 --- a/pkg/kubelet/volumemanager/populator/BUILD +++ b/pkg/kubelet/volumemanager/populator/BUILD @@ -19,6 +19,7 @@ go_library( "//pkg/kubelet/util/format:go_default_library", "//pkg/kubelet/volumemanager/cache:go_default_library", "//pkg/volume:go_default_library", + "//pkg/volume/util:go_default_library", "//pkg/volume/util/types:go_default_library", "//pkg/volume/util/volumehelper:go_default_library", "//vendor/github.com/golang/glog:go_default_library", diff --git a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go index a9ecaaad69ec..ac762a040fa3 100644 --- a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go +++ b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go @@ -41,6 +41,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/util/format" "k8s.io/kubernetes/pkg/kubelet/volumemanager/cache" "k8s.io/kubernetes/pkg/volume" + volumeutil "k8s.io/kubernetes/pkg/volume/util" volumetypes "k8s.io/kubernetes/pkg/volume/util/types" "k8s.io/kubernetes/pkg/volume/util/volumehelper" ) @@ -419,7 +420,8 @@ func (dswp *desiredStateOfWorldPopulator) createVolumeSpec( } // getPVCExtractPV fetches the PVC object with the given namespace and name from -// the API server extracts the name of the PV it is pointing to and returns it. +// the API server, checks whether PVC is being deleted, extracts the name of the PV +// it is pointing to and returns it. // An error is returned if the PVC object's phase is not "Bound". func (dswp *desiredStateOfWorldPopulator) getPVCExtractPV( namespace string, claimName string) (string, types.UID, error) { @@ -433,6 +435,23 @@ func (dswp *desiredStateOfWorldPopulator) getPVCExtractPV( err) } + if utilfeature.DefaultFeatureGate.Enabled(features.PVCProtection) { + // Pods that uses a PVC that is being deleted must not be started. + // + // In case an old kubelet is running without this check or some kubelets + // have this feature disabled, the worst that can happen is that such + // pod is scheduled. This was the default behavior in 1.8 and earlier + // and users should not be that surprised. + // It should happen only in very rare case when scheduler schedules + // a pod and user deletes a PVC that's used by it at the same time. + if volumeutil.IsPVCBeingDeleted(pvc) { + return "", "", fmt.Errorf( + "can't start pod because PVC %s/%s is being deleted", + namespace, + claimName) + } + } + if pvc.Status.Phase != v1.ClaimBound || pvc.Spec.VolumeName == "" { return "", "", fmt.Errorf( diff --git a/pkg/volume/util/BUILD b/pkg/volume/util/BUILD index 3c94453fd45d..48a0b0ee6bf3 100644 --- a/pkg/volume/util/BUILD +++ b/pkg/volume/util/BUILD @@ -14,6 +14,7 @@ go_library( "device_util_unsupported.go", "doc.go", "error.go", + "finalizer.go", "fs_unsupported.go", "io_util.go", "metrics.go", @@ -61,6 +62,7 @@ go_library( go_test( name = "go_default_test", srcs = [ + "finalizer_test.go", "util_test.go", ] + select({ "@io_bazel_rules_go//go/platform:linux_amd64": [ @@ -74,6 +76,7 @@ go_test( deps = [ "//pkg/apis/core/install:go_default_library", "//pkg/apis/core/v1/helper:go_default_library", + "//vendor/github.com/davecgh/go-spew/spew:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", diff --git a/pkg/volume/util/finalizer.go b/pkg/volume/util/finalizer.go new file mode 100644 index 000000000000..846315450604 --- /dev/null +++ b/pkg/volume/util/finalizer.go @@ -0,0 +1,68 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "k8s.io/api/core/v1" +) + +const ( + // Name of finalizer on PVCs that have a running pod. + PVCProtectionFinalizer = "kubernetes.io/pvc-protection" +) + +// IsPVCBeingDeleted returns: +// true: in case PVC is being deleted, i.e. ObjectMeta.DeletionTimestamp is set +// false: in case PVC is not being deleted, i.e. ObjectMeta.DeletionTimestamp is nil +func IsPVCBeingDeleted(pvc *v1.PersistentVolumeClaim) bool { + return pvc.ObjectMeta.DeletionTimestamp != nil +} + +// IsProtectionFinalizerPresent returns true in case PVCProtectionFinalizer is +// present among the pvc.Finalizers +func IsProtectionFinalizerPresent(pvc *v1.PersistentVolumeClaim) bool { + for _, finalizer := range pvc.Finalizers { + if finalizer == PVCProtectionFinalizer { + return true + } + } + return false +} + +// RemoveProtectionFinalizer returns pvc without PVCProtectionFinalizer in case +// it's present in pvc.Finalizers. It expects that pvc is writable (i.e. is not +// informer's cached copy.) +func RemoveProtectionFinalizer(pvc *v1.PersistentVolumeClaim) { + newFinalizers := make([]string, 0) + for _, finalizer := range pvc.Finalizers { + if finalizer != PVCProtectionFinalizer { + newFinalizers = append(newFinalizers, finalizer) + } + } + if len(newFinalizers) == 0 { + // Sanitize for unit tests so we don't need to distinguish empty array + // and nil. + newFinalizers = nil + } + pvc.Finalizers = newFinalizers +} + +// AddProtectionFinalizer adds PVCProtectionFinalizer to pvc. It expects that +// pvc is writable (i.e. is not informer's cached copy.) +func AddProtectionFinalizer(pvc *v1.PersistentVolumeClaim) { + pvc.Finalizers = append(pvc.Finalizers, PVCProtectionFinalizer) +} diff --git a/pkg/volume/util/finalizer_test.go b/pkg/volume/util/finalizer_test.go new file mode 100644 index 000000000000..210ea3b3e637 --- /dev/null +++ b/pkg/volume/util/finalizer_test.go @@ -0,0 +1,231 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "reflect" + "testing" + "time" + + "github.com/davecgh/go-spew/spew" + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ( + arbitraryTime = metav1.Date(2017, 11, 1, 14, 28, 47, 0, time.FixedZone("CET", 0)) +) + +func TestIsPVCBeingDeleted(t *testing.T) { + tests := []struct { + pvc *v1.PersistentVolumeClaim + want bool + }{ + { + pvc: &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: nil, + }, + }, + want: false, + }, + { + pvc: &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &arbitraryTime, + }, + }, + want: true, + }, + } + for _, tt := range tests { + if got := IsPVCBeingDeleted(tt.pvc); got != tt.want { + t.Errorf("IsPVCBeingDeleted(%v) = %v WANT %v", tt.pvc, got, tt.want) + } + } +} + +func TestAddProtectionFinalizer(t *testing.T) { + tests := []struct { + name string + pvc *v1.PersistentVolumeClaim + want *v1.PersistentVolumeClaim + }{ + { + "PVC without finalizer", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + }, + }, + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{PVCProtectionFinalizer}, + }, + }, + }, + { + "PVC with some finalizers", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{"1", "2", "3", PVCProtectionFinalizer + "suffix", "prefix" + PVCProtectionFinalizer}, + }, + }, + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{"1", "2", "3", PVCProtectionFinalizer + "suffix", "prefix" + PVCProtectionFinalizer, PVCProtectionFinalizer}, + }, + }, + }, + } + for _, test := range tests { + got := test.pvc.DeepCopy() + AddProtectionFinalizer(got) + if !reflect.DeepEqual(got, test.want) { + t.Errorf("Test %q: expected:\n%s\n\ngot:\n%s", test.name, spew.Sdump(test.want), spew.Sdump(got)) + } + } +} + +func TestRemoveProtectionFinalizer(t *testing.T) { + tests := []struct { + name string + pvc *v1.PersistentVolumeClaim + want *v1.PersistentVolumeClaim + }{ + { + "PVC without finalizer", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + }, + }, + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + }, + }, + }, + { + "PVC with finalizer", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{PVCProtectionFinalizer}, + }, + }, + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + }, + }, + }, + { + "PVC with many finalizers", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{"1", "2", "3", PVCProtectionFinalizer + "suffix", "prefix" + PVCProtectionFinalizer, PVCProtectionFinalizer}, + }, + }, + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{"1", "2", "3", PVCProtectionFinalizer + "suffix", "prefix" + PVCProtectionFinalizer}, + }, + }, + }, + } + for _, test := range tests { + got := test.pvc.DeepCopy() + RemoveProtectionFinalizer(got) + if !reflect.DeepEqual(got, test.want) { + t.Errorf("Test %q: expected:\n%s\n\ngot:\n%s", test.name, spew.Sdump(test.want), spew.Sdump(got)) + } + } +} + +func TestIsProtectionFinalizerPresent(t *testing.T) { + tests := []struct { + name string + pvc *v1.PersistentVolumeClaim + want bool + }{ + { + "PVC without finalizer", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + }, + }, + false, + }, + { + "PVC with many unrelated finalizers", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{"1", "2", "3", PVCProtectionFinalizer + "suffix", "prefix" + PVCProtectionFinalizer}, + }, + }, + false, + }, + { + "PVC with many finalizers", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{"1", "2", "3", PVCProtectionFinalizer + "suffix", "prefix" + PVCProtectionFinalizer, PVCProtectionFinalizer}, + }, + }, + true, + }, + { + "PVC with finalizer", + &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pvc", + Namespace: "ns", + Finalizers: []string{PVCProtectionFinalizer}, + }, + }, + true, + }, + } + for _, test := range tests { + got := IsProtectionFinalizerPresent(test.pvc) + if got != test.want { + t.Errorf("Test %q: expected %v, got %v", test.name, test.want, got) + } + } +} diff --git a/plugin/BUILD b/plugin/BUILD index 764a8e3283f3..03b74856108d 100644 --- a/plugin/BUILD +++ b/plugin/BUILD @@ -29,6 +29,7 @@ filegroup( "//plugin/pkg/admission/noderestriction:all-srcs", "//plugin/pkg/admission/persistentvolume/label:all-srcs", "//plugin/pkg/admission/persistentvolume/resize:all-srcs", + "//plugin/pkg/admission/persistentvolumeclaim/pvcprotection:all-srcs", "//plugin/pkg/admission/podnodeselector:all-srcs", "//plugin/pkg/admission/podpreset:all-srcs", "//plugin/pkg/admission/podtolerationrestriction:all-srcs", diff --git a/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/BUILD b/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/BUILD new file mode 100644 index 000000000000..e13f63e59349 --- /dev/null +++ b/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/BUILD @@ -0,0 +1,51 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "go_default_library", + srcs = ["admission.go"], + importpath = "k8s.io/kubernetes/plugin/pkg/admission/persistentvolumeclaim/pvcprotection", + visibility = ["//visibility:public"], + deps = [ + "//pkg/apis/core:go_default_library", + "//pkg/client/informers/informers_generated/internalversion:go_default_library", + "//pkg/client/listers/core/internalversion:go_default_library", + "//pkg/features:go_default_library", + "//pkg/kubeapiserver/admission:go_default_library", + "//pkg/volume/util:go_default_library", + "//vendor/github.com/golang/glog:go_default_library", + "//vendor/k8s.io/apiserver/pkg/admission:go_default_library", + "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library", + ], +) + +go_test( + name = "go_default_test", + srcs = ["admission_test.go"], + importpath = "k8s.io/kubernetes/plugin/pkg/admission/persistentvolumeclaim/pvcprotection", + library = ":go_default_library", + deps = [ + "//pkg/apis/core:go_default_library", + "//pkg/client/informers/informers_generated/internalversion:go_default_library", + "//pkg/controller:go_default_library", + "//pkg/volume/util:go_default_library", + "//vendor/github.com/davecgh/go-spew/spew:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", + "//vendor/k8s.io/apiserver/pkg/admission:go_default_library", + "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library", + ], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/admission.go b/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/admission.go new file mode 100644 index 000000000000..218bca4b829c --- /dev/null +++ b/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/admission.go @@ -0,0 +1,111 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pvcprotection + +import ( + "fmt" + "io" + + "github.com/golang/glog" + + admission "k8s.io/apiserver/pkg/admission" + "k8s.io/apiserver/pkg/util/feature" + api "k8s.io/kubernetes/pkg/apis/core" + informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion" + corelisters "k8s.io/kubernetes/pkg/client/listers/core/internalversion" + "k8s.io/kubernetes/pkg/features" + kubeapiserveradmission "k8s.io/kubernetes/pkg/kubeapiserver/admission" + volumeutil "k8s.io/kubernetes/pkg/volume/util" +) + +const ( + // PluginName is the name of this admission controller plugin + PluginName = "PVCProtection" +) + +// Register registers a plugin +func Register(plugins *admission.Plugins) { + plugins.Register(PluginName, func(config io.Reader) (admission.Interface, error) { + plugin := newPlugin() + return plugin, nil + }) +} + +// pvcProtectionPlugin holds state for and implements the admission plugin. +type pvcProtectionPlugin struct { + *admission.Handler + lister corelisters.PersistentVolumeClaimLister +} + +var _ admission.Interface = &pvcProtectionPlugin{} +var _ = kubeapiserveradmission.WantsInternalKubeInformerFactory(&pvcProtectionPlugin{}) + +// newPlugin creates a new admission plugin. +func newPlugin() *pvcProtectionPlugin { + return &pvcProtectionPlugin{ + Handler: admission.NewHandler(admission.Create), + } +} + +func (c *pvcProtectionPlugin) SetInternalKubeInformerFactory(f informers.SharedInformerFactory) { + informer := f.Core().InternalVersion().PersistentVolumeClaims() + c.lister = informer.Lister() + c.SetReadyFunc(informer.Informer().HasSynced) +} + +// ValidateInitialization ensures lister is set. +func (c *pvcProtectionPlugin) ValidateInitialization() error { + if c.lister == nil { + return fmt.Errorf("missing lister") + } + return nil +} + +// Admit sets finalizer on all PVCs. The finalizer is removed by +// PVCProtectionController when it's not referenced by any pod. +// +// This prevents users from deleting a PVC that's used by a running pod. +func (c *pvcProtectionPlugin) Admit(a admission.Attributes) error { + if !feature.DefaultFeatureGate.Enabled(features.PVCProtection) { + return nil + } + + if a.GetResource().GroupResource() != api.Resource("persistentvolumeclaims") { + return nil + } + + if len(a.GetSubresource()) != 0 { + return nil + } + + pvc, ok := a.GetObject().(*api.PersistentVolumeClaim) + // if we can't convert then we don't handle this object so just return + if !ok { + return nil + } + + for _, f := range pvc.Finalizers { + if f == volumeutil.PVCProtectionFinalizer { + // Finalizer is already present, nothing to do + return nil + } + } + + glog.V(4).Infof("adding PVC protection finalizer to %s/%s", pvc.Namespace, pvc.Name) + pvc.Finalizers = append(pvc.Finalizers, volumeutil.PVCProtectionFinalizer) + return nil +} diff --git a/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/admission_test.go b/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/admission_test.go new file mode 100644 index 000000000000..0815c40615b8 --- /dev/null +++ b/plugin/pkg/admission/persistentvolumeclaim/pvcprotection/admission_test.go @@ -0,0 +1,106 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pvcprotection + +import ( + "fmt" + "reflect" + "testing" + + "github.com/davecgh/go-spew/spew" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apiserver/pkg/admission" + "k8s.io/apiserver/pkg/util/feature" + api "k8s.io/kubernetes/pkg/apis/core" + informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion" + "k8s.io/kubernetes/pkg/controller" + volumeutil "k8s.io/kubernetes/pkg/volume/util" +) + +func TestAdmit(t *testing.T) { + claim := &api.PersistentVolumeClaim{ + TypeMeta: metav1.TypeMeta{ + Kind: "PersistentVolumeClaim", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "claim", + Namespace: "ns", + }, + } + claimWithFinalizer := claim.DeepCopy() + claimWithFinalizer.Finalizers = []string{volumeutil.PVCProtectionFinalizer} + + tests := []struct { + name string + object runtime.Object + expectedObject runtime.Object + featureEnabled bool + }{ + { + "create -> add finalizer", + claim, + claimWithFinalizer, + true, + }, + { + "finalizer already exists -> no new finalizer", + claimWithFinalizer, + claimWithFinalizer, + true, + }, + { + "disabled feature -> no finalizer", + claim, + claim, + false, + }, + } + + ctrl := newPlugin() + informerFactory := informers.NewSharedInformerFactory(nil, controller.NoResyncPeriodFunc()) + ctrl.SetInternalKubeInformerFactory(informerFactory) + + for _, test := range tests { + feature.DefaultFeatureGate.Set(fmt.Sprintf("PVCProtection=%v", test.featureEnabled)) + obj := test.object.DeepCopyObject() + attrs := admission.NewAttributesRecord( + obj, // new object + obj.DeepCopyObject(), // old object, copy to be sure it's not modified + api.Kind("PersistentVolumeClaim").WithVersion("version"), + claim.Namespace, + claim.Name, + api.Resource("persistentvolumeclaims").WithVersion("version"), + "", // subresource + admission.Create, + nil, // userInfo + ) + + err := ctrl.Admit(attrs) + if err != nil { + t.Errorf("Test %q: got unexpected error: %v", test.name, err) + } + if !reflect.DeepEqual(test.expectedObject, obj) { + t.Errorf("Test %q: Expected object:\n%s\ngot:\n%s", test.name, spew.Sdump(test.expectedObject), spew.Sdump(obj)) + } + } + + // Disable the feature for rest of the tests. + // TODO: remove after alpha + feature.DefaultFeatureGate.Set("PVCProtection=false") +} diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go index f1270f611b78..cc0813f5de1f 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go @@ -315,6 +315,16 @@ func buildControllerRoles() ([]rbac.ClusterRole, []rbac.ClusterRoleBinding) { eventsRule(), }, }) + if utilfeature.DefaultFeatureGate.Enabled(features.PVCProtection) { + addControllerRole(&controllerRoles, &controllerRoleBindings, rbac.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "pvc-protection-controller"}, + Rules: []rbac.PolicyRule{ + rbac.NewRule("get", "list", "watch", "update").Groups(legacyGroup).Resources("persistentvolumeclaims").RuleOrDie(), + rbac.NewRule("list", "watch", "get").Groups(legacyGroup).Resources("pods").RuleOrDie(), + eventsRule(), + }, + }) + } return controllerRoles, controllerRoleBindings }