diff --git a/README.md b/README.md index 364a80005..92148220e 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,9 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh * `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second. -*`--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes. +* `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes. + +* `--enable-distributed-snapshotting`: Enables each node to handle snapshots for the volumes local to that node. Off by default. It should be set to true only if the `--node-deployment` parameter for the CSI external-snapshotter sidecar is set to true. #### Other recognized arguments * `--kubeconfig `: Path to Kubernetes client configuration that the snapshot controller uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the snapshot controller does not run as a Kubernetes pod, e.g. for debugging. @@ -172,11 +174,11 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh * `--worker-threads`: Number of worker threads for running create snapshot and delete snapshot operations. Default value is 10. -* `--node-deployment`: Enables deploying the sidecar controller together with a CSI driver on nodes to manage node-local volumes. Off by default. +* `--node-deployment`: Enables deploying the sidecar controller together with a CSI driver on nodes to manage node-local volumes. Off by default. This should be set to true along with the `--enable-distributed-snapshotting` flag in the snapshot controller parameters to make use of distributed snapshotting. 
* `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second. -*`--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes. +* `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes. #### Other recognized arguments * `--kubeconfig `: Path to Kubernetes client configuration that the CSI external-snapshotter uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the external-snapshotter does not run as a Kubernetes pod, e.g. for debugging. diff --git a/cmd/csi-snapshotter/main.go b/cmd/csi-snapshotter/main.go index e27c18991..1439c0417 100644 --- a/cmd/csi-snapshotter/main.go +++ b/cmd/csi-snapshotter/main.go @@ -30,7 +30,6 @@ import ( "google.golang.org/grpc" - "github.com/spf13/pflag" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes" diff --git a/cmd/snapshot-controller/main.go b/cmd/snapshot-controller/main.go index c59115574..f7a9eac20 100644 --- a/cmd/snapshot-controller/main.go +++ b/cmd/snapshot-controller/main.go @@ -64,10 +64,11 @@ var ( kubeAPIQPS = flag.Float64("kube-api-qps", 5, "QPS to use while communicating with the kubernetes apiserver. Defaults to 5.0.") kubeAPIBurst = flag.Int("kube-api-burst", 10, "Burst to use while communicating with the kubernetes apiserver. Defaults to 10.") - httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.") - metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. 
Default is `/metrics`.") - retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.") - retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.") + httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.") + metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.") + retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.") + retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. 
Default is 5 minutes.") + enableDistributedSnapshotting = flag.Bool("enable-distributed-snapshotting", false, "Enables each node to handle snapshotting for the local volumes created on that node") ) var ( @@ -178,6 +179,7 @@ func main() { *resyncPeriod, workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax), workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax), + *enableDistributedSnapshotting, ) if err := ensureCustomResourceDefinitionsExist(snapClient); err != nil { diff --git a/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml b/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml index acf206441..abdbdd823 100644 --- a/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml +++ b/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml @@ -44,7 +44,7 @@ rules: - apiGroups: ["snapshot.storage.k8s.io"] resources: ["volumesnapshots/status"] verbs: ["update", "patch"] - # Enable this RBAC rule only when using distributed snapshotting, i.e. when the node-deployment flag is set to true + # Enable this RBAC rule only when using distributed snapshotting, i.e. 
when the enable-distributed-snapshotting flag is set to true # - apiGroups: [""] # resources: ["nodes"] # verbs: ["get", "list", "watch"] diff --git a/go.mod b/go.mod index 58ccd85cb..25b2243bc 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,6 @@ require ( github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.28.0 github.com/spf13/cobra v1.2.1 - github.com/spf13/pflag v1.0.5 golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/grpc v1.40.0 @@ -47,6 +46,7 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/procfs v0.6.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect golang.org/x/net v0.0.0-20210825183410-e898025ed96a // indirect golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e // indirect golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b // indirect diff --git a/pkg/common-controller/framework_test.go b/pkg/common-controller/framework_test.go index 78860c07a..b38875e45 100644 --- a/pkg/common-controller/framework_test.go +++ b/pkg/common-controller/framework_test.go @@ -842,6 +842,7 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte 60*time.Second, workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute), workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute), + false, ) ctrl.eventRecorder = record.NewFakeRecorder(1000) diff --git a/pkg/common-controller/snapshot_controller.go b/pkg/common-controller/snapshot_controller.go index ac45219bd..36895cf6c 100644 --- a/pkg/common-controller/snapshot_controller.go +++ b/pkg/common-controller/snapshot_controller.go @@ -672,13 +672,15 @@ func (ctrl *csiSnapshotCommonController) createSnapshotContent(snapshot *crdv1.V }, } - nodeName, err := ctrl.getManagedByNode(volume) - if err != nil { - return nil, err - } - if nodeName != "" { - snapshotContent.Labels = 
map[string]string{ - utils.VolumeSnapshotContentManagedByLabel: nodeName, + if ctrl.enableDistributedSnapshotting { + nodeName, err := ctrl.getManagedByNode(volume) + if err != nil { + return nil, err + } + if nodeName != "" { + snapshotContent.Labels = map[string]string{ + utils.VolumeSnapshotContentManagedByLabel: nodeName, + } } } @@ -1677,7 +1679,7 @@ func (ctrl *csiSnapshotCommonController) getManagedByNode(pv *v1.PersistentVolum nodes, err := ctrl.client.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) if err != nil { klog.Errorf("failed to get the list of nodes: %q", err) - return "", nil + return "", err } for _, node := range nodes.Items { diff --git a/pkg/common-controller/snapshot_controller_base.go b/pkg/common-controller/snapshot_controller_base.go index 0d5b46d0b..6f06d7697 100644 --- a/pkg/common-controller/snapshot_controller_base.go +++ b/pkg/common-controller/snapshot_controller_base.go @@ -64,6 +64,8 @@ type csiSnapshotCommonController struct { metricsManager metrics.MetricsManager resyncPeriod time.Duration + + enableDistributedSnapshotting bool } // NewCSISnapshotController returns a new *csiSnapshotCommonController @@ -78,6 +80,7 @@ func NewCSISnapshotCommonController( resyncPeriod time.Duration, snapshotRateLimiter workqueue.RateLimiter, contentRateLimiter workqueue.RateLimiter, + enableDistributedSnapshotting bool, ) *csiSnapshotCommonController { broadcaster := record.NewBroadcaster() broadcaster.StartLogging(klog.Infof) @@ -124,6 +127,7 @@ func NewCSISnapshotCommonController( ctrl.classLister = volumeSnapshotClassInformer.Lister() ctrl.classListerSynced = volumeSnapshotClassInformer.Informer().HasSynced + ctrl.enableDistributedSnapshotting = enableDistributedSnapshotting return ctrl } diff --git a/vendor/modules.txt b/vendor/modules.txt index 36a7236fb..ffa5a4d88 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -638,7 +638,7 @@ k8s.io/client-go/util/workqueue ## explicit; go 1.16 k8s.io/component-base/metrics 
k8s.io/component-base/version -# k8s.io/component-helpers v0.22.1 => k8s.io/component-helpers v0.23.0 +# k8s.io/component-helpers v0.23.0 => k8s.io/component-helpers v0.23.0 ## explicit; go 1.16 k8s.io/component-helpers/scheduling/corev1 k8s.io/component-helpers/scheduling/corev1/nodeaffinity