Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add a flag in the common controller for distributed snapshotting
  • Loading branch information
nearora-msft committed Dec 21, 2021
1 parent ce5165f commit b5680b9
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 21 deletions.
8 changes: 5 additions & 3 deletions README.md
Expand Up @@ -134,7 +134,9 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh

* `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second.

*`--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
* `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.

* `--enable-distributed-snapshotting`: Enables each node to handle snapshots for the volumes local to that node. Off by default. It should be set to true only if the `--node-deployment` parameter for the CSI external-snapshotter sidecar is set to true.

#### Other recognized arguments
* `--kubeconfig <path>`: Path to Kubernetes client configuration that the snapshot controller uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the snapshot controller does not run as a Kubernetes pod, e.g. for debugging.
Expand Down Expand Up @@ -172,11 +174,11 @@ Read more about how to install the example webhook [here](deploy/kubernetes/webh

* `--worker-threads`: Number of worker threads for running create snapshot and delete snapshot operations. Default value is 10.

* `--node-deployment`: Enables deploying the sidecar controller together with a CSI driver on nodes to manage node-local volumes. Off by default.
* `--node-deployment`: Enables deploying the sidecar controller together with a CSI driver on nodes to manage node-local volumes. Off by default. This should be set to true along with the `--enable-distributed-snapshotting` in the snapshot controller parameters to make use of distributed snapshotting.

* `--retry-interval-start`: Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default value is 1 second.

*`--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
* `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes.
#### Other recognized arguments
* `--kubeconfig <path>`: Path to Kubernetes client configuration that the CSI external-snapshotter uses to connect to Kubernetes API server. When omitted, default token provided by Kubernetes will be used. This option is useful only when the external-snapshotter does not run as a Kubernetes pod, e.g. for debugging.

Expand Down
1 change: 0 additions & 1 deletion cmd/csi-snapshotter/main.go
Expand Up @@ -30,7 +30,6 @@ import (

"google.golang.org/grpc"

"github.com/spf13/pflag"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
Expand Down
10 changes: 6 additions & 4 deletions cmd/snapshot-controller/main.go
Expand Up @@ -64,10 +64,11 @@ var (
kubeAPIQPS = flag.Float64("kube-api-qps", 5, "QPS to use while communicating with the kubernetes apiserver. Defaults to 5.0.")
kubeAPIBurst = flag.Int("kube-api-burst", 10, "Burst to use while communicating with the kubernetes apiserver. Defaults to 10.")

httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics, will listen (example: :8080). The default is empty string, which means the server is disabled.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
enableDistributedSnapshotting = flag.Bool("enable-distributed-snapshotting", false, "Enables each node to handle snapshotting for the local volumes created on that node")
)

var (
Expand Down Expand Up @@ -178,6 +179,7 @@ func main() {
*resyncPeriod,
workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax),
workqueue.NewItemExponentialFailureRateLimiter(*retryIntervalStart, *retryIntervalMax),
*enableDistributedSnapshotting,
)

if err := ensureCustomResourceDefinitionsExist(snapClient); err != nil {
Expand Down
Expand Up @@ -44,7 +44,7 @@ rules:
- apiGroups: ["snapshot.storage.k8s.io"]
resources: ["volumesnapshots/status"]
verbs: ["update", "patch"]
# Enable this RBAC rule only when using distributed snapshotting, i.e. when the node-deployment flag is set to true
# Enable this RBAC rule only when using distributed snapshotting, i.e. when the enable-distributed-snapshotting flag is set to true
# - apiGroups: [""]
# resources: ["nodes"]
# verbs: ["get", "list", "watch"]
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -17,7 +17,6 @@ require (
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.28.0
github.com/spf13/cobra v1.2.1
github.com/spf13/pflag v1.0.5
golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/grpc v1.40.0
Expand Down Expand Up @@ -47,6 +46,7 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/procfs v0.6.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/net v0.0.0-20210825183410-e898025ed96a // indirect
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e // indirect
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b // indirect
Expand Down
1 change: 1 addition & 0 deletions pkg/common-controller/framework_test.go
Expand Up @@ -842,6 +842,7 @@ func newTestController(kubeClient kubernetes.Interface, clientset clientset.Inte
60*time.Second,
workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute),
workqueue.NewItemExponentialFailureRateLimiter(1*time.Millisecond, 1*time.Minute),
false,
)

ctrl.eventRecorder = record.NewFakeRecorder(1000)
Expand Down
22 changes: 12 additions & 10 deletions pkg/common-controller/snapshot_controller.go
Expand Up @@ -29,7 +29,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes/scheme"
ref "k8s.io/client-go/tools/reference"
corev1 "k8s.io/component-helpers/scheduling/corev1"
corev1helpers "k8s.io/component-helpers/scheduling/corev1"
klog "k8s.io/klog/v2"

crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
Expand Down Expand Up @@ -672,13 +672,15 @@ func (ctrl *csiSnapshotCommonController) createSnapshotContent(snapshot *crdv1.V
},
}

nodeName, err := ctrl.getManagedByNode(volume)
if err != nil {
return nil, err
}
if nodeName != "" {
snapshotContent.Labels = map[string]string{
utils.VolumeSnapshotContentManagedByLabel: nodeName,
if ctrl.enableDistributedSnapshotting {
nodeName, err := ctrl.getManagedByNode(volume)
if err != nil {
return nil, err
}
if nodeName != "" {
snapshotContent.Labels = map[string]string{
utils.VolumeSnapshotContentManagedByLabel: nodeName,
}
}
}

Expand Down Expand Up @@ -1677,11 +1679,11 @@ func (ctrl *csiSnapshotCommonController) getManagedByNode(pv *v1.PersistentVolum
nodes, err := ctrl.client.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
if err != nil {
klog.Errorf("failed to get the list of nodes: %q", err)
return "", nil
return "", err
}

for _, node := range nodes.Items {
match, _ := corev1.MatchNodeSelectorTerms(&node, nodeSelectorTerms)
match, _ := corev1helpers.MatchNodeSelectorTerms(&node, nodeSelectorTerms)
if match {
return node.Name, nil
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/common-controller/snapshot_controller_base.go
Expand Up @@ -64,6 +64,8 @@ type csiSnapshotCommonController struct {
metricsManager metrics.MetricsManager

resyncPeriod time.Duration

enableDistributedSnapshotting bool
}

// NewCSISnapshotController returns a new *csiSnapshotCommonController
Expand All @@ -78,6 +80,7 @@ func NewCSISnapshotCommonController(
resyncPeriod time.Duration,
snapshotRateLimiter workqueue.RateLimiter,
contentRateLimiter workqueue.RateLimiter,
enableDistributedSnapshotting bool,
) *csiSnapshotCommonController {
broadcaster := record.NewBroadcaster()
broadcaster.StartLogging(klog.Infof)
Expand Down Expand Up @@ -124,6 +127,7 @@ func NewCSISnapshotCommonController(

ctrl.classLister = volumeSnapshotClassInformer.Lister()
ctrl.classListerSynced = volumeSnapshotClassInformer.Informer().HasSynced
ctrl.enableDistributedSnapshotting = enableDistributedSnapshotting

return ctrl
}
Expand Down
2 changes: 1 addition & 1 deletion vendor/modules.txt
Expand Up @@ -638,7 +638,7 @@ k8s.io/client-go/util/workqueue
## explicit; go 1.16
k8s.io/component-base/metrics
k8s.io/component-base/version
# k8s.io/component-helpers v0.22.1 => k8s.io/component-helpers v0.23.0
# k8s.io/component-helpers v0.23.0 => k8s.io/component-helpers v0.23.0
## explicit; go 1.16
k8s.io/component-helpers/scheduling/corev1
k8s.io/component-helpers/scheduling/corev1/nodeaffinity
Expand Down

0 comments on commit b5680b9

Please sign in to comment.