From 0b98e5610d79da8ff01e385f147d90b01afaff36 Mon Sep 17 00:00:00 2001 From: dntosas Date: Fri, 20 Nov 2020 20:53:07 +0200 Subject: [PATCH] [addons/CA] Add support for specifying resources and metrics - Resources We enable users to set their desired capacity for the cluster-autoscaler addon. There are edge cases, especially in big clusters, where the autoscaler needs to reconcile a large number of objects and thus may need increased memory to avoid OOM kills or increased CPU to avoid saturation. - Metrics Cluster autoscaler provides valuable insights for monitoring capacity allocation and scheduling aspects of a cluster. In this commit, we enable users to add the proper annotations on the deployment so that Prometheus can scrape its metrics. Signed-off-by: dntosas --- docs/cluster_spec.md | 3 ++ go.sum | 1 + k8s/crds/kops.k8s.io_clusters.yaml | 31 +++++++++++++++++++ pkg/apis/kops/componentconfig.go | 15 +++++++++ pkg/apis/kops/v1alpha2/componentconfig.go | 15 +++++++++ .../kops/v1alpha2/zz_generated.conversion.go | 10 ++++++ .../kops/v1alpha2/zz_generated.deepcopy.go | 25 +++++++++++++++ pkg/apis/kops/zz_generated.deepcopy.go | 25 +++++++++++++++ pkg/model/components/clusterautoscaler.go | 3 ++ upup/models/bindata.go | 17 +++++++--- .../k8s-1.15.yaml.template | 16 +++++++--- 11 files changed, 151 insertions(+), 10 deletions(-) diff --git a/docs/cluster_spec.md b/docs/cluster_spec.md index f34db9aff362c..ec2e7770b2181 100644 --- a/docs/cluster_spec.md +++ b/docs/cluster_spec.md @@ -686,6 +686,9 @@ spec: enabled: true skipNodesWithLocalStorage: true skipNodesWithSystemPods: true + cpuRequest: "100m" + memoryRequest: "300Mi" + metrics: true ``` Read more about cluster autoscaler in the [official documentation](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler). 
diff --git a/go.sum b/go.sum index db3a66dce9ec1..f3dbdcbce3250 100644 --- a/go.sum +++ b/go.sum @@ -1420,6 +1420,7 @@ k8s.io/cloud-provider v0.20.0-beta.2/go.mod h1:xu40/8K5o/wqZyuChUVqD5CoqVOVzKXE/ k8s.io/cloud-provider-openstack v1.18.0 h1:v/ebjNEdx0hBaygsRohSS643f41lj2CwvapCbn+aLOs= k8s.io/cloud-provider-openstack v1.18.0/go.mod h1:03202t5Sp+4Vmk6pxJ/hVH0fEkm9gMc/pku/QpkJQMQ= k8s.io/cluster-bootstrap v0.20.0-beta.2/go.mod h1:kYKZIdQhCt0sh13R7Bjm1JXDg9QVjez8TLlyhtDC8Ck= +k8s.io/code-generator v0.20.0-beta.2 h1:9b5RwuTexjs/UH3BUMCMI4lTECshBUc/DenKnKc3eCs= k8s.io/code-generator v0.20.0-beta.2/go.mod h1:UsqdF+VX4PU2g46NC2JRs4gc+IfrctnwHb76RNbWHJg= k8s.io/component-base v0.20.0-beta.2 h1:jg3eglA+HSDgimMoHoFvAiwHAGYxEy2D5HKhLPR4AjM= k8s.io/component-base v0.20.0-beta.2/go.mod h1:PS+w/i0JTsaQbtzk8EVPlj2WrY9E23MfWK/K4MNy7uc= diff --git a/k8s/crds/kops.k8s.io_clusters.yaml b/k8s/crds/kops.k8s.io_clusters.yaml index 806138ec3bbbd..0d2702ef3f580 100644 --- a/k8s/crds/kops.k8s.io_clusters.yaml +++ b/k8s/crds/kops.k8s.io_clusters.yaml @@ -351,6 +351,20 @@ spec: balanceSimilarNodeGroups: description: 'BalanceSimilarNodeGroups makes cluster autoscaler treat similar node groups as one. Default: false' type: boolean + cpuLimit: + anyOf: + - type: integer + - type: string + description: 'CPULimit of cluster autoscaler container. Default: 100m' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + cpuRequest: + anyOf: + - type: integer + - type: string + description: 'CPURequest of cluster autoscaler container. Default: 100m' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true enabled: description: 'Enabled enables the cluster autoscaler. Default: false' type: boolean @@ -360,6 +374,23 @@ spec: image: description: 'Image is the docker container used. 
Default: the latest supported image for the specified kubernetes version.' type: string + memoryLimit: + anyOf: + - type: integer + - type: string + description: 'MemoryLimit of cluster autoscaler container. Default: 300Mi' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + memoryRequest: + anyOf: + - type: integer + - type: string + description: 'MemoryRequest of cluster autoscaler container. Default: 300Mi' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + metrics: + description: 'Metrics when enabled, add prometheus annotation on cluster autoscaler deployment. Default: false' + type: boolean scaleDownUtilizationThreshold: description: 'ScaleDownUtilizationThreshold determines the utilization threshold for node scale-down. Default: 0.5' type: string diff --git a/pkg/apis/kops/componentconfig.go b/pkg/apis/kops/componentconfig.go index 74b6dbce7cb8a..08e178a54ebcd 100644 --- a/pkg/apis/kops/componentconfig.go +++ b/pkg/apis/kops/componentconfig.go @@ -812,6 +812,21 @@ type ClusterAutoscalerConfig struct { // Image is the docker container used. // Default: the latest supported image for the specified kubernetes version. Image *string `json:"image,omitempty"` + // MemoryRequest of cluster autoscaler container. + // Default: 300Mi + MemoryRequest *resource.Quantity `json:"memoryRequest,omitempty"` + // CPURequest of cluster autoscaler container. + // Default: 100m + CPURequest *resource.Quantity `json:"cpuRequest,omitempty"` + // MemoryLimit of cluster autoscaler container. + // Default: 300Mi + MemoryLimit *resource.Quantity `json:"memoryLimit,omitempty"` + // CPULimit of cluster autoscaler container. 
+ // Default: 100m + CPULimit *resource.Quantity `json:"cpuLimit,omitempty"` + // Metrics when enabled, add prometheus annotation on cluster autoscaler deployment. + // Default: false + Metrics *bool `json:"metrics,omitempty"` } // MetricsServerConfig determines the metrics server configuration. diff --git a/pkg/apis/kops/v1alpha2/componentconfig.go b/pkg/apis/kops/v1alpha2/componentconfig.go index fca173b5bde44..a67963d505872 100644 --- a/pkg/apis/kops/v1alpha2/componentconfig.go +++ b/pkg/apis/kops/v1alpha2/componentconfig.go @@ -813,6 +813,21 @@ type ClusterAutoscalerConfig struct { // Image is the docker container used. // Default: the latest supported image for the specified kubernetes version. Image *string `json:"image,omitempty"` + // MemoryRequest of cluster autoscaler container. + // Default: 300Mi + MemoryRequest *resource.Quantity `json:"memoryRequest,omitempty"` + // CPURequest of cluster autoscaler container. + // Default: 100m + CPURequest *resource.Quantity `json:"cpuRequest,omitempty"` + // MemoryLimit of cluster autoscaler container. + // Default: 300Mi + MemoryLimit *resource.Quantity `json:"memoryLimit,omitempty"` + // CPULimit of cluster autoscaler container. + // Default: 100m + CPULimit *resource.Quantity `json:"cpuLimit,omitempty"` + // Metrics when enabled, add prometheus annotation on cluster autoscaler deployment. + // Default: false + Metrics *bool `json:"metrics,omitempty"` } // MetricsServerConfig determines the metrics server configuration. 
diff --git a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go index 26d59a64cb32f..1e7537fbf1f9a 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.conversion.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.conversion.go @@ -1794,6 +1794,11 @@ func autoConvert_v1alpha2_ClusterAutoscalerConfig_To_kops_ClusterAutoscalerConfi out.SkipNodesWithSystemPods = in.SkipNodesWithSystemPods out.SkipNodesWithLocalStorage = in.SkipNodesWithLocalStorage out.Image = in.Image + out.MemoryRequest = in.MemoryRequest + out.CPURequest = in.CPURequest + out.MemoryLimit = in.MemoryLimit + out.CPULimit = in.CPULimit + out.Metrics = in.Metrics return nil } @@ -1810,6 +1815,11 @@ func autoConvert_kops_ClusterAutoscalerConfig_To_v1alpha2_ClusterAutoscalerConfi out.SkipNodesWithSystemPods = in.SkipNodesWithSystemPods out.SkipNodesWithLocalStorage = in.SkipNodesWithLocalStorage out.Image = in.Image + out.MemoryRequest = in.MemoryRequest + out.CPURequest = in.CPURequest + out.MemoryLimit = in.MemoryLimit + out.CPULimit = in.CPULimit + out.Metrics = in.Metrics return nil } diff --git a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go index 89c48b5b3d039..d699f5f5e9fd4 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go @@ -617,6 +617,31 @@ func (in *ClusterAutoscalerConfig) DeepCopyInto(out *ClusterAutoscalerConfig) { *out = new(string) **out = **in } + if in.MemoryRequest != nil { + in, out := &in.MemoryRequest, &out.MemoryRequest + x := (*in).DeepCopy() + *out = &x + } + if in.CPURequest != nil { + in, out := &in.CPURequest, &out.CPURequest + x := (*in).DeepCopy() + *out = &x + } + if in.MemoryLimit != nil { + in, out := &in.MemoryLimit, &out.MemoryLimit + x := (*in).DeepCopy() + *out = &x + } + if in.CPULimit != nil { + in, out := &in.CPULimit, &out.CPULimit + x := (*in).DeepCopy() + *out = &x + } + if in.Metrics != nil { + in, out := 
&in.Metrics, &out.Metrics + *out = new(bool) + **out = **in + } return } diff --git a/pkg/apis/kops/zz_generated.deepcopy.go b/pkg/apis/kops/zz_generated.deepcopy.go index f721a3fd9fafd..8289b3fdea3b0 100644 --- a/pkg/apis/kops/zz_generated.deepcopy.go +++ b/pkg/apis/kops/zz_generated.deepcopy.go @@ -717,6 +717,31 @@ func (in *ClusterAutoscalerConfig) DeepCopyInto(out *ClusterAutoscalerConfig) { *out = new(string) **out = **in } + if in.MemoryRequest != nil { + in, out := &in.MemoryRequest, &out.MemoryRequest + x := (*in).DeepCopy() + *out = &x + } + if in.CPURequest != nil { + in, out := &in.CPURequest, &out.CPURequest + x := (*in).DeepCopy() + *out = &x + } + if in.MemoryLimit != nil { + in, out := &in.MemoryLimit, &out.MemoryLimit + x := (*in).DeepCopy() + *out = &x + } + if in.CPULimit != nil { + in, out := &in.CPULimit, &out.CPULimit + x := (*in).DeepCopy() + *out = &x + } + if in.Metrics != nil { + in, out := &in.Metrics, &out.Metrics + *out = new(bool) + **out = **in + } return } diff --git a/pkg/model/components/clusterautoscaler.go b/pkg/model/components/clusterautoscaler.go index af49159a2cb36..0cd5a0448c3c1 100644 --- a/pkg/model/components/clusterautoscaler.go +++ b/pkg/model/components/clusterautoscaler.go @@ -80,6 +80,9 @@ func (b *ClusterAutoscalerOptionsBuilder) BuildOptions(o interface{}) error { if cas.BalanceSimilarNodeGroups == nil { cas.BalanceSimilarNodeGroups = fi.Bool(false) } + if cas.Metrics == nil { + cas.Metrics = fi.Bool(false) + } return nil } diff --git a/upup/models/bindata.go b/upup/models/bindata.go index fd9f013f8a767..4d98e62ff019a 100644 --- a/upup/models/bindata.go +++ b/upup/models/bindata.go @@ -709,6 +709,12 @@ spec: metadata: labels: app: cluster-autoscaler + {{ if .Metrics }} + annotations: + prometheus.io/path: "/metrics" + prometheus.io/port: "8085" + prometheus.io/scrape: "true" + {{ end }} spec: serviceAccountName: cluster-autoscaler tolerations: @@ -721,11 +727,11 @@ spec: name: cluster-autoscaler resources: limits: - 
cpu: 100m - memory: 300Mi + cpu: {{ or .CPULimit "100m"}} + memory: {{ or .MemoryLimit "300Mi"}} requests: - cpu: 100m - memory: 300Mi + cpu: {{ or .CPURequest "100m"}} + memory: {{ or .MemoryRequest "300Mi"}} command: - ./cluster-autoscaler - --balance-similar-node-groups={{ .BalanceSimilarNodeGroups }} @@ -751,7 +757,8 @@ spec: periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 -{{ end }}`) +{{ end }} +`) func cloudupResourcesAddonsClusterAutoscalerAddonsK8sIoK8s115YamlTemplateBytes() ([]byte, error) { return _cloudupResourcesAddonsClusterAutoscalerAddonsK8sIoK8s115YamlTemplate, nil diff --git a/upup/models/cloudup/resources/addons/cluster-autoscaler.addons.k8s.io/k8s-1.15.yaml.template b/upup/models/cloudup/resources/addons/cluster-autoscaler.addons.k8s.io/k8s-1.15.yaml.template index d12587108c16a..a322c528e56c3 100644 --- a/upup/models/cloudup/resources/addons/cluster-autoscaler.addons.k8s.io/k8s-1.15.yaml.template +++ b/upup/models/cloudup/resources/addons/cluster-autoscaler.addons.k8s.io/k8s-1.15.yaml.template @@ -134,6 +134,12 @@ spec: metadata: labels: app: cluster-autoscaler + {{ if .Metrics }} + annotations: + prometheus.io/path: "/metrics" + prometheus.io/port: "8085" + prometheus.io/scrape: "true" + {{ end }} spec: serviceAccountName: cluster-autoscaler tolerations: @@ -146,11 +152,11 @@ spec: name: cluster-autoscaler resources: limits: - cpu: 100m - memory: 300Mi + cpu: {{ or .CPULimit "100m"}} + memory: {{ or .MemoryLimit "300Mi"}} requests: - cpu: 100m - memory: 300Mi + cpu: {{ or .CPURequest "100m"}} + memory: {{ or .MemoryRequest "300Mi"}} command: - ./cluster-autoscaler - --balance-similar-node-groups={{ .BalanceSimilarNodeGroups }} @@ -176,4 +182,4 @@ spec: periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 -{{ end }} \ No newline at end of file +{{ end }}