Skip to content

Commit

Permalink
[addons/CA] Add support for specifying resources and metrics
Browse files Browse the repository at this point in the history
- Resources
We enable users to set their desired CPU and memory requests and limits for the cluster-autoscaler addon.

There are edge cases, especially in big clusters, where the autoscaler needs
to reconcile a large number of objects and thus may need increased memory to
avoid OOM kills or increased CPU to avoid saturation.

- Metrics
The cluster autoscaler provides valuable insights for monitoring the capacity
allocation and scheduling aspects of a cluster. In this commit, we
enable users to add the proper annotations to the deployment so that
Prometheus can scrape its metrics.

Signed-off-by: dntosas <ntosas@gmail.com>
  • Loading branch information
dntosas committed Nov 20, 2020
1 parent 01a261c commit ec2b3e2
Show file tree
Hide file tree
Showing 11 changed files with 151 additions and 10 deletions.
3 changes: 3 additions & 0 deletions docs/cluster_spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,9 @@ spec:
enabled: true
skipNodesWithLocalStorage: true
skipNodesWithSystemPods: true
cpuRequest: "100m"
memoryRequest: "300Mi"
metrics: true
```

Read more about cluster autoscaler in the [official documentation](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler).
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,7 @@ k8s.io/cloud-provider v0.20.0-beta.2/go.mod h1:xu40/8K5o/wqZyuChUVqD5CoqVOVzKXE/
k8s.io/cloud-provider-openstack v1.18.0 h1:v/ebjNEdx0hBaygsRohSS643f41lj2CwvapCbn+aLOs=
k8s.io/cloud-provider-openstack v1.18.0/go.mod h1:03202t5Sp+4Vmk6pxJ/hVH0fEkm9gMc/pku/QpkJQMQ=
k8s.io/cluster-bootstrap v0.20.0-beta.2/go.mod h1:kYKZIdQhCt0sh13R7Bjm1JXDg9QVjez8TLlyhtDC8Ck=
k8s.io/code-generator v0.20.0-beta.2 h1:9b5RwuTexjs/UH3BUMCMI4lTECshBUc/DenKnKc3eCs=
k8s.io/code-generator v0.20.0-beta.2/go.mod h1:UsqdF+VX4PU2g46NC2JRs4gc+IfrctnwHb76RNbWHJg=
k8s.io/component-base v0.20.0-beta.2 h1:jg3eglA+HSDgimMoHoFvAiwHAGYxEy2D5HKhLPR4AjM=
k8s.io/component-base v0.20.0-beta.2/go.mod h1:PS+w/i0JTsaQbtzk8EVPlj2WrY9E23MfWK/K4MNy7uc=
Expand Down
31 changes: 31 additions & 0 deletions k8s/crds/kops.k8s.io_clusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,20 @@ spec:
balanceSimilarNodeGroups:
description: 'BalanceSimilarNodeGroups makes cluster autoscaler treat similar node groups as one. Default: false'
type: boolean
cpuLimit:
anyOf:
- type: integer
- type: string
description: 'CPULimit of cluster autoscaler container. Default: 100m'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
cpuRequest:
anyOf:
- type: integer
- type: string
description: 'CPURequest of cluster autoscaler container. Default: 100m'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
enabled:
description: 'Enabled enables the cluster autoscaler. Default: false'
type: boolean
Expand All @@ -360,6 +374,23 @@ spec:
image:
description: 'Image is the docker container used. Default: the latest supported image for the specified kubernetes version.'
type: string
memoryLimit:
anyOf:
- type: integer
- type: string
description: 'MemoryLimit of cluster autoscaler container. Default: 300Mi'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
memoryRequest:
anyOf:
- type: integer
- type: string
description: 'MemoryRequest of cluster autoscaler container. Default: 300Mi'
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
metrics:
description: 'Metrics when enabled, add prometheus annotation on cluster autoscaler deployment. Default: false'
type: boolean
scaleDownUtilizationThreshold:
description: 'ScaleDownUtilizationThreshold determines the utilization threshold for node scale-down. Default: 0.5'
type: string
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/kops/componentconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,21 @@ type ClusterAutoscalerConfig struct {
// Image is the docker container used.
// Default: the latest supported image for the specified kubernetes version.
Image *string `json:"image,omitempty"`
// MemoryRequest of cluster autoscaler container.
// Default: 300Mi
MemoryRequest *resource.Quantity `json:"memoryRequest,omitempty"`
// CPURequest of cluster autoscaler container.
// Default: 100m
CPURequest *resource.Quantity `json:"cpuRequest,omitempty"`
// MemoryLimit of cluster autoscaler container.
// Default: 300Mi
MemoryLimit *resource.Quantity `json:"memoryLimit,omitempty"`
// CPULimit of cluster autoscaler container.
// Default: 100m
CPULimit *resource.Quantity `json:"cpuLimit,omitempty"`
// Metrics when enabled, add prometheus annotation on cluster autoscaler deployment.
// Default: false
Metrics *bool `json:"metrics,omitempty"`
}

// MetricsServerConfig determines the metrics server configuration.
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/kops/v1alpha2/componentconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,21 @@ type ClusterAutoscalerConfig struct {
// Image is the docker container used.
// Default: the latest supported image for the specified kubernetes version.
Image *string `json:"image,omitempty"`
// MemoryRequest of cluster autoscaler container.
// Default: 300Mi
MemoryRequest *resource.Quantity `json:"memoryRequest,omitempty"`
// CPURequest of cluster autoscaler container.
// Default: 100m
CPURequest *resource.Quantity `json:"cpuRequest,omitempty"`
// MemoryLimit of cluster autoscaler container.
// Default: 300Mi
MemoryLimit *resource.Quantity `json:"memoryLimit,omitempty"`
// CPULimit of cluster autoscaler container.
// Default: 100m
CPULimit *resource.Quantity `json:"cpuLimit,omitempty"`
// Metrics when enabled, add prometheus annotation on cluster autoscaler deployment.
// Default: false
Metrics *bool `json:"metrics,omitempty"`
}

// MetricsServerConfig determines the metrics server configuration.
Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/kops/v1alpha2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions pkg/apis/kops/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/model/components/clusterautoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ func (b *ClusterAutoscalerOptionsBuilder) BuildOptions(o interface{}) error {
if cas.BalanceSimilarNodeGroups == nil {
cas.BalanceSimilarNodeGroups = fi.Bool(false)
}
// Default Metrics to false when the user leaves it unset. The assignment
// must target Metrics itself: writing BalanceSimilarNodeGroups here would
// leave Metrics nil and overwrite whatever balanceSimilarNodeGroups value
// was configured whenever metrics is omitted.
// NOTE(review): the deployment template guards the Prometheus annotations
// with `{{ if .Metrics }}`, and text/template treats every non-nil pointer
// as true — confirm the template tests the pointed-to value.
if cas.Metrics == nil {
	cas.Metrics = fi.Bool(false)
}

return nil
}
17 changes: 12 additions & 5 deletions upup/models/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ spec:
metadata:
labels:
app: cluster-autoscaler
{{ if .Metrics }}
annotations:
prometheus.io/path: "/metrics"
prometheus.io/port: "8085"
prometheus.io/scrape: "true"
{{ end }}
spec:
serviceAccountName: cluster-autoscaler
tolerations:
Expand All @@ -146,11 +152,11 @@ spec:
name: cluster-autoscaler
resources:
limits:
cpu: 100m
memory: 300Mi
cpu: {{ or .CPULimit "100m"}}
memory: {{ or .MemoryLimit "300Mi"}}
requests:
cpu: 100m
memory: 300Mi
cpu: {{ or .CPURequest "100m"}}
memory: {{ or .MemoryRequest "300Mi"}}
command:
- ./cluster-autoscaler
- --balance-similar-node-groups={{ .BalanceSimilarNodeGroups }}
Expand All @@ -176,4 +182,4 @@ spec:
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
{{ end }}
{{ end }}

0 comments on commit ec2b3e2

Please sign in to comment.