MON-3304: Add option to specify resource limits for all components

Signed-off-by: Pranshu Srivastava <rexagod@gmail.com>
openshift · Aug 8, 2023 · ddfcec4 · ddfcec4
1 parent 47320d5
commit ddfcec4
Show file tree

Hide file tree

Showing 15 changed files with 280 additions and 86 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@
 - [#1950](https://github.com/openshift/cluster-monitoring-operator/pull/1950) Disable CORS headers on Thanos querier by default and add a flag to enable them back.
 - [#1963](https://github.com/openshift/cluster-monitoring-operator/pull/1963) Add nodeExporter settings for network devices list.
 - [#2049](https://github.com/openshift/cluster-monitoring-operator/pull/2049) Remove Kube*QuotaOvercommit alerts.
+- [#2067](https://github.com/openshift/cluster-monitoring-operator/pull/2067) Add options to specify resource requests and limits for all components.
 
 ## 4.13
 

diff --git a/Documentation/api.md b/Documentation/api.md
@@ -39,6 +39,7 @@ Configuring Cluster Monitoring is optional. If the config does not exist or is e
 * [NodeExporterConfig](#nodeexporterconfig)
 * [OpenShiftStateMetricsConfig](#openshiftstatemetricsconfig)
 * [PrometheusK8sConfig](#prometheusk8sconfig)
+* [PrometheusOperatorAdmissionWebhookConfig](#prometheusoperatoradmissionwebhookconfig)
 * [PrometheusOperatorConfig](#prometheusoperatorconfig)
 * [PrometheusRestrictedConfig](#prometheusrestrictedconfig)
 * [RemoteWriteSpec](#remotewritespec)
@@ -131,6 +132,7 @@ The `ClusterMonitoringConfiguration` resource defines settings that customize th
 | kubeStateMetrics | *[KubeStateMetricsConfig](#kubestatemetricsconfig) | `KubeStateMetricsConfig` defines settings for the `kube-state-metrics` agent. |
 | prometheusK8s | *[PrometheusK8sConfig](#prometheusk8sconfig) | `PrometheusK8sConfig` defines settings for the Prometheus component. |
 | prometheusOperator | *[PrometheusOperatorConfig](#prometheusoperatorconfig) | `PrometheusOperatorConfig` defines settings for the Prometheus Operator component. |
+| prometheusOperatorAdmissionWebhook | *[PrometheusOperatorAdmissionWebhookConfig](#prometheusoperatoradmissionwebhookconfig) | `PrometheusOperatorAdmissionWebhookConfig` defines settings for the Prometheus Operator's admission webhook component. |
 | openshiftStateMetrics | *[OpenShiftStateMetricsConfig](#openshiftstatemetricsconfig) | `OpenShiftMetricsConfig` defines settings for the `openshift-state-metrics` agent. |
 | telemeterClient | *[TelemeterClientConfig](#telemeterclientconfig) | `TelemeterClientConfig` defines settings for the Telemeter Client component. |
 | thanosQuerier | *[ThanosQuerierConfig](#thanosquerierconfig) | `ThanosQuerierConfig` defines settings for the Thanos Querier component. |
@@ -167,6 +169,7 @@ The `K8sPrometheusAdapter` resource defines settings for the Prometheus Adapter
 | -------- | ---- | ----------- |
 | audit | *Audit | Defines the audit configuration used by the Prometheus Adapter instance. Possible profile values are: `metadata`, `request`, `requestresponse`, and `none`. The default value is `metadata`. |
 | nodeSelector | map[string]string | Defines the nodes on which the pods are scheduled. |
+| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#resourcerequirements-v1-core) | Defines resource requests and limits for the PrometheusAdapter container. |
 | tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#toleration-v1-core) | Defines tolerations for the pods. |
 | topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. |
 | dedicatedServiceMonitors | *[DedicatedServiceMonitors](#dedicatedservicemonitors) | Defines dedicated service monitors. |
@@ -185,6 +188,7 @@ The `KubeStateMetricsConfig` resource defines settings for the `kube-state-metri
 | Property | Type | Description |
 | -------- | ---- | ----------- |
 | nodeSelector | map[string]string | Defines the nodes on which the pods are scheduled. |
+| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#resourcerequirements-v1-core) | Defines resource requests and limits for the KubeStateMetrics container. |
 | tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#toleration-v1-core) | Defines tolerations for the pods. |
 | topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. |
 
@@ -382,6 +386,7 @@ The `NodeExporterConfig` resource defines settings for the `node-exporter` agent
 | collectors | [NodeExporterCollectorConfig](#nodeexportercollectorconfig) | Defines which collectors are enabled and their additional configuration parameters. |
 | maxProcs | uint32 | The target number of CPUs on which the Node Exporter's process will run. Use this setting to override the default value, which is set either to `4` or to the number of CPUs on the host, whichever is smaller. The default value is computed at runtime and set via the `GOMAXPROCS` environment variable before Node Exporter is launched. If a kernel deadlock occurs or if performance degrades when reading from `sysfs` concurrently, you can change this value to `1`, which limits Node Exporter to running on one CPU. For nodes with a high CPU count, setting the limit to a low number saves resources by preventing Go routines from being scheduled to run on all CPUs. However, I/O performance degrades if the `maxProcs` value is set too low, and there are many metrics to collect. |
 | ignoredNetworkDevices | *[]string | A list of network devices, as regular expressions, to be excluded from the relevant collector configuration such as `netdev` and `netclass`. When not set, the Cluster Monitoring Operator uses a predefined list of devices to be excluded to minimize the impact on memory usage. When set as an empty list, no devices are excluded. If you modify this setting, monitor the `prometheus-k8s` deployment closely for excessive memory usage. |
+| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#resourcerequirements-v1-core) | Defines resource requests and limits for the NodeExporter container. |
 
 [Back to TOC](#table-of-contents)
 
@@ -397,6 +402,7 @@ The `OpenShiftStateMetricsConfig` resource defines settings for the `openshift-s
 | Property | Type | Description |
 | -------- | ---- | ----------- |
 | nodeSelector | map[string]string | Defines the nodes on which the pods are scheduled. |
+| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#resourcerequirements-v1-core) | Defines resource requests and limits for the OpenShiftStateMetrics container. |
 | tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#toleration-v1-core) | Defines tolerations for the pods. |
 | topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. |
 
@@ -430,6 +436,21 @@ The `PrometheusK8sConfig` resource defines settings for the Prometheus component
 
 [Back to TOC](#table-of-contents)
 
+## PrometheusOperatorAdmissionWebhookConfig
+
+#### Description
+
+The `PrometheusOperatorAdmissionWebhookConfig` resource defines settings for the Prometheus Operator's admission webhook workload.
+
+
+<em>appears in: [ClusterMonitoringConfiguration](#clustermonitoringconfiguration)</em>
+
+| Property | Type | Description |
+| -------- | ---- | ----------- |
+| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#resourcerequirements-v1-core) | Defines resource requests and limits for the prometheus-operator-admission-webhook container. |
+
+[Back to TOC](#table-of-contents)
+
 ## PrometheusOperatorConfig
 
 #### Description
@@ -443,6 +464,7 @@ The `PrometheusOperatorConfig` resource defines settings for the Prometheus Oper
 | -------- | ---- | ----------- |
 | logLevel | string | Defines the log level settings for Prometheus Operator. The possible values are `error`, `warn`, `info`, and `debug`. The default value is `info`. |
 | nodeSelector | map[string]string | Defines the nodes on which the pods are scheduled. |
+| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#resourcerequirements-v1-core) | Defines resource requests and limits for the PrometheusOperator container. |
 | tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#toleration-v1-core) | Defines tolerations for the pods. |
 | topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. |
 
@@ -545,6 +567,7 @@ The `TLSConfig` resource configures the settings for TLS connections.
 | Property | Type | Description |
 | -------- | ---- | ----------- |
 | nodeSelector | map[string]string | Defines the nodes on which the pods are scheduled. |
+| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#resourcerequirements-v1-core) | Defines resource requests and limits for the Telemeter container. |
 | tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#toleration-v1-core) | Defines tolerations for the pods. |
 | topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. |
 

diff --git a/Documentation/openshiftdocs/index.adoc b/Documentation/openshiftdocs/index.adoc
@@ -59,6 +59,7 @@ The configuration file itself is always defined under the `config.yaml` key in t
 * link:modules/nodeexporterconfig.adoc[NodeExporterConfig]
 * link:modules/openshiftstatemetricsconfig.adoc[OpenShiftStateMetricsConfig]
 * link:modules/prometheusk8sconfig.adoc[PrometheusK8sConfig]
+* link:modules/prometheusoperatoradmissionwebhookconfig.adoc[PrometheusOperatorAdmissionWebhookConfig]
 * link:modules/prometheusoperatorconfig.adoc[PrometheusOperatorConfig]
 * link:modules/prometheusrestrictedconfig.adoc[PrometheusRestrictedConfig]
 * link:modules/remotewritespec.adoc[RemoteWriteSpec]

diff --git a/Documentation/openshiftdocs/modules/clustermonitoringconfiguration.adoc b/Documentation/openshiftdocs/modules/clustermonitoringconfiguration.adoc
@@ -27,6 +27,8 @@ The `ClusterMonitoringConfiguration` resource defines settings that customize th
 
 |prometheusOperator|*link:prometheusoperatorconfig.adoc[PrometheusOperatorConfig]|`PrometheusOperatorConfig` defines settings for the Prometheus Operator component.
 
+|prometheusOperatorAdmissionWebhook|*link:prometheusoperatoradmissionwebhookconfig.adoc[PrometheusOperatorAdmissionWebhookConfig]|`PrometheusOperatorAdmissionWebhookConfig` defines settings for the Prometheus Operator's admission webhook component.
+
 |openshiftStateMetrics|*link:openshiftstatemetricsconfig.adoc[OpenShiftStateMetricsConfig]|`OpenShiftMetricsConfig` defines settings for the `openshift-state-metrics` agent.
 
 |telemeterClient|*link:telemeterclientconfig.adoc[TelemeterClientConfig]|`TelemeterClientConfig` defines settings for the Telemeter Client component.

diff --git a/Documentation/openshiftdocs/modules/k8sprometheusadapter.adoc b/Documentation/openshiftdocs/modules/k8sprometheusadapter.adoc
@@ -22,6 +22,8 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat
 
 |nodeSelector|map[string]string|Defines the nodes on which the pods are scheduled.
 
+|resources|*v1.ResourceRequirements|Defines resource requests and limits for the PrometheusAdapter container.
+
 |tolerations|[]v1.Toleration|Defines tolerations for the pods.
 
 |topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints.

diff --git a/Documentation/openshiftdocs/modules/kubestatemetricsconfig.adoc b/Documentation/openshiftdocs/modules/kubestatemetricsconfig.adoc
@@ -20,6 +20,8 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat
 | Property | Type | Description 
 |nodeSelector|map[string]string|Defines the nodes on which the pods are scheduled.
 
+|resources|*v1.ResourceRequirements|Defines resource requests and limits for the KubeStateMetrics container.
+
 |tolerations|[]v1.Toleration|Defines tolerations for the pods.
 
 |topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints.

diff --git a/Documentation/openshiftdocs/modules/nodeexporterconfig.adoc b/Documentation/openshiftdocs/modules/nodeexporterconfig.adoc
@@ -24,6 +24,8 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat
 
 |ignoredNetworkDevices|*[]string|A list of network devices, as regular expressions, to be excluded from the relevant collector configuration such as `netdev` and `netclass`. When not set, the Cluster Monitoring Operator uses a predefined list of devices to be excluded to minimize the impact on memory usage. When set as an empty list, no devices are excluded. If you modify this setting, monitor the `prometheus-k8s` deployment closely for excessive memory usage.
 
+|resources|*v1.ResourceRequirements|Defines resource requests and limits for the NodeExporter container.
+
 |===
 
 link:../index.adoc[Back to TOC]
diff --git a/Documentation/openshiftdocs/modules/openshiftstatemetricsconfig.adoc b/Documentation/openshiftdocs/modules/openshiftstatemetricsconfig.adoc
@@ -20,6 +20,8 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat
 | Property | Type | Description 
 |nodeSelector|map[string]string|Defines the nodes on which the pods are scheduled.
 
+|resources|*v1.ResourceRequirements|Defines resource requests and limits for the OpenShiftStateMetrics container.
+
 |tolerations|[]v1.Toleration|Defines tolerations for the pods.
 
 |topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints.

diff --git a/Documentation/openshiftdocs/modules/prometheusoperatoradmissionwebhookconfig.adoc b/Documentation/openshiftdocs/modules/prometheusoperatoradmissionwebhookconfig.adoc
@@ -0,0 +1,25 @@
+// DO NOT EDIT THE CONTENT IN THIS FILE. It is automatically generated from the 
+	// source code for the Cluster Monitoring Operator. Any changes made to this 
+	// file will be overwritten when the content is re-generated. If you wish to 
+	// make edits, read the docgen utility instructions in the source code for the 
+	// CMO.
+	:_content-type: ASSEMBLY
+
+== PrometheusOperatorAdmissionWebhookConfig
+
+=== Description
+
+The `PrometheusOperatorAdmissionWebhookConfig` resource defines settings for the Prometheus Operator's admission webhook workload.
+
+
+
+Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfiguration]
+
+[options="header"]
+|===
+| Property | Type | Description 
+|resources|*v1.ResourceRequirements|Defines resource requests and limits for the prometheus-operator-admission-webhook container.
+
+|===
+
+link:../index.adoc[Back to TOC]
diff --git a/Documentation/openshiftdocs/modules/prometheusoperatorconfig.adoc b/Documentation/openshiftdocs/modules/prometheusoperatorconfig.adoc
@@ -23,6 +23,8 @@ link:userworkloadconfiguration.adoc[UserWorkloadConfiguration]
 
 |nodeSelector|map[string]string|Defines the nodes on which the pods are scheduled.
 
+|resources|*v1.ResourceRequirements|Defines resource requests and limits for the PrometheusOperator container.
+
 |tolerations|[]v1.Toleration|Defines tolerations for the pods.
 
 |topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints.

diff --git a/Documentation/openshiftdocs/modules/telemeterclientconfig.adoc b/Documentation/openshiftdocs/modules/telemeterclientconfig.adoc
@@ -23,6 +23,8 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat
 | Property | Type | Description 
 |nodeSelector|map[string]string|Defines the nodes on which the pods are scheduled.
 
+|resources|*v1.ResourceRequirements|Defines resource requests and limits for the Telemeter container.
+
 |tolerations|[]v1.Toleration|Defines tolerations for the pods.
 
 |topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints.

diff --git a/pkg/manifests/config.go b/pkg/manifests/config.go
@@ -254,6 +254,9 @@ func (c *Config) applyDefaults() {
 	if c.ClusterMonitoringConfiguration.PrometheusOperatorConfig == nil {
 		c.ClusterMonitoringConfiguration.PrometheusOperatorConfig = &PrometheusOperatorConfig{}
 	}
+	if c.ClusterMonitoringConfiguration.PrometheusOperatorAdmissionWebhookConfig == nil {
+		c.ClusterMonitoringConfiguration.PrometheusOperatorAdmissionWebhookConfig = &PrometheusOperatorAdmissionWebhookConfig{}
+	}
 	if c.ClusterMonitoringConfiguration.PrometheusK8sConfig == nil {
 		c.ClusterMonitoringConfiguration.PrometheusK8sConfig = &PrometheusK8sConfig{}
 	}

diff --git a/pkg/manifests/manifests.go b/pkg/manifests/manifests.go
@@ -750,6 +750,9 @@ func (f *Factory) KubeStateMetricsDeployment() (*appsv1.Deployment, error) {
 			d.Spec.Template.Spec.Containers[i].Args = f.setTLSSecurityConfiguration(container.Args, KubeRbacProxyTLSCipherSuitesFlag, KubeRbacProxyMinTLSVersionFlag)
 		case "kube-state-metrics":
 			d.Spec.Template.Spec.Containers[i].Image = f.config.Images.KubeStateMetrics
+			if f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources != nil {
+				d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources
+			}
 		}
 	}
 
@@ -809,6 +812,9 @@ func (f *Factory) OpenShiftStateMetricsDeployment() (*appsv1.Deployment, error)
 			d.Spec.Template.Spec.Containers[i].Args = f.setTLSSecurityConfiguration(container.Args, KubeRbacProxyTLSCipherSuitesFlag, KubeRbacProxyMinTLSVersionFlag)
 		case "openshift-state-metrics":
 			d.Spec.Template.Spec.Containers[i].Image = f.config.Images.OpenShiftStateMetrics
+			if f.config.ClusterMonitoringConfiguration.OpenShiftMetricsConfig.Resources != nil {
+				d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.OpenShiftMetricsConfig.Resources
+			}
 		}
 	}
 
@@ -964,6 +970,9 @@ func (f *Factory) NodeExporterDaemonSet() (*appsv1.DaemonSet, error) {
 			if err != nil {
 				return nil, err
 			}
+			if f.config.ClusterMonitoringConfiguration.NodeExporterConfig.Resources != nil {
+				ds.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.NodeExporterConfig.Resources
+			}
 		case "kube-rbac-proxy":
 			ds.Spec.Template.Spec.Containers[i].Image = f.config.Images.KubeRbacProxy
 			ds.Spec.Template.Spec.Containers[i].Args = f.setTLSSecurityConfiguration(container.Args, KubeRbacProxyTLSCipherSuitesFlag, KubeRbacProxyMinTLSVersionFlag)
@@ -2004,6 +2013,10 @@ func (f *Factory) PrometheusAdapterDeployment(apiAuthSecretName string, requesth
 	spec.Containers[0].Args = f.setTLSSecurityConfiguration(spec.Containers[0].Args,
 		PrometheusAdapterTLSCipherSuitesFlag, PrometheusAdapterTLSMinTLSVersionFlag)
 
+	if f.config.ClusterMonitoringConfiguration.K8sPrometheusAdapter.Resources != nil {
+		spec.Containers[0].Resources = *f.config.ClusterMonitoringConfiguration.K8sPrometheusAdapter.Resources
+	}
+
 	dep.Spec.Template.Spec = spec
 
 	return dep, nil
@@ -2162,6 +2175,10 @@ func (f *Factory) PrometheusOperatorAdmissionWebhookDeployment() (*appsv1.Deploy
 		case "prometheus-operator-admission-webhook":
 			d.Spec.Template.Spec.Containers[i].Image = f.config.Images.PrometheusOperatorAdmissionWebhook
 
+			if f.config.ClusterMonitoringConfiguration.PrometheusOperatorAdmissionWebhookConfig.Resources != nil {
+				d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.PrometheusOperatorAdmissionWebhookConfig.Resources
+			}
+
 			args := d.Spec.Template.Spec.Containers[i].Args
 			if f.config.ClusterMonitoringConfiguration.PrometheusOperatorConfig.LogLevel != "" {
 				args = append(args, fmt.Sprintf("--log-level=%s", f.config.ClusterMonitoringConfiguration.PrometheusOperatorConfig.LogLevel))
@@ -2213,6 +2230,10 @@ func (f *Factory) PrometheusOperatorDeployment() (*appsv1.Deployment, error) {
 		case "prometheus-operator":
 			d.Spec.Template.Spec.Containers[i].Image = f.config.Images.PrometheusOperator
 
+			if f.config.ClusterMonitoringConfiguration.PrometheusOperatorConfig.Resources != nil {
+				d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.PrometheusOperatorConfig.Resources
+			}
+
 			args := d.Spec.Template.Spec.Containers[i].Args
 			for i := range args {
 				if strings.HasPrefix(args[i], PrometheusConfigReloaderFlag) && f.config.Images.PrometheusConfigReloader != "" {
@@ -2261,6 +2282,10 @@ func (f *Factory) PrometheusOperatorUserWorkloadDeployment() (*appsv1.Deployment
 		case "prometheus-operator":
 			d.Spec.Template.Spec.Containers[i].Image = f.config.Images.PrometheusOperator
 
+			if f.config.UserWorkloadConfiguration.PrometheusOperator.Resources != nil {
+				d.Spec.Template.Spec.Containers[i].Resources = *f.config.UserWorkloadConfiguration.PrometheusOperator.Resources
+			}
+
 			args := d.Spec.Template.Spec.Containers[i].Args
 			for i := range args {
 				if strings.HasPrefix(args[i], PrometheusConfigReloaderFlag) {
@@ -2884,6 +2909,10 @@ func (f *Factory) TelemeterClientDeployment(proxyCABundleCM *v1.ConfigMap, s *v1
 		case "telemeter-client":
 			d.Spec.Template.Spec.Containers[i].Image = f.config.Images.TelemeterClient
 
+			if f.config.ClusterMonitoringConfiguration.TelemeterClientConfig.Resources != nil {
+				d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.TelemeterClientConfig.Resources
+			}
+
 			if f.config.ClusterMonitoringConfiguration.TelemeterClientConfig.ClusterID != "" {
 				setContainerEnvironmentVariable(&d.Spec.Template.Spec.Containers[i], "ID", f.config.ClusterMonitoringConfiguration.TelemeterClientConfig.ClusterID)
 			}