diff --git a/Documentation/api.md b/Documentation/api.md index c83763b450..f3364c9c42 100644 --- a/Documentation/api.md +++ b/Documentation/api.md @@ -25,6 +25,7 @@ Configuring Cluster Monitoring is optional. If the config does not exist or is e * [DedicatedServiceMonitors](#dedicatedservicemonitors) * [K8sPrometheusAdapter](#k8sprometheusadapter) * [KubeStateMetricsConfig](#kubestatemetricsconfig) +* [MetricsServerConfig](#metricsserverconfig) * [MonitoringPluginConfig](#monitoringpluginconfig) * [NodeExporterCollectorBuddyInfoConfig](#nodeexportercollectorbuddyinfoconfig) * [NodeExporterCollectorConfig](#nodeexportercollectorconfig) @@ -129,6 +130,7 @@ The `ClusterMonitoringConfiguration` resource defines settings that customize th | alertmanagerMain | *[AlertmanagerMainConfig](#alertmanagermainconfig) | `AlertmanagerMainConfig` defines settings for the Alertmanager component in the `openshift-monitoring` namespace. | | enableUserWorkload | *bool | `UserWorkloadEnabled` is a Boolean flag that enables monitoring for user-defined projects. | | k8sPrometheusAdapter | *[K8sPrometheusAdapter](#k8sprometheusadapter) | `K8sPrometheusAdapter` defines settings for the Prometheus Adapter component. | +| metricsServer | *[MetricsServerConfig](#metricsserverconfig) | `MetricsServer` defines settings for the MetricsServer component. | | kubeStateMetrics | *[KubeStateMetricsConfig](#kubestatemetricsconfig) | `KubeStateMetricsConfig` defines settings for the `kube-state-metrics` agent. | | prometheusK8s | *[PrometheusK8sConfig](#prometheusk8sconfig) | `PrometheusK8sConfig` defines settings for the Prometheus component. | | prometheusOperator | *[PrometheusOperatorConfig](#prometheusoperatorconfig) | `PrometheusOperatorConfig` defines settings for the Prometheus Operator component. | @@ -194,6 +196,24 @@ The `KubeStateMetricsConfig` resource defines settings for the `kube-state-metri [Back to TOC](#table-of-contents) +## MetricsServerConfig + +#### Description + +The `MetricsServerConfig` resource defines settings for the MetricsServer component. + + +appears in: [ClusterMonitoringConfiguration](#clustermonitoringconfiguration) + +| Property | Type | Description | +| -------- | ---- | ----------- | +| nodeSelector | map[string]string | Defines the nodes on which the pods are scheduled. | +| tolerations | [][v1.Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#toleration-v1-core) | Defines tolerations for the pods. | +| resources | *[v1.ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#resourcerequirements-v1-core) | Defines resource requests and limits for the Metrics Server container. | +| topologySpreadConstraints | []v1.TopologySpreadConstraint | Defines a pod's topology spread constraints. 
| + +[Back to TOC](#table-of-contents) + ## MonitoringPluginConfig #### Description diff --git a/Documentation/openshiftdocs/index.adoc b/Documentation/openshiftdocs/index.adoc index 093da80e02..e34342bdac 100644 --- a/Documentation/openshiftdocs/index.adoc +++ b/Documentation/openshiftdocs/index.adoc @@ -45,6 +45,7 @@ The configuration file itself is always defined under the `config.yaml` key in t * link:modules/dedicatedservicemonitors.adoc[DedicatedServiceMonitors] * link:modules/k8sprometheusadapter.adoc[K8sPrometheusAdapter] * link:modules/kubestatemetricsconfig.adoc[KubeStateMetricsConfig] +* link:modules/metricsserverconfig.adoc[MetricsServerConfig] * link:modules/monitoringpluginconfig.adoc[MonitoringPluginConfig] * link:modules/nodeexportercollectorbuddyinfoconfig.adoc[NodeExporterCollectorBuddyInfoConfig] * link:modules/nodeexportercollectorconfig.adoc[NodeExporterCollectorConfig] diff --git a/Documentation/openshiftdocs/modules/clustermonitoringconfiguration.adoc b/Documentation/openshiftdocs/modules/clustermonitoringconfiguration.adoc index 0553f82e4c..7121b1d81e 100644 --- a/Documentation/openshiftdocs/modules/clustermonitoringconfiguration.adoc +++ b/Documentation/openshiftdocs/modules/clustermonitoringconfiguration.adoc @@ -21,6 +21,8 @@ The `ClusterMonitoringConfiguration` resource defines settings that customize th |k8sPrometheusAdapter|*link:k8sprometheusadapter.adoc[K8sPrometheusAdapter]|`K8sPrometheusAdapter` defines settings for the Prometheus Adapter component. +|metricsServer|*link:metricsserverconfig.adoc[MetricsServerConfig]|`MetricsServer` defines settings for the MetricsServer component. + |kubeStateMetrics|*link:kubestatemetricsconfig.adoc[KubeStateMetricsConfig]|`KubeStateMetricsConfig` defines settings for the `kube-state-metrics` agent. |prometheusK8s|*link:prometheusk8sconfig.adoc[PrometheusK8sConfig]|`PrometheusK8sConfig` defines settings for the Prometheus component. diff --git a/Documentation/openshiftdocs/modules/metricsserverconfig.adoc b/Documentation/openshiftdocs/modules/metricsserverconfig.adoc new file mode 100644 index 0000000000..816afa9f8d --- /dev/null +++ b/Documentation/openshiftdocs/modules/metricsserverconfig.adoc @@ -0,0 +1,31 @@ +// DO NOT EDIT THE CONTENT IN THIS FILE. It is automatically generated from the + // source code for the Cluster Monitoring Operator. Any changes made to this + // file will be overwritten when the content is re-generated. If you wish to + // make edits, read the docgen utility instructions in the source code for the + // CMO. + :_content-type: ASSEMBLY + +== MetricsServerConfig + +=== Description + +The `MetricsServerConfig` resource defines settings for the MetricsServer component. + + + +Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfiguration] + +[options="header"] +|=== +| Property | Type | Description +|nodeSelector|map[string]string|Defines the nodes on which the pods are scheduled. + +|tolerations|[]v1.Toleration|Defines tolerations for the pods. + +|resources|*v1.ResourceRequirements|Defines resource requests and limits for the Metrics Server container. + +|topologySpreadConstraints|[]v1.TopologySpreadConstraint|Defines a pod's topology spread constraints. 
+ +|=== + +link:../index.adoc[Back to TOC] diff --git a/assets/metrics-server/api-service.yaml b/assets/metrics-server/api-service.yaml new file mode 100644 index 0000000000..bbfef65f94 --- /dev/null +++ b/assets/metrics-server/api-service.yaml @@ -0,0 +1,20 @@ +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + annotations: + service.beta.openshift.io/inject-cabundle: "true" + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: v1beta1.metrics.k8s.io +spec: + group: metrics.k8s.io + groupPriorityMinimum: 100 + insecureSkipTLSVerify: false + service: + name: metrics-server + namespace: openshift-monitoring + version: v1beta1 + versionPriority: 100 diff --git a/assets/metrics-server/cluster-role-binding-auth-delegator.yaml b/assets/metrics-server/cluster-role-binding-auth-delegator.yaml new file mode 100644 index 0000000000..fad58afef1 --- /dev/null +++ b/assets/metrics-server/cluster-role-binding-auth-delegator.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: auth-delegator + app.kubernetes.io/part-of: openshift-monitoring + name: metrics-server:system:auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: +- kind: ServiceAccount + name: metrics-server + namespace: openshift-monitoring diff --git a/assets/metrics-server/cluster-role-binding.yaml b/assets/metrics-server/cluster-role-binding.yaml new file mode 100644 index 0000000000..fe5405295c --- /dev/null +++ b/assets/metrics-server/cluster-role-binding.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: system:metrics-server +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:metrics-server +subjects: +- kind: ServiceAccount + name: metrics-server + namespace: openshift-monitoring diff --git a/assets/metrics-server/cluster-role.yaml b/assets/metrics-server/cluster-role.yaml new file mode 100644 index 0000000000..19be5ca4b0 --- /dev/null +++ b/assets/metrics-server/cluster-role.yaml @@ -0,0 +1,25 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: system:metrics-server +rules: +- apiGroups: + - "" + resources: + - nodes/metrics + verbs: + - get +- apiGroups: + - "" + resources: + - pods + - nodes + verbs: + - get + - list + - watch diff --git a/assets/metrics-server/deployment.yaml b/assets/metrics-server/deployment.yaml new file mode 100644 index 0000000000..954ab85c5f --- /dev/null +++ b/assets/metrics-server/deployment.yaml @@ -0,0 +1,103 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: metrics-server + 
namespace: openshift-monitoring +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + strategy: + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + namespaces: + - openshift-monitoring + topologyKey: kubernetes.io/hostname + containers: + - args: + - --secure-port=10250 + - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname + - --kubelet-use-node-status-port + - --metric-resolution=15s + - --kubelet-certificate-authority=/etc/tls/kubelet-serving-ca-bundle/ca-bundle.crt + - --kubelet-client-certificate=/etc/tls/metrics-client-certs/tls.crt + - --kubelet-client-key=/etc/tls/metrics-client-certs/tls.key + - --tls-cert-file=/etc/tls/private/tls.crt + - --tls-private-key-file=/etc/tls/private/tls.key + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 + image: registry.k8s.io/metrics-server/metrics-server:v0.6.4 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /livez + port: https + scheme: HTTPS + periodSeconds: 10 + name: metrics-server + ports: + - containerPort: 10250 + name: https + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readyz + port: https + scheme: HTTPS + initialDelaySeconds: 20 + periodSeconds: 10 + resources: + requests: + cpu: 1m + memory: 40Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + volumeMounts: + - mountPath: /etc/tls/private + name: secret-metrics-server-tls + - mountPath: /etc/tls/metrics-client-certs + name: secret-metrics-client-certs + - mountPath: /etc/tls/kubelet-serving-ca-bundle + name: configmap-kubelet-serving-ca-bundle + nodeSelector: + kubernetes.io/os: linux + priorityClassName: system-cluster-critical + serviceAccountName: metrics-server + volumes: + - name: secret-metrics-client-certs + secret: + secretName: metrics-client-certs + - name: secret-metrics-server-tls + secret: + secretName: metrics-server-tls + - configMap: + name: kubelet-serving-ca-bundle + name: configmap-kubelet-serving-ca-bundle diff --git a/assets/metrics-server/pod-disruption-budget.yaml b/assets/metrics-server/pod-disruption-budget.yaml new file mode 100644 index 0000000000..e76c97f9f9 --- /dev/null +++ b/assets/metrics-server/pod-disruption-budget.yaml @@ -0,0 +1,17 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: metrics-server + namespace: openshift-monitoring +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: metrics-server + 
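+      # These matchLabels must stay in sync with the deployment's pod template
+      # labels; with two replicas and minAvailable: 1, the budget tolerates a
+      # single voluntary disruption at a time.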
app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring diff --git a/assets/metrics-server/role-binding-auth-reader.yaml b/assets/metrics-server/role-binding-auth-reader.yaml new file mode 100644 index 0000000000..6b11a238ce --- /dev/null +++ b/assets/metrics-server/role-binding-auth-reader.yaml @@ -0,0 +1,18 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server-auth-reader + app.kubernetes.io/part-of: openshift-monitoring + name: metrics-server-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: +- kind: ServiceAccount + name: metrics-server + namespace: openshift-monitoring diff --git a/assets/metrics-server/service-account.yaml b/assets/metrics-server/service-account.yaml new file mode 100644 index 0000000000..310685e790 --- /dev/null +++ b/assets/metrics-server/service-account.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: metrics-server + namespace: openshift-monitoring diff --git a/assets/metrics-server/service-monitor.yaml b/assets/metrics-server/service-monitor.yaml new file mode 100644 index 0000000000..e3a6761e4e --- /dev/null +++ b/assets/metrics-server/service-monitor.yaml @@ -0,0 +1,26 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: metrics-server + namespace: openshift-monitoring +spec: + endpoints: + - bearerTokenFile: "" + port: https + scheme: https + tlsConfig: + caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt + certFile: /etc/prometheus/secrets/metrics-client-certs/tls.crt + insecureSkipVerify: false + keyFile: /etc/prometheus/secrets/metrics-client-certs/tls.key + serverName: metrics-server.openshift-monitoring.svc + selector: + matchLabels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring diff --git a/assets/metrics-server/service.yaml b/assets/metrics-server/service.yaml new file mode 100644 index 0000000000..c2d2d6c0e1 --- /dev/null +++ b/assets/metrics-server/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: metrics-server-tls + labels: + app.kubernetes.io/component: metrics-server + app.kubernetes.io/managed-by: cluster-monitoring-operator + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring + name: metrics-server + namespace: openshift-monitoring +spec: + ports: + - name: https + port: 443 + protocol: TCP + targetPort: https + selector: + app.kubernetes.io/name: metrics-server + app.kubernetes.io/part-of: openshift-monitoring diff --git a/jsonnet/components/metrics-server.libsonnet b/jsonnet/components/metrics-server.libsonnet new file mode 100644 index 0000000000..39fb5f594d --- /dev/null +++ b/jsonnet/components/metrics-server.libsonnet @@ 
-0,0 +1,358 @@ +function(params) { + local cfg = params, + + serviceAccount: { + apiVersion: 'v1', + kind: 'ServiceAccount', + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + name: 'metrics-server', + namespace: cfg.namespace, + }, + }, + clusterRole: { + apiVersion: 'rbac.authorization.k8s.io/v1', + kind: 'ClusterRole', + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + name: 'system:metrics-server', + }, + rules: [ + { + apiGroups: [''], + resources: ['nodes/metrics'], + verbs: ['get'], + }, + { + apiGroups: [''], + resources: ['pods', 'nodes'], + verbs: ['get', 'list', 'watch'], + }, + ], + }, + roleBindingAuthReader: { + apiVersion: 'rbac.authorization.k8s.io/v1', + kind: 'RoleBinding', + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server-auth-reader', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + name: 'metrics-server-auth-reader', + namespace: 'kube-system', + }, + roleRef: { + apiGroup: 'rbac.authorization.k8s.io', + kind: 'Role', + name: 'extension-apiserver-authentication-reader', + }, + subjects: [ + { + kind: 'ServiceAccount', + name: 'metrics-server', + namespace: cfg.namespace, + }, + ], + }, + clusterRoleBindingAuthDelegator: { + apiVersion: 'rbac.authorization.k8s.io/v1', + kind: 'ClusterRoleBinding', + metadata: { + labels: { + 'app.kubernetes.io/name': 'auth-delegator', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + name: 'metrics-server:system:auth-delegator', + }, + roleRef: { + apiGroup: 'rbac.authorization.k8s.io', + kind: 'ClusterRole', + name: 'system:auth-delegator', + }, + subjects: [ + { + kind: 'ServiceAccount', + name: 'metrics-server', + namespace: cfg.namespace, + }, + ], + }, + clusterRoleBinding: { + apiVersion: 'rbac.authorization.k8s.io/v1', + kind: 'ClusterRoleBinding', + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server', + } + cfg.commonLabels, + name: 'system:metrics-server', + }, + roleRef: { + apiGroup: 'rbac.authorization.k8s.io', + kind: 'ClusterRole', + name: 'system:metrics-server', + }, + subjects: [ + { + kind: 'ServiceAccount', + name: 'metrics-server', + namespace: cfg.namespace, + }, + ], + }, + service: { + apiVersion: 'v1', + kind: 'Service', + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + annotations: { + 'service.beta.openshift.io/serving-cert-secret-name': 'metrics-server-tls', + }, + name: 'metrics-server', + namespace: cfg.namespace, + }, + spec: { + ports: [ + { + name: 'https', + port: 443, + protocol: 'TCP', + targetPort: 'https', + }, + ], + selector: { + 'app.kubernetes.io/name': 'metrics-server', + } + cfg.commonLabels, + }, + }, + deployment: { + apiVersion: 'apps/v1', + kind: 'Deployment', + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + name: 'metrics-server', + namespace: cfg.namespace, + }, + spec: { + replicas: 2, + selector: { + matchLabels: { + 'app.kubernetes.io/name': 'metrics-server', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + }, + strategy: { + rollingUpdate: { + maxUnavailable: 1, + }, + }, + template: { + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server', + 
+            'app.kubernetes.io/component': 'metrics-server',
+          } + cfg.commonLabels,
+        },
+        spec: {
+          affinity: {
+            podAntiAffinity: {
+              requiredDuringSchedulingIgnoredDuringExecution: [
+                {
+                  labelSelector: {
+                    matchLabels: {
+                      'app.kubernetes.io/name': 'metrics-server',
+                      'app.kubernetes.io/component': 'metrics-server',
+                    } + cfg.commonLabels,
+                  },
+                  namespaces: [cfg.namespace],
+                  topologyKey: 'kubernetes.io/hostname',
+                },
+              ],
+            },
+          },
+          containers: [
+            {
+              image: cfg.image,
+              args: [
+                '--secure-port=10250',
+                '--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname',
+                '--kubelet-use-node-status-port',
+                '--metric-resolution=15s',
+                '--kubelet-certificate-authority=/etc/tls/kubelet-serving-ca-bundle/ca-bundle.crt',
+                '--kubelet-client-certificate=/etc/tls/metrics-client-certs/tls.crt',
+                '--kubelet-client-key=/etc/tls/metrics-client-certs/tls.key',
+                '--tls-cert-file=/etc/tls/private/tls.crt',
+                '--tls-private-key-file=/etc/tls/private/tls.key',
+                '--tls-cipher-suites=' + cfg.tlsCipherSuites,
+              ],
+              imagePullPolicy: 'IfNotPresent',
+              livenessProbe: {
+                failureThreshold: 3,
+                httpGet: {
+                  path: '/livez',
+                  port: 'https',
+                  scheme: 'HTTPS',
+                },
+                periodSeconds: 10,
+              },
+              name: 'metrics-server',
+              ports: [
+                {
+                  containerPort: 10250,
+                  name: 'https',
+                  protocol: 'TCP',
+                },
+              ],
+              // metrics-server needs two successful kubelet scrapes before it
+              // reports ready: the first happens during bootstrap and the
+              // second roughly 15s later, the kubelet scrape interval set by
+              // the `--metric-resolution=15s` flag above; hence the 20s
+              // initial delay on the readiness probe.
+              readinessProbe: {
+                failureThreshold: 3,
+                httpGet: {
+                  path: '/readyz',
+                  port: 'https',
+                  scheme: 'HTTPS',
+                },
+                initialDelaySeconds: 20,
+                periodSeconds: 10,
+              },
+              resources: {
+                requests: {
+                  cpu: '1m',
+                  memory: '40Mi',
+                },
+              },
+              securityContext: {
+                allowPrivilegeEscalation: false,
+                readOnlyRootFilesystem: true,
+                runAsNonRoot: true,
+              },
+              volumeMounts: [
+                {
+                  mountPath: '/etc/tls/private',
+                  name: 'secret-metrics-server-tls',
+                },
+                {
+                  mountPath: '/etc/tls/metrics-client-certs',
+                  name: 'secret-metrics-client-certs',
+                },
+                {
+                  mountPath: '/etc/tls/kubelet-serving-ca-bundle',
+                  name: 'configmap-kubelet-serving-ca-bundle',
+                },
+              ],
+            },
+          ],
+          nodeSelector: {
+            'kubernetes.io/os': 'linux',
+          },
+          priorityClassName: 'system-cluster-critical',
+          serviceAccountName: 'metrics-server',
+          volumes: [
+            {
+              name: 'secret-metrics-client-certs',
+              secret: {
+                secretName: 'metrics-client-certs',
+              },
+            },
+            {
+              name: 'secret-metrics-server-tls',
+              secret: {
+                secretName: 'metrics-server-tls',
+              },
+            },
+            {
+              configMap: {
+                name: 'kubelet-serving-ca-bundle',
+              },
+              name: 'configmap-kubelet-serving-ca-bundle',
+            },
+          ],
+        },
+      },
+    },
+  },
+  podDisruptionBudget: {
+    apiVersion: 'policy/v1',
+    kind: 'PodDisruptionBudget',
+    metadata: {
+      labels: {
+        'app.kubernetes.io/name': 'metrics-server',
+        'app.kubernetes.io/component': 'metrics-server',
+      } + cfg.commonLabels,
+      name: 'metrics-server',
+      namespace: cfg.namespace,
+    },
+    spec: {
+      minAvailable: 1,
+      selector: {
+        matchLabels: {
+          'app.kubernetes.io/name': 'metrics-server',
+          'app.kubernetes.io/component': 'metrics-server',
+        } + cfg.commonLabels,
+      },
+    },
+  },
+  apiService: {
+    apiVersion: 'apiregistration.k8s.io/v1',
+    kind: 'APIService',
+    metadata: {
+      name: 'v1beta1.metrics.k8s.io',
+      labels: {
+        'app.kubernetes.io/name': 'metrics-server',
+        'app.kubernetes.io/component': 'metrics-server',
+      } + cfg.commonLabels,
+      annotations+: {
+        'service.beta.openshift.io/inject-cabundle': 'true',
+      },
+    },
+    spec: {
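+      // The aggregation layer routes metrics.k8s.io/v1beta1 requests (e.g.
+      // `kubectl top`, HPA resource metrics) to the Service referenced below;
+      // the inject-cabundle annotation above makes the service-ca operator
+      // populate caBundle, so the connection is verified rather than relying
+      // on insecureSkipTLSVerify.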
service: { + name: $.service.metadata.name, + namespace: cfg.namespace, + }, + group: 'metrics.k8s.io', + version: 'v1beta1', + insecureSkipTLSVerify: false, + groupPriorityMinimum: 100, + versionPriority: 100, + }, + }, + serviceMonitor: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + labels: { + 'app.kubernetes.io/name': 'metrics-server', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + name: 'metrics-server', + namespace: cfg.namespace, + }, + spec: { + endpoints: [ + { + port: 'https', + scheme: 'https', + }, + ], + selector: { + matchLabels: { + 'app.kubernetes.io/name': 'metrics-server', + 'app.kubernetes.io/component': 'metrics-server', + } + cfg.commonLabels, + }, + }, + }, +} diff --git a/jsonnet/main.jsonnet b/jsonnet/main.jsonnet index e44fb47af0..775352248f 100644 --- a/jsonnet/main.jsonnet +++ b/jsonnet/main.jsonnet @@ -12,6 +12,7 @@ local kubeStateMetrics = import './components/kube-state-metrics.libsonnet'; local controlPlane = import './components/control-plane.libsonnet'; local nodeExporter = import './components/node-exporter.libsonnet'; local prometheusAdapter = import './components/prometheus-adapter.libsonnet'; +local metricsServer = import './components/metrics-server.libsonnet'; local prometheusOperator = import './components/prometheus-operator.libsonnet'; local admissionWebhook = import './components/admission-webhook.libsonnet'; local prometheusOperatorUserWorkload = import './components/prometheus-operator-user-workload.libsonnet'; @@ -64,6 +65,7 @@ local commonConfig = { kubeStateMetrics: 'registry.k8s.io/kube-state-metrics/kube-state-metrics:v' + $.versions.kubeStateMetrics, nodeExporter: 'quay.io/prometheus/node-exporter:v' + $.versions.nodeExporter, prometheusAdapter: 'kubernetes-sigs/prometheus-adapter:v' + $.versions.prometheusAdapter, + kubernetesMetricsServer: 'registry.k8s.io/metrics-server/metrics-server:v' + $.versions.kubernetesMetricsServer, prometheusOperator: 'quay.io/prometheus-operator/prometheus-operator:v' + $.versions.prometheusOperator, prometheusOperatorReloader: 'quay.io/prometheus-operator/prometheus-config-reloader:v' + $.versions.prometheusOperator, prometheusOperatorAdmissionWebhook: 'quay.io/prometheus-operator/admission-webhook:v' + $.versions.prometheusOperator, @@ -248,6 +250,13 @@ local inCluster = prometheusAdapterMetricPrefix: $.values.common.prometheusAdapterMetricPrefix, containerQuerySelector: 'job="kubelet"', }, + metricsServer: { + namespace: $.values.common.namespace, + version: $.values.common.versions.kubernetesMetricsServer, + image: $.values.common.images.kubernetesMetricsServer, + commonLabels+: $.values.common.commonLabels, + tlsCipherSuites: $.values.common.tlsCipherSuites, + }, admissionWebhook: { name: 'prometheus-operator-admission-webhook', namespace: $.values.common.namespace, @@ -371,6 +380,7 @@ local inCluster = inCluster.openshiftStateMetrics.clusterRole.rules + inCluster.prometheusAdapter.clusterRole.rules + inCluster.prometheusAdapter.clusterRoleServerResources.rules + + inCluster.metricsServer.clusterRole.rules + inCluster.prometheus.clusterRole.rules + std.flatMap(function(role) role.rules, inCluster.prometheus.roleSpecificNamespaces.items) + @@ -388,6 +398,7 @@ local inCluster = nodeExporter: nodeExporter($.values.nodeExporter), prometheus: prometheus($.values.prometheus), prometheusAdapter: prometheusAdapter($.values.prometheusAdapter), + metricsServer: metricsServer($.values.metricsServer), admissionWebhook: 
admissionWebhook($.values.admissionWebhook), prometheusOperator: prometheusOperator($.values.prometheusOperator), controlPlane: controlPlane($.values.controlPlane), @@ -488,6 +499,7 @@ addLabels( { ['prometheus-operator-user-workload/' + name]: userWorkload.prometheusOperator[name] for name in std.objectFields(userWorkload.prometheusOperator) } + { ['prometheus-user-workload/' + name]: userWorkload.prometheus[name] for name in std.objectFields(userWorkload.prometheus) } + { ['prometheus-adapter/' + name]: inCluster.prometheusAdapter[name] for name in std.objectFields(inCluster.prometheusAdapter) } + + { ['metrics-server/' + name]: inCluster.metricsServer[name] for name in std.objectFields(inCluster.metricsServer) } + // needs to be removed once remote-write is allowed for sending telemetry { ['telemeter-client/' + name]: inCluster.telemeterClient[name] for name in std.objectFields(inCluster.telemeterClient) } + { ['monitoring-plugin/' + name]: inCluster.monitoringPlugin[name] for name in std.objectFields(inCluster.monitoringPlugin) } + diff --git a/manifests/0000_50_cluster-monitoring-operator_02-role.yaml b/manifests/0000_50_cluster-monitoring-operator_02-role.yaml index 44224dedfe..45f4d2e094 100644 --- a/manifests/0000_50_cluster-monitoring-operator_02-role.yaml +++ b/manifests/0000_50_cluster-monitoring-operator_02-role.yaml @@ -432,6 +432,21 @@ rules: - nodes/metrics verbs: - get +- apiGroups: + - "" + resources: + - pods + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - nodes/metrics + verbs: + - get - nonResourceURLs: - /metrics verbs: diff --git a/manifests/0000_50_cluster-monitoring-operator_05-deployment-ibm-cloud-managed.yaml b/manifests/0000_50_cluster-monitoring-operator_05-deployment-ibm-cloud-managed.yaml index 3a061c363b..3a1b4978e2 100644 --- a/manifests/0000_50_cluster-monitoring-operator_05-deployment-ibm-cloud-managed.yaml +++ b/manifests/0000_50_cluster-monitoring-operator_05-deployment-ibm-cloud-managed.yaml @@ -49,6 +49,7 @@ spec: - -images=k8s-prometheus-adapter=quay.io/openshift/origin-k8s-prometheus-adapter:latest - -images=thanos=quay.io/openshift/origin-thanos:latest - -images=monitoring-plugin=quay.io/openshift/origin-monitoring-plugin:latest + - -images=kube-metrics-server=quay.io/openshift/origin-kube-metrics-server:latest env: - name: RELEASE_VERSION value: 0.0.1-snapshot diff --git a/manifests/0000_50_cluster-monitoring-operator_05-deployment.yaml b/manifests/0000_50_cluster-monitoring-operator_05-deployment.yaml index 072b47b3cf..c382192fa3 100644 --- a/manifests/0000_50_cluster-monitoring-operator_05-deployment.yaml +++ b/manifests/0000_50_cluster-monitoring-operator_05-deployment.yaml @@ -75,6 +75,7 @@ spec: - "-images=k8s-prometheus-adapter=quay.io/openshift/origin-k8s-prometheus-adapter:latest" - "-images=thanos=quay.io/openshift/origin-thanos:latest" - "-images=monitoring-plugin=quay.io/openshift/origin-monitoring-plugin:latest" + - "-images=kube-metrics-server=quay.io/openshift/origin-kube-metrics-server:latest" env: - name: RELEASE_VERSION value: "0.0.1-snapshot" diff --git a/pkg/manifests/config.go b/pkg/manifests/config.go index 865fd46865..7aff042124 100644 --- a/pkg/manifests/config.go +++ b/pkg/manifests/config.go @@ -127,6 +127,7 @@ func (c Config) GetThanosRulerAlertmanagerConfigs() []AdditionalAlertmanagerConf type Images struct { K8sPrometheusAdapter string + MetricsServer string PromLabelProxy string PrometheusOperatorAdmissionWebhook string PrometheusOperator string @@ -347,6 +348,7 @@ func (c *Config) 
SetImages(images map[string]string) { c.Images.TelemeterClient = images["telemeter-client"] c.Images.PromLabelProxy = images["prom-label-proxy"] c.Images.K8sPrometheusAdapter = images["k8s-prometheus-adapter"] + c.Images.MetricsServer = images["kube-metrics-server"] c.Images.OpenShiftStateMetrics = images["openshift-state-metrics"] c.Images.Thanos = images["thanos"] c.Images.MonitoringPlugin = images["monitoring-plugin"] diff --git a/pkg/manifests/manifests.go b/pkg/manifests/manifests.go index 6c0a743050..d3b06f7ae8 100644 --- a/pkg/manifests/manifests.go +++ b/pkg/manifests/manifests.go @@ -196,6 +196,17 @@ var ( PrometheusAdapterMinimalServiceMonitor = "prometheus-adapter/minimal-service-monitor.yaml" PrometheusAdapterServiceAccount = "prometheus-adapter/service-account.yaml" + MetricsServerAPIService = "metrics-server/api-service.yaml" + MetricsServerServiceAccount = "metrics-server/service-account.yaml" + MetricsServerClusterRole = "metrics-server/cluster-role.yaml" + MetricsServerClusterRoleBinding = "metrics-server/cluster-role-binding.yaml" + MetricsServerClusterRoleBindingAuthDelegator = "metrics-server/cluster-role-binding-auth-delegator.yaml" + MetricsServerRoleBindingAuthReader = "metrics-server/role-binding-auth-reader.yaml" + MetricsServerDeployment = "metrics-server/deployment.yaml" + MetricsServerService = "metrics-server/service.yaml" + MetricsServerServiceMonitor = "metrics-server/service-monitor.yaml" + MetricsServerPodDisruptionBudget = "metrics-server/pod-disruption-budget.yaml" + AdmissionWebhookRuleValidatingWebhook = "admission-webhook/prometheus-rule-validating-webhook.yaml" AdmissionWebhookAlertmanagerConfigValidatingWebhook = "admission-webhook/alertmanager-config-validating-webhook.yaml" AdmissionWebhookDeployment = "admission-webhook/deployment.yaml" @@ -308,6 +319,8 @@ var ( PrometheusOperatorWebTLSMinTLSVersionFlag = "--web.tls-min-version=" PrometheusAdapterTLSCipherSuitesFlag = "--tls-cipher-suites=" PrometheusAdapterTLSMinTLSVersionFlag = "--tls-min-version=" + MetricsServerTLSCipherSuitesFlag = "--tls-cipher-suites=" + MetricsServerTLSMinTLSVersionFlag = "--tls-min-version=" KubeRbacProxyTLSCipherSuitesFlag = "--tls-cipher-suites=" KubeRbacProxyMinTLSVersionFlag = "--tls-min-version=" @@ -2094,6 +2107,85 @@ func (f *Factory) PrometheusAdapterAPIService() (*apiregistrationv1.APIService, return f.NewAPIService(f.assets.MustNewAssetReader(PrometheusAdapterAPIService)) } +func (f *Factory) MetricsServerServiceAccount() (*v1.ServiceAccount, error) { + return f.NewServiceAccount(f.assets.MustNewAssetReader(MetricsServerServiceAccount)) +} + +func (f *Factory) MetricsServerClusterRole() (*rbacv1.ClusterRole, error) { + return f.NewClusterRole(f.assets.MustNewAssetReader(MetricsServerClusterRole)) +} + +func (f *Factory) MetricsServerClusterRoleBinding() (*rbacv1.ClusterRoleBinding, error) { + return f.NewClusterRoleBinding(f.assets.MustNewAssetReader(MetricsServerClusterRoleBinding)) +} + +func (f *Factory) MetricsServerClusterRoleBindingAuthDelegator() (*rbacv1.ClusterRoleBinding, error) { + return f.NewClusterRoleBinding(f.assets.MustNewAssetReader(MetricsServerClusterRoleBindingAuthDelegator)) +} + +func (f *Factory) MetricsServerRoleBindingAuthReader() (*rbacv1.RoleBinding, error) { + return f.NewRoleBinding(f.assets.MustNewAssetReader(MetricsServerRoleBindingAuthReader)) +} + +func (f *Factory) MetricsServerDeployment() (*appsv1.Deployment, error) { + dep, err := f.NewDeployment(f.assets.MustNewAssetReader(MetricsServerDeployment)) + if err != nil { + 
+		return nil, err
+	}
+
+	podSpec := &dep.Spec.Template.Spec
+	containers := podSpec.Containers
+	idx := slices.IndexFunc(containers, containerNameEquals("metrics-server"))
+	if idx < 0 {
+		return nil, errors.Errorf(
+			"failed to find container %q in deployment %q",
+			"metrics-server", MetricsServerDeployment)
+	}
+
+	containers[idx].Image = f.config.Images.MetricsServer
+	containers[idx].Args = f.setTLSSecurityConfiguration(containers[idx].Args,
+		MetricsServerTLSCipherSuitesFlag, MetricsServerTLSMinTLSVersionFlag)
+
+	config := f.config.ClusterMonitoringConfiguration.MetricsServerConfig
+	if config == nil {
+		return dep, nil
+	}
+
+	if len(config.NodeSelector) > 0 {
+		podSpec.NodeSelector = config.NodeSelector
+	}
+
+	if len(config.Tolerations) > 0 {
+		podSpec.Tolerations = config.Tolerations
+	}
+
+	if config.Resources != nil {
+		containers[idx].Resources = *config.Resources
+	}
+
+	if len(config.TopologySpreadConstraints) > 0 {
+		podSpec.TopologySpreadConstraints = config.TopologySpreadConstraints
+	}
+
+	return dep, nil
+}
+
+func (f *Factory) MetricsServerPodDisruptionBudget() (*policyv1.PodDisruptionBudget, error) {
+	return f.NewPodDisruptionBudget(f.assets.MustNewAssetReader(MetricsServerPodDisruptionBudget))
+}
+
+func (f *Factory) MetricsServerService() (*v1.Service, error) {
+	return f.NewService(f.assets.MustNewAssetReader(MetricsServerService))
+}
+
+func (f *Factory) MetricsServerServiceMonitor() (*monv1.ServiceMonitor, error) {
+	return f.NewServiceMonitor(f.assets.MustNewAssetReader(MetricsServerServiceMonitor))
+}
+
+func (f *Factory) MetricsServerAPIService() (*apiregistrationv1.APIService, error) {
+	return f.NewAPIService(f.assets.MustNewAssetReader(MetricsServerAPIService))
+}
+
 func (f *Factory) PrometheusOperatorServiceMonitor() (*monv1.ServiceMonitor, error) {
 	return f.NewServiceMonitor(f.assets.MustNewAssetReader(PrometheusOperatorServiceMonitor))
 }
diff --git a/pkg/manifests/manifests_test.go b/pkg/manifests/manifests_test.go
index 93eae51597..71a6a9571b 100644
--- a/pkg/manifests/manifests_test.go
+++ b/pkg/manifests/manifests_test.go
@@ -2705,6 +2705,77 @@ k8sPrometheusAdapter:
 	}
 }
 
+func TestMetricsServerConfiguration(t *testing.T) {
+	config := `
+metricsServer:
+  resources:
+    requests:
+      cpu: 100m
+      memory: 100Mi
+    limits:
+      cpu: 200m
+      memory: 200Mi
+  nodeSelector:
+    node: linux
+  tolerations:
+  - effect: PreferNoSchedule
+    operator: Exists`
+
+	c, err := NewConfigFromString(config, true)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	c.SetImages(map[string]string{
+		"kube-metrics-server": "docker.io/openshift/origin-kube-metrics-server:latest",
+	})
+
+	f := NewFactory("openshift-monitoring", "openshift-user-workload-monitoring", c, defaultInfrastructureReader(), &fakeProxyReader{}, NewAssets(assetsPath), &APIServerConfig{}, &configv1.Console{})
+	d, err := f.MetricsServerDeployment()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for _, container := range d.Spec.Template.Spec.Containers {
+		if container.Name == "metrics-server" {
+			if container.Image != "docker.io/openshift/origin-kube-metrics-server:latest" {
+				t.Fatal("metrics-server image is not configured correctly")
+			}
+
+			if !reflect.DeepEqual(container.Resources, *f.config.ClusterMonitoringConfiguration.MetricsServerConfig.Resources) {
+				t.Fatal("metrics-server resources are not configured correctly")
+			}
+		}
+	}
+
+	for _, tc := range []struct {
+		name      string
+		want, got interface{}
+	}{
+		{
+			name: "node selector",
+			want: map[string]string{"node": "linux"},
+			got:
d.Spec.Template.Spec.NodeSelector, + }, + { + name: "tolerations", + want: []v1.Toleration{ + { + Effect: "PreferNoSchedule", + Operator: "Exists", + }, + }, + got: d.Spec.Template.Spec.Tolerations, + }, + } { + t.Run(tc.name, func(t *testing.T) { + if !reflect.DeepEqual(tc.got, tc.want) { + t.Errorf("want %+v, got %+v", tc.want, tc.got) + } + }) + } +} + func TestAlertmanagerMainStartupProbe(t *testing.T) { for _, tc := range []struct { name string diff --git a/pkg/manifests/types.go b/pkg/manifests/types.go index 2ca2e50a65..3f608eb490 100644 --- a/pkg/manifests/types.go +++ b/pkg/manifests/types.go @@ -43,6 +43,8 @@ type ClusterMonitoringConfiguration struct { HTTPConfig *HTTPConfig `json:"http,omitempty"` // `K8sPrometheusAdapter` defines settings for the Prometheus Adapter component. K8sPrometheusAdapter *K8sPrometheusAdapter `json:"k8sPrometheusAdapter,omitempty"` + // `MetricsServer` defines settings for the MetricsServer component. + MetricsServerConfig *MetricsServerConfig `json:"metricsServer,omitempty"` // `KubeStateMetricsConfig` defines settings for the `kube-state-metrics` agent. KubeStateMetricsConfig *KubeStateMetricsConfig `json:"kubeStateMetrics,omitempty"` // `PrometheusK8sConfig` defines settings for the Prometheus component. @@ -119,6 +121,18 @@ type K8sPrometheusAdapter struct { DedicatedServiceMonitors *DedicatedServiceMonitors `json:"dedicatedServiceMonitors,omitempty"` } +// The `MetricsServerConfig` resource defines settings for the MetricsServer component. +type MetricsServerConfig struct { + // Defines the nodes on which the pods are scheduled. + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + // Defines tolerations for the pods. + Tolerations []v1.Toleration `json:"tolerations,omitempty"` + // Defines resource requests and limits for the Metrics Server container. + Resources *v1.ResourceRequirements `json:"resources,omitempty"` + // Defines a pod's topology spread constraints. 
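+	// Constraints set here are copied verbatim onto the metrics-server pods;
+	// the deployment's built-in pod anti-affinity remains in effect either way.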
+	TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
+}
+
 // You can use the `DedicatedServiceMonitors` resource to configure dedicated
 // Service Monitors for the Prometheus Adapter
 type DedicatedServiceMonitors struct {
diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go
index d6e7c924f4..9191c68aa9 100644
--- a/pkg/operator/operator.go
+++ b/pkg/operator/operator.go
@@ -749,7 +749,8 @@ func (o *Operator) sync(ctx context.Context, key string) error {
 		newTaskSpec("NodeExporter", tasks.NewNodeExporterTask(o.client, factory)),
 		newTaskSpec("KubeStateMetrics", tasks.NewKubeStateMetricsTask(o.client, factory)),
 		newTaskSpec("OpenshiftStateMetrics", tasks.NewOpenShiftStateMetricsTask(o.client, factory)),
-		newTaskSpec("PrometheusAdapter", tasks.NewPrometheusAdapterTask(ctx, o.namespace, o.client, factory, config)),
+		newTaskSpec("PrometheusAdapter", tasks.NewPrometheusAdapterTask(ctx, o.namespace, o.client, !o.metricsServerEnabled, factory, config)),
+		newTaskSpec("MetricsServer", tasks.NewMetricsServerTask(ctx, o.namespace, o.client, o.metricsServerEnabled, factory, config)),
 		newTaskSpec("TelemeterClient", tasks.NewTelemeterClientTask(o.client, factory, config)),
 		newTaskSpec("ThanosQuerier", tasks.NewThanosQuerierTask(o.client, factory, config)),
 		newTaskSpec("ControlPlaneComponents", tasks.NewControlPlaneTask(o.client, factory, config)),
diff --git a/pkg/tasks/metricsserver.go b/pkg/tasks/metricsserver.go
new file mode 100644
index 0000000000..4f09c5182b
--- /dev/null
+++ b/pkg/tasks/metricsserver.go
@@ -0,0 +1,210 @@
+package tasks
+
+import (
+	"context"
+
+	"github.com/openshift/cluster-monitoring-operator/pkg/client"
+	"github.com/openshift/cluster-monitoring-operator/pkg/manifests"
+	"github.com/pkg/errors"
+	appsv1 "k8s.io/api/apps/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+type MetricsServerTask struct {
+	client    *client.Client
+	enabled   bool
+	ctx       context.Context
+	factory   *manifests.Factory
+	config    *manifests.Config
+	namespace string
+}
+
+func NewMetricsServerTask(ctx context.Context, namespace string, client *client.Client, metricsServerEnabled bool, factory *manifests.Factory, config *manifests.Config) *MetricsServerTask {
+	return &MetricsServerTask{
+		client:    client,
+		enabled:   metricsServerEnabled,
+		factory:   factory,
+		config:    config,
+		namespace: namespace,
+		ctx:       ctx,
+	}
+}
+
+func (t *MetricsServerTask) Run(ctx context.Context) error {
+	if t.enabled {
+		return t.create(ctx)
+	}
+	return nil
+}
+
+func (t *MetricsServerTask) create(ctx context.Context) error {
+	{
+		sa, err := t.factory.MetricsServerServiceAccount()
+		if err != nil {
+			return errors.Wrap(err, "initializing MetricsServer ServiceAccount failed")
+		}
+
+		err = t.client.CreateOrUpdateServiceAccount(ctx, sa)
+		if err != nil {
+			return errors.Wrap(err, "reconciling MetricsServer ServiceAccount failed")
+		}
+	}
+	{
+		cr, err := t.factory.MetricsServerClusterRole()
+		if err != nil {
+			return errors.Wrap(err, "initializing metrics-server ClusterRole failed")
+		}
+
+		err = t.client.CreateOrUpdateClusterRole(ctx, cr)
+		if err != nil {
+			return errors.Wrap(err, "reconciling metrics-server ClusterRole failed")
+		}
+	}
+	{
+		crb, err := t.factory.MetricsServerClusterRoleBinding()
+		if err != nil {
+			return errors.Wrap(err, "initializing MetricsServer ClusterRoleBinding failed")
+		}
+
+		err = t.client.CreateOrUpdateClusterRoleBinding(ctx, crb)
+		if err != nil {
+			return errors.Wrap(err, "reconciling MetricsServer ClusterRoleBinding failed")
+		}
+ } + { + crb, err := t.factory.MetricsServerClusterRoleBindingAuthDelegator() + if err != nil { + return errors.Wrap(err, "initializing metrics-server:system:auth-delegator ClusterRoleBinding failed") + } + + err = t.client.CreateOrUpdateClusterRoleBinding(ctx, crb) + if err != nil { + return errors.Wrap(err, "reconciling metrics-server:system:auth-delegator ClusterRoleBinding failed") + } + } + { + rb, err := t.factory.MetricsServerRoleBindingAuthReader() + if err != nil { + return errors.Wrap(err, "initializing metrics-server-auth-reader RoleBinding failed") + } + + err = t.client.CreateOrUpdateRoleBinding(ctx, rb) + if err != nil { + return errors.Wrap(err, "reconciling metrics-server-auth-reader RoleBinding failed") + } + } + { + s, err := t.factory.MetricsServerService() + if err != nil { + return errors.Wrap(err, "initializing MetricsServer Service failed") + } + + err = t.client.CreateOrUpdateService(ctx, s) + if err != nil { + return errors.Wrap(err, "reconciling MetricsServer Service failed") + } + } + { + dep, err := t.factory.MetricsServerDeployment() + if err != nil { + return errors.Wrap(err, "initializing MetricsServer Deployment failed") + } + + err = t.client.CreateOrUpdateDeployment(ctx, dep) + if err != nil { + return errors.Wrap(err, "reconciling MetricsServer Deployment failed") + } + } + { + sm, err := t.factory.MetricsServerServiceMonitor() + if err != nil { + return errors.Wrap(err, "initializing MetricsServer ServiceMonitors failed") + } + + err = t.client.CreateOrUpdateServiceMonitor(ctx, sm) + if err != nil { + return errors.Wrapf(err, "reconciling %s/%s ServiceMonitor failed", sm.Namespace, sm.Name) + } + } + { + pdb, err := t.factory.MetricsServerPodDisruptionBudget() + if err != nil { + return errors.Wrap(err, "initializing MetricsServer PodDisruptionBudget failed") + } + + if pdb != nil { + err = t.client.CreateOrUpdatePodDisruptionBudget(ctx, pdb) + if err != nil { + return errors.Wrap(err, "reconciling MetricsServer PodDisruptionBudget failed") + } + } + } + { + api, err := t.factory.MetricsServerAPIService() + if err != nil { + return errors.Wrap(err, "initializing MetricsServer APIService failed") + } + + err = t.client.CreateOrUpdateAPIService(ctx, api) + if err != nil { + return errors.Wrap(err, "reconciling MetricsServer APIService failed") + } + } + + return t.removePrometheusAdapterResources(ctx) +} + +func (t *MetricsServerTask) removePrometheusAdapterResources(ctx context.Context) error { + pa := NewPrometheusAdapterTask(ctx, t.namespace, t.client, false, t.factory, t.config) + d := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "prometheus-adapter", + Namespace: t.namespace, + }, + } + + { + pdb, err := pa.factory.PrometheusAdapterPodDisruptionBudget() + if err != nil { + return errors.Wrap(err, "initializing PrometheusAdapter PodDisruptionBudget failed") + } + + if pdb != nil { + err = pa.client.DeletePodDisruptionBudget(ctx, pdb) + if err != nil { + return errors.Wrap(err, "deleting PrometheusAdapter PodDisruptionBudget failed") + } + } + } + { + sm, err := pa.factory.PrometheusAdapterServiceMonitor() + if err != nil { + return errors.Wrap(err, "initializing PrometheusAdapter ServiceMonitors failed") + } + + err = pa.client.DeleteServiceMonitor(ctx, sm) + if err != nil { + return errors.Wrapf(err, "deleting %s/%s ServiceMonitor failed", sm.Namespace, sm.Name) + } + } + { + s, err := pa.factory.PrometheusAdapterService() + if err != nil { + return errors.Wrap(err, "initializing PrometheusAdapter Service failed") + } + + err = 
pa.client.DeleteService(ctx, s) + if err != nil { + return errors.Wrap(err, "deleting PrometheusAdapter Service failed") + } + } + { + err := pa.client.DeleteDeployment(ctx, d) + if err != nil { + return errors.Wrap(err, "deleting PrometheusAdapter Deployment failed") + } + } + + // TODO(slashpai): Add steps to remove other resources if any + return nil +} diff --git a/pkg/tasks/prometheusadapter.go b/pkg/tasks/prometheusadapter.go index dc148d340f..bbfd6f42f0 100644 --- a/pkg/tasks/prometheusadapter.go +++ b/pkg/tasks/prometheusadapter.go @@ -12,15 +12,17 @@ import ( type PrometheusAdapterTask struct { client *client.Client + enabled bool ctx context.Context factory *manifests.Factory config *manifests.Config namespace string } -func NewPrometheusAdapterTask(ctx context.Context, namespace string, client *client.Client, factory *manifests.Factory, config *manifests.Config) *PrometheusAdapterTask { +func NewPrometheusAdapterTask(ctx context.Context, namespace string, client *client.Client, enabled bool, factory *manifests.Factory, config *manifests.Config) *PrometheusAdapterTask { return &PrometheusAdapterTask{ client: client, + enabled: enabled, factory: factory, config: config, namespace: namespace, @@ -29,6 +31,13 @@ func NewPrometheusAdapterTask(ctx context.Context, namespace string, client *cli } func (t *PrometheusAdapterTask) Run(ctx context.Context) error { + if t.enabled { + return t.create(ctx) + } + return nil +} + +func (t *PrometheusAdapterTask) create(ctx context.Context) error { { cr, err := t.factory.PrometheusAdapterClusterRole() if err != nil {
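
---

A minimal usage sketch, not part of the diff itself: with the MetricsServer task enabled (whatever gates o.metricsServerEnabled in the operator), the new fields are set through the usual cluster-monitoring-config ConfigMap. The keys mirror the json tags of MetricsServerConfig in pkg/manifests/types.go, and the values below are the ones exercised by TestMetricsServerConfiguration:

apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-monitoring-config
  namespace: openshift-monitoring
data:
  config.yaml: |
    metricsServer:
      nodeSelector:
        node: linux
      tolerations:
      - effect: PreferNoSchedule
        operator: Exists
      resources:
        requests:
          cpu: 100m
          memory: 100Mi
        limits:
          cpu: 200m
          memory: 200Mi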