From c218667896a9e90fe5538e6e014afa6c8dba2d88 Mon Sep 17 00:00:00 2001 From: Jayapriya Pai Date: Thu, 11 Apr 2024 10:33:20 +0530 Subject: [PATCH] OCPBUGS-31411: add runbook link for KubeAggregatedAPIErrors alert Signed-off-by: Jayapriya Pai --- assets/control-plane/prometheus-rule.yaml | 1 + jsonnet/utils/sanitize-rules.libsonnet | 1 + 2 files changed, 2 insertions(+) diff --git a/assets/control-plane/prometheus-rule.yaml b/assets/control-plane/prometheus-rule.yaml index 63decfcf6b..0e2ef27350 100644 --- a/assets/control-plane/prometheus-rule.yaml +++ b/assets/control-plane/prometheus-rule.yaml @@ -401,6 +401,7 @@ spec: - alert: KubeAggregatedAPIErrors annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubeAggregatedAPIErrors.md summary: Kubernetes aggregated API has reported errors. expr: | sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 diff --git a/jsonnet/utils/sanitize-rules.libsonnet b/jsonnet/utils/sanitize-rules.libsonnet index bc63f0936a..66f046cab1 100644 --- a/jsonnet/utils/sanitize-rules.libsonnet +++ b/jsonnet/utils/sanitize-rules.libsonnet @@ -474,6 +474,7 @@ local includeRunbooks = { AlertmanagerClusterFailedToSendAlerts: openShiftRunbookCMO('AlertmanagerClusterFailedToSendAlerts.md'), ClusterOperatorDegraded: openShiftRunbookCMO('ClusterOperatorDegraded.md'), ClusterOperatorDown: openShiftRunbookCMO('ClusterOperatorDown.md'), + KubeAggregatedAPIErrors: openShiftRunbookCMO('KubeAggregatedAPIErrors.md'), KubeAPIDown: openShiftRunbookCMO('KubeAPIDown.md'), KubeDeploymentReplicasMismatch: openShiftRunbookCMO('KubeDeploymentReplicasMismatch.md'), KubeJobFailed: openShiftRunbookCMO('KubeJobFailed.md'),