From 4bd0c99452bf2af6060ec16c00e1f7918da582a0 Mon Sep 17 00:00:00 2001 From: Eliska Romanova Date: Thu, 25 Jul 2024 16:01:11 +0200 Subject: [PATCH] OBSDOCS-63: Update info re supported Alertmanager configurations --- ...ing-custom-alertmanager-configuration.adoc | 118 ---------------- ...fications-for-default-platform-alerts.adoc | 133 ++++++++++++++++++ ...otifications-for-user-defined-alerts.adoc} | 17 +-- ...on-monitoring-configuration-scenarios.adoc | 3 +- observability/monitoring/managing-alerts.adoc | 33 +++-- 5 files changed, 168 insertions(+), 136 deletions(-) delete mode 100644 modules/monitoring-applying-custom-alertmanager-configuration.adoc create mode 100644 modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc rename modules/{monitoring-applying-a-custom-configuration-to-alertmanager-for-user-defined-alert-routing.adoc => monitoring-configuring-notifications-for-user-defined-alerts.adoc} (63%) diff --git a/modules/monitoring-applying-custom-alertmanager-configuration.adoc b/modules/monitoring-applying-custom-alertmanager-configuration.adoc deleted file mode 100644 index 1c1b1f54e9af..000000000000 --- a/modules/monitoring-applying-custom-alertmanager-configuration.adoc +++ /dev/null @@ -1,118 +0,0 @@ -// Module included in the following assemblies: -// -// * observability/monitoring/managing-alerts.adoc - -:_mod-docs-content-type: PROCEDURE -[id="applying-custom-alertmanager-configuration_{context}"] -= Applying a custom Alertmanager configuration - -You can overwrite the default Alertmanager configuration by editing the `alertmanager-main` secret in the `openshift-monitoring` namespace for the platform instance of Alertmanager. - -.Prerequisites - -* You have access to the cluster as a user with the `cluster-admin` cluster role. - -.Procedure - -To change the Alertmanager configuration from the CLI: - -. 
Print the currently active Alertmanager configuration into file `alertmanager.yaml`: -+ -[source,terminal] ----- -$ oc -n openshift-monitoring get secret alertmanager-main --template='{{ index .data "alertmanager.yaml" }}' | base64 --decode > alertmanager.yaml ----- -+ -. Edit the configuration in `alertmanager.yaml`: -+ -[source,yaml] ----- -global: - resolve_timeout: 5m -route: - group_wait: 30s <1> - group_interval: 5m <2> - repeat_interval: 12h <3> - receiver: default - routes: - - matchers: - - "alertname=Watchdog" - repeat_interval: 2m - receiver: watchdog - - matchers: - - "service=" <4> - routes: - - matchers: - - <5> - receiver: <6> -receivers: -- name: default -- name: watchdog -- name: -# ----- -<1> The `group_wait` value specifies how long Alertmanager waits before sending an initial notification for a group of alerts. -This value controls how long Alertmanager waits while collecting initial alerts for the same group before sending a notification. -<2> The `group_interval` value specifies how much time must elapse before Alertmanager sends a notification about new alerts added to a group of alerts for which an initial notification was already sent. -<3> The `repeat_interval` value specifies the minimum amount of time that must pass before an alert notification is repeated. -If you want a notification to repeat at each group interval, set the `repeat_interval` value to less than the `group_interval` value. -However, the repeated notification can still be delayed, for example, when certain Alertmanager pods are restarted or rescheduled. -<4> The `service` value specifies the service that fires the alerts. -<5> The `` value specifies the target alerts. -<6> The `receiver` value specifies the receiver to use for the alert. -+ -[NOTE] -==== -Use the `matchers` key name to indicate the matchers that an alert has to fulfill to match the node. -Do not use the `match` or `match_re` key names, which are both deprecated and planned for removal in a future release. 
- -In addition, if you define inhibition rules, use the `target_matchers` key name to indicate the target matchers and the `source_matchers` key name to indicate the source matchers. -Do not use the `target_match`, `target_match_re`, `source_match`, or `source_match_re` key names, which are deprecated and planned for removal in a future release. -==== -+ -The following Alertmanager configuration example configures PagerDuty as an alert receiver: -+ -[source,yaml] ----- -global: - resolve_timeout: 5m -route: - group_wait: 30s - group_interval: 5m - repeat_interval: 12h - receiver: default - routes: - - matchers: - - "alertname=Watchdog" - repeat_interval: 2m - receiver: watchdog - - matchers: - - "service=example-app" - routes: - - matchers: - - "severity=critical" - receiver: team-frontend-page* -receivers: -- name: default -- name: watchdog -- name: team-frontend-page - pagerduty_configs: - - service_key: "_your-key_" ----- -+ -With this configuration, alerts of `critical` severity that are fired by the `example-app` service are sent using the `team-frontend-page` receiver. Typically these types of alerts would be paged to an individual or a critical response team. -+ -. Apply the new configuration in the file: -+ -[source,terminal] ----- -$ oc -n openshift-monitoring create secret generic alertmanager-main --from-file=alertmanager.yaml --dry-run=client -o=yaml | oc -n openshift-monitoring replace secret --filename=- ----- - -To change the Alertmanager configuration from the {product-title} web console: - -. Go to the *Administration* -> *Cluster Settings* -> *Configuration* -> *Alertmanager* -> *YAML* page of the web console. - -. Modify the YAML configuration file. - -. Click *Save*. 
diff --git a/modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc b/modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc new file mode 100644 index 000000000000..10cce56f1dc4 --- /dev/null +++ b/modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc @@ -0,0 +1,133 @@ +// Module included in the following assemblies: +// +// * observability/monitoring/managing-alerts.adoc + +:_mod-docs-content-type: PROCEDURE +[id="configuring-notifications-for-default-platform-alerts_{context}"] += Configuring notifications for default platform alerts + +You can configure Alertmanager to send notifications. Customize where and how Alertmanager sends notifications about default platform alerts by editing the default configuration in the `alertmanager-main` secret in the `openshift-monitoring` namespace. + +[IMPORTANT] +==== +Alertmanager does not send notifications by default. It is recommended to configure Alertmanager to send notifications by setting up notification details in the `alertmanager-main` secret configuration file. +==== + +.Prerequisites + +* You have access to the cluster as a user with the `cluster-admin` cluster role. + +.Procedure + +. Open the Alertmanager YAML configuration file: + +** To open the Alertmanager configuration from the CLI: + +.. Print the currently active Alertmanager configuration from the `alertmanager-main` secret into the `alertmanager.yaml` file: ++ +[source,terminal] +---- +$ oc -n openshift-monitoring get secret alertmanager-main --template='{{ index .data "alertmanager.yaml" }}' | base64 --decode > alertmanager.yaml +---- + +.. Open the `alertmanager.yaml` file. + +** To open the Alertmanager configuration from the {product-title} web console: + +.. Go to the *Administration* -> *Cluster Settings* -> *Configuration* -> *Alertmanager* -> *YAML* page of the web console. + +. 
Edit the Alertmanager configuration by updating parameters in the YAML: ++ +[source,yaml] +---- +global: + resolve_timeout: 5m +route: + group_wait: 30s #<1> + group_interval: 5m #<2> + repeat_interval: 12h #<3> + receiver: default + routes: + - matchers: + - "alertname=Watchdog" + repeat_interval: 2m + receiver: watchdog + - matchers: + - "service=<your_service>" #<4> + routes: + - matchers: + - <your_matching_rules> #<5> + receiver: <receiver> #<6> +receivers: +- name: default +- name: watchdog +- name: <receiver> + <receiver_configuration> #<7> +---- +<1> Specify how long Alertmanager waits while collecting initial alerts for a group of alerts before sending a notification. +<2> Specify how much time must elapse before Alertmanager sends a notification about new alerts added to a group of alerts for which an initial notification was already sent. +<3> Specify the minimum amount of time that must pass before an alert notification is repeated. +If you want a notification to repeat at each group interval, set the `repeat_interval` value to less than the `group_interval` value. +The repeated notification can still be delayed, for example, when certain Alertmanager pods are restarted or rescheduled. +<4> Specify the name of the service that fires the alerts. +<5> Specify labels to match your alerts. +<6> Specify the name of the receiver to use for the alerts. +<7> Specify the receiver configuration. ++ +[IMPORTANT] +==== +* Use the `matchers` key name to indicate the matchers that an alert has to fulfill to match the node. +Do not use the `match` or `match_re` key names, which are both deprecated and planned for removal in a future release. + +* If you define inhibition rules, use the following key names: ++ +-- +** `target_matchers`: to indicate the target matchers +** `source_matchers`: to indicate the source matchers +-- ++ +Do not use the `target_match`, `target_match_re`, `source_match`, or `source_match_re` key names, which are deprecated and planned for removal in a future release. 
+==== ++ +The following Alertmanager configuration example configures PagerDuty as an alert receiver: ++ +[source,yaml] +---- +global: + resolve_timeout: 5m +route: + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: default + routes: + - matchers: + - "alertname=Watchdog" + repeat_interval: 2m + receiver: watchdog + - matchers: + - "service=example-app" + routes: + - matchers: + - "severity=critical" + receiver: team-frontend-page +receivers: +- name: default +- name: watchdog +- name: team-frontend-page + pagerduty_configs: + - service_key: "<your_key>" +---- ++ +With this configuration, alerts of `critical` severity that are fired by the `example-app` service are sent through the `team-frontend-page` receiver. Typically, these types of alerts would be paged to an individual or a critical response team. + +. Apply the new configuration in the file: + +** To apply the changes from the CLI, run the following command: ++ +[source,terminal] +---- +$ oc -n openshift-monitoring create secret generic alertmanager-main --from-file=alertmanager.yaml --dry-run=client -o=yaml | oc -n openshift-monitoring replace secret --filename=- +---- + +** To apply the changes from the {product-title} web console, click *Save*. 
diff --git a/modules/monitoring-applying-a-custom-configuration-to-alertmanager-for-user-defined-alert-routing.adoc b/modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc similarity index 63% rename from modules/monitoring-applying-a-custom-configuration-to-alertmanager-for-user-defined-alert-routing.adoc rename to modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc index 23e26de10477..8f8c5a26477e 100644 --- a/modules/monitoring-applying-a-custom-configuration-to-alertmanager-for-user-defined-alert-routing.adoc +++ b/modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc @@ -3,10 +3,10 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="applying-a-custom-configuration-to-alertmanager-for-user-defined-alert-routing_{context}"] -= Applying a custom configuration to Alertmanager for user-defined alert routing +[id="configuring-notifications-for-user-defined-alerts_{context}"] += Configuring notifications for user-defined alerts -If you have enabled a separate instance of Alertmanager dedicated to user-defined alert routing, you can overwrite the configuration for this instance of Alertmanager by editing the `alertmanager-user-workload` secret in the `openshift-user-workload-monitoring` namespace. +If you have enabled a separate instance of Alertmanager that is dedicated to user-defined alert routing, you can customize where and how the instance sends notifications by editing the `alertmanager-user-workload` secret in the `openshift-user-workload-monitoring` namespace. .Prerequisites @@ -37,15 +37,16 @@ route: - name: Default routes: - matchers: - - "service = prometheus-example-monitor" <1> - receiver: <2> + - "service = prometheus-example-monitor" #<1> + receiver: #<2> receivers: - name: Default - name: -# + #<3> ---- -<1> Specifies which alerts match the route. This example shows all alerts that have the `service="prometheus-example-monitor"` label. 
-<2> Specifies the receiver to use for the alerts group. +<1> Specify labels to match your alerts. This example targets all alerts that have the `service="prometheus-example-monitor"` label. +<2> Specify the name of the receiver to use for the alerts group. +<3> Specify the receiver configuration. + . Apply the new configuration in the file: + diff --git a/observability/monitoring/common-monitoring-configuration-scenarios.adoc b/observability/monitoring/common-monitoring-configuration-scenarios.adoc index 4f537c487d2a..a79c5bc7fbfe 100644 --- a/observability/monitoring/common-monitoring-configuration-scenarios.adoc +++ b/observability/monitoring/common-monitoring-configuration-scenarios.adoc @@ -28,6 +28,7 @@ Any other configuration options listed here are optional. * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#creating-cluster-monitoring-configmap_configuring-the-monitoring-stack[Create the `cluster-monitoring-config` `ConfigMap` object] if it does not exist. * xref:../../observability/monitoring/managing-alerts.adoc#sending-notifications-to-external-systems_managing-alerts[Configure alert receivers] so that Alertmanager can send alerts to an external notification system such as email, Slack, or PagerDuty. +* xref:../../observability/monitoring/managing-alerts.adoc#configuring-notifications-for-default-platform-alerts_managing-alerts[Configure notifications for default platform alerts]. * For shorter term data retention, xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-persistent-storage_configuring-the-monitoring-stack[configure persistent storage] for Prometheus and Alertmanager to store metrics and alert data. Specify the metrics data retention parameters for Prometheus and Thanos Ruler. 
+ @@ -70,7 +71,7 @@ Cluster administrators typically complete the following activities to configure * xref:../../observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc#enabling-alert-routing-for-user-defined-projects[Enable alert routing for user-defined projects] so that developers and other users can configure custom alerts and alert routing for their projects. * If needed, configure alert routing for user-defined projects to xref:../../observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc#enabling-a-separate-alertmanager-instance-for-user-defined-alert-routing_enabling-alert-routing-for-user-defined-projects[use an optional Alertmanager instance dedicated for use only by user-defined projects]. * xref:../../observability/monitoring/managing-alerts.adoc#configuring-different-alert-receivers-for-default-platform-alerts-and-user-defined-alerts_managing-alerts[Configure alert receivers] for user-defined projects. -* xref:../../observability/monitoring/managing-alerts.adoc#applying-a-custom-configuration-to-alertmanager-for-user-defined-alert-routing_managing-alerts[Apply a custom configuration to Alertmanager for user-defined alert routing]. +* xref:../../observability/monitoring/managing-alerts.adoc#configuring-notifications-for-user-defined-alerts_managing-alerts[Configure notifications for user-defined alerts]. 
After monitoring for user-defined projects is enabled and configured, developers and other non-administrator users can then perform the following activities to set up and use monitoring for their own projects: diff --git a/observability/monitoring/managing-alerts.adoc b/observability/monitoring/managing-alerts.adoc index 48ed03ee8063..c9a6a2843f24 100644 --- a/observability/monitoring/managing-alerts.adoc +++ b/observability/monitoring/managing-alerts.adoc @@ -101,20 +101,35 @@ include::modules/monitoring-configuring-different-alert-receivers-for-default-pl // Creating alert routing for user-defined projects include::modules/monitoring-creating-alert-routing-for-user-defined-projects.adoc[leveloffset=+2] -// Applying a custom Alertmanager configuration +[id="configuring-alertmanager-to-send-notifications"] +== Configuring Alertmanager to send notifications + +You can configure Alertmanager to send notifications by editing the ifndef::openshift-dedicated,openshift-rosa[] -include::modules/monitoring-applying-custom-alertmanager-configuration.adoc[leveloffset=+1] +`alertmanager-main` secret for default platform alerts or endif::openshift-dedicated,openshift-rosa[] +`alertmanager-user-workload` secret for user-defined alerts. -// Applying a custom configuration to Alertmanager for user-defined alert routing -include::modules/monitoring-applying-a-custom-configuration-to-alertmanager-for-user-defined-alert-routing.adoc[leveloffset=+1] +[NOTE] +==== +All features of a supported version of upstream Alertmanager are also supported in an OpenShift Alertmanager configuration. To check all the configuration options of a supported version of upstream Alertmanager, see link:https://prometheus.io/docs/alerting/0.26/configuration/[Alertmanager configuration]. 
+==== + +// Configuring notifications for default platform alerts +ifndef::openshift-dedicated,openshift-rosa[] +include::modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc[leveloffset=+2] +endif::openshift-dedicated,openshift-rosa[] + +// Configuring notifications for user-defined alerts +include::modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc[leveloffset=+2] [role="_additional-resources"] -.Additional resources +[id="additional-resources_configuring-alertmanager-to-send-notifications"] +== Additional resources -* See link:https://www.pagerduty.com/[the PagerDuty official site] for more information on PagerDuty. -* See link:https://www.pagerduty.com/docs/guides/prometheus-integration-guide/[the PagerDuty Prometheus Integration Guide] to learn how to retrieve the `service_key`. -* See link:https://prometheus.io/docs/alerting/configuration/[Alertmanager configuration] for configuring alerting through different alert receivers. +* link:https://www.pagerduty.com/[PagerDuty official site] +* link:https://www.pagerduty.com/docs/guides/prometheus-integration-guide/[PagerDuty Prometheus Integration Guide] +* xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#support-version-matrix-for-monitoring-components_configuring-the-monitoring-stack[Support version matrix for monitoring components] ifndef::openshift-rosa,openshift-dedicated[] -* See xref:../../observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc#enabling-alert-routing-for-user-defined-projects[Enabling alert routing for user-defined projects] to learn how to enable a dedicated instance of Alertmanager for user-defined alert routing. +* xref:../../observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc#enabling-alert-routing-for-user-defined-projects[Enabling alert routing for user-defined projects] endif::[]