From 0254c0e91dbde0c32075cc0a85f7e76c478f9d0e Mon Sep 17 00:00:00 2001 From: Dawud M <7688823+technowhizz@users.noreply.github.com> Date: Thu, 2 Feb 2023 14:41:21 +0000 Subject: [PATCH] Add 2m wait to some blackbox rules Adds a 2m wait to some blackbox rules to allow for temporary DNS failures to resolve. This should also stop false positives in the case of a probe timeout that gets resolved by the next probe. Also shows the status code in the alert description on probe HTTP failures. Co-authored-by: Will Szumski Change-Id: I13671d96eb838b702b8be3ceb5b43ba92adc56ef --- etc/kayobe/kolla/config/prometheus/blackbox.rules | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/etc/kayobe/kolla/config/prometheus/blackbox.rules b/etc/kayobe/kolla/config/prometheus/blackbox.rules index 0f0f72012..59e3247bc 100644 --- a/etc/kayobe/kolla/config/prometheus/blackbox.rules +++ b/etc/kayobe/kolla/config/prometheus/blackbox.rules @@ -7,7 +7,7 @@ groups: rules: - alert: BlackboxProbeFailed expr: probe_success == 0 - for: 0m + for: 2m labels: severity: critical annotations: @@ -25,12 +25,12 @@ groups: - alert: BlackboxProbeHttpFailure expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400 - for: 0m + for: 2m labels: severity: critical annotations: summary: Blackbox probe HTTP failure (instance {{ $labels.instance }}) - description: "HTTP status code is not 200-399" + description: "HTTP status code is not 200-399. Blackbox exporter got status code: {{ $value }}" - alert: BlackboxSslCertificateWillExpireSoon expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30