Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HAproxy: add instance label to sums #406

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 128 additions & 130 deletions dist/rules/haproxy/embedded-exporter-v2.yml
Original file line number Diff line number Diff line change
@@ -1,131 +1,129 @@
groups:

- name: EmbeddedExporterV2

rules:

- alert: HaproxyHighHttp4xxErrorRateBackend
expr: '((sum by (proxy) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (proxy) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 4xx error rate backend (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHighHttp5xxErrorRateBackend
expr: '((sum by (proxy) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (proxy) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 5xx error rate backend (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 5xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHighHttp4xxErrorRateServer
expr: '((sum by (server) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 4xx error rate server (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 4xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHighHttp5xxErrorRateServer
expr: '((sum by (server) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 5xx error rate server (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 5xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyServerResponseErrors
expr: '(sum by (server) (rate(haproxy_server_response_errors_total[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy server response errors (instance {{ $labels.instance }})
description: "Too many response errors to {{ $labels.server }} server (> 5%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyBackendConnectionErrors
expr: '(sum by (proxy) (rate(haproxy_backend_connection_errors_total[1m]))) > 100'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy backend connection errors (instance {{ $labels.instance }})
description: "Too many connection errors to {{ $labels.fqdn }}/{{ $labels.backend }} backend (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyServerConnectionErrors
expr: '(sum by (proxy) (rate(haproxy_server_connection_errors_total[1m]))) > 100'
for: 0m
labels:
severity: critical
annotations:
summary: HAProxy server connection errors (instance {{ $labels.instance }})
description: "Too many connection errors to {{ $labels.server }} server (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyBackendMaxActiveSession>80%
expr: '((haproxy_server_max_sessions >0) * 100) / (haproxy_server_limit_sessions > 0) > 80'
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy backend max active session > 80% (instance {{ $labels.instance }})
description: "Session limit from backend {{ $labels.proxy }} to server {{ $labels.server }} reached 80% of limit - {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyPendingRequests
expr: 'sum by (proxy) (rate(haproxy_backend_current_queue[2m])) > 0'
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy pending requests (instance {{ $labels.instance }})
description: "Some HAProxy requests are pending on {{ $labels.proxy }} - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHttpSlowingDown
expr: 'avg by (instance, proxy) (haproxy_backend_max_total_time_seconds) > 1'
for: 1m
labels:
severity: warning
annotations:
summary: HAProxy HTTP slowing down (instance {{ $labels.instance }})
description: "Average request time is increasing - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyRetryHigh
expr: 'sum by (proxy) (rate(haproxy_backend_retry_warnings_total[1m])) > 10'
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy retry high (instance {{ $labels.instance }})
description: "High rate of retry on {{ $labels.proxy }} - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHasNoAliveBackends
expr: 'haproxy_backend_active_servers + haproxy_backend_backup_servers == 0'
for: 0m
labels:
severity: critical
annotations:
summary: HAproxy has no alive backends (instance {{ $labels.instance }})
description: "HAProxy has no alive active or backup backends for {{ $labels.proxy }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyFrontendSecurityBlockedRequests
expr: 'sum by (proxy) (rate(haproxy_frontend_denied_connections_total[2m])) > 10'
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy frontend security blocked requests (instance {{ $labels.instance }})
description: "HAProxy is blocking requests for security reason\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyServerHealthcheckFailure
expr: 'increase(haproxy_server_check_failures_total[1m]) > 0'
for: 1m
labels:
severity: warning
annotations:
summary: HAProxy server healthcheck failure (instance {{ $labels.instance }})
description: "Some server healthcheck are failing on {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- name: EmbeddedExporterV2

rules:
- alert: HaproxyHighHttp4xxErrorRateBackend
expr: '((sum by (proxy, instance) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (proxy, instance) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 4xx error rate backend (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHighHttp5xxErrorRateBackend
expr: '((sum by (proxy, instance) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (proxy, instance) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 5xx error rate backend (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 5xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHighHttp4xxErrorRateServer
expr: '((sum by (server, instance) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (server, instance) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 4xx error rate server (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 4xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHighHttp5xxErrorRateServer
expr: '((sum by (server, instance) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (server, instance) (rate(haproxy_server_http_responses_total[1m]))) * 100) > 5'
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy high HTTP 5xx error rate server (instance {{ $labels.instance }})
description: "Too many HTTP requests with status 5xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyServerResponseErrors
expr: "(sum by (server, instance) (rate(haproxy_server_response_errors_total[1m])) / sum by (server, instance) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5"
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy server response errors (instance {{ $labels.instance }})
description: "Too many response errors to {{ $labels.server }} server (> 5%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyBackendConnectionErrors
expr: "(sum by (proxy, instance) (rate(haproxy_backend_connection_errors_total[1m]))) > 100"
for: 1m
labels:
severity: critical
annotations:
summary: HAProxy backend connection errors (instance {{ $labels.instance }})
description: "Too many connection errors to {{ $labels.fqdn }}/{{ $labels.backend }} backend (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyServerConnectionErrors
expr: "(sum by (proxy, instance) (rate(haproxy_server_connection_errors_total[1m]))) > 100"
for: 0m
labels:
severity: critical
annotations:
summary: HAProxy server connection errors (instance {{ $labels.instance }})
description: "Too many connection errors to {{ $labels.server }} server (> 100 req/s). Request throughput may be too high.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyBackendMaxActiveSession>80%
expr: "((haproxy_server_max_sessions >0) * 100) / (haproxy_server_limit_sessions > 0) > 80"
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy backend max active session > 80% (instance {{ $labels.instance }})
description: "Session limit from backend {{ $labels.proxy }} to server {{ $labels.server }} reached 80% of limit - {{ $value | printf \"%.2f\"}}%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyPendingRequests
expr: "sum by (proxy, instance) (rate(haproxy_backend_current_queue[2m])) > 0"
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy pending requests (instance {{ $labels.instance }})
description: "Some HAProxy requests are pending on {{ $labels.proxy }} - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHttpSlowingDown
expr: "avg by (instance, proxy) (haproxy_backend_max_total_time_seconds) > 1"
for: 1m
labels:
severity: warning
annotations:
summary: HAProxy HTTP slowing down (instance {{ $labels.instance }})
description: "Average request time is increasing - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyRetryHigh
expr: "sum by (proxy, instance) (rate(haproxy_backend_retry_warnings_total[1m])) > 10"
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy retry high (instance {{ $labels.instance }})
description: "High rate of retry on {{ $labels.proxy }} - {{ $value | printf \"%.2f\"}}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyHasNoAliveBackends
expr: "haproxy_backend_active_servers + haproxy_backend_backup_servers == 0"
for: 0m
labels:
severity: critical
annotations:
summary: HAproxy has no alive backends (instance {{ $labels.instance }})
description: "HAProxy has no alive active or backup backends for {{ $labels.proxy }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyFrontendSecurityBlockedRequests
expr: "sum by (proxy, instance) (rate(haproxy_frontend_denied_connections_total[2m])) > 10"
for: 2m
labels:
severity: warning
annotations:
summary: HAProxy frontend security blocked requests (instance {{ $labels.instance }})
description: "HAProxy is blocking requests for security reason\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HaproxyServerHealthcheckFailure
expr: "increase(haproxy_server_check_failures_total[1m]) > 0"
for: 1m
labels:
severity: warning
annotations:
summary: HAProxy server healthcheck failure (instance {{ $labels.instance }})
description: "Some server healthcheck are failing on {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"