Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cherry-pick #3204 after it has been inadvertently removed #3979

Merged
merged 1 commit into from
Mar 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions examples/alerts/alerts.md
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,12 @@ rules:
severity: critical
- alert: ThanosSidecarUnhealthy
annotations:
description: Thanos Sidecar {{$labels.instance}} is unhealthy for {{$value}} seconds.
description: Thanos Sidecar {{$labels.instance}} is unhealthy for more than {{$value}}
seconds.
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy
summary: Thanos Sidecar is unhealthy.
expr: |
time() - max by (job, instance) (thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"}) >= 600
time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"})) >= 240
labels:
severity: critical
```
Expand Down
6 changes: 3 additions & 3 deletions examples/alerts/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -306,12 +306,12 @@ groups:
severity: critical
- alert: ThanosSidecarUnhealthy
annotations:
description: Thanos Sidecar {{$labels.instance}} is unhealthy for {{$value}}
seconds.
description: Thanos Sidecar {{$labels.instance}} is unhealthy for more than
{{$value}} seconds.
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy
summary: Thanos Sidecar is unhealthy.
expr: |
time() - max by (job, instance) (thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"}) >= 600
time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"})) >= 240
labels:
severity: critical
- name: thanos-store
Expand Down
142 changes: 47 additions & 95 deletions examples/alerts/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ tests:
- interval: 1m
input_series:
- series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", instance="thanos-sidecar-0"}'
values: '5 10 43 17 11 0 0 0'
values: '5 10 43 17 11 _x5 0x10'
- series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", instance="thanos-sidecar-1"}'
values: '4 9 42 15 10 0 0 0'
values: '4 9 42 15 10 _x5 0x10'
promql_expr_test:
- expr: time()
eval_time: 1m
Expand All @@ -22,112 +22,64 @@ tests:
exp_samples:
- labels: '{}'
value: 120
- expr: max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance)
eval_time: 2m
exp_samples:
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}'
value: 43
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}'
value: 42
- expr: max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance)
eval_time: 10m
exp_samples:
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}'
value: 0
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}'
value: 0
- expr: max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance)
eval_time: 11m
- expr: time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"}))
eval_time: 5m
exp_samples:
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}'
value: 0
value: 60
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}'
value: 0
- expr: time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance)
eval_time: 10m
value: 60
- expr: time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"}))
eval_time: 6m
exp_samples:
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}'
value: 600
value: 120
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}'
value: 600
- expr: time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance)
eval_time: 11m
value: 120
- expr: time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"}))
eval_time: 7m
exp_samples:
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}'
value: 660
value: 180
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}'
value: 660
- expr: time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance) >= 600
eval_time: 12m
value: 180
- expr: time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"}))
eval_time: 8m
exp_samples:
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}'
value: 720
value: 240
- labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}'
value: 720
value: 240
alert_rule_test:
- eval_time: 1m
alertname: ThanosSidecarUnhealthy
- eval_time: 2m
alertname: ThanosSidecarUnhealthy
- eval_time: 3m
alertname: ThanosSidecarUnhealthy
- eval_time: 10m
alertname: ThanosSidecarUnhealthy
exp_alerts:
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-0
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-0 is unhealthy for 600 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-1
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-1 is unhealthy for 600 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- eval_time: 11m
alertname: ThanosSidecarUnhealthy
exp_alerts:
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-0
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-0 is unhealthy for 660 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-1
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-1 is unhealthy for 660 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- eval_time: 12m
alertname: ThanosSidecarUnhealthy
exp_alerts:
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-0
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-0 is unhealthy for 720 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-1
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-1 is unhealthy for 720 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- eval_time: 1m
alertname: ThanosSidecarUnhealthy
- eval_time: 2m
alertname: ThanosSidecarUnhealthy
- eval_time: 3m
alertname: ThanosSidecarUnhealthy
- eval_time: 5m
alertname: ThanosSidecarUnhealthy
- eval_time: 8m
alertname: ThanosSidecarUnhealthy
exp_alerts:
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-0
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-0 is unhealthy for more than 240 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- exp_labels:
severity: critical
job: thanos-sidecar
instance: thanos-sidecar-1
exp_annotations:
description: 'Thanos Sidecar thanos-sidecar-1 is unhealthy for more than 240 seconds.'
runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy'
summary: 'Thanos Sidecar is unhealthy.'
- eval_time: 10m
alertname: ThanosSidecarUnhealthy
- interval: 1m
input_series:
- series: 'prometheus_rule_evaluations_total{namespace="production", job="thanos-ruler", instance="thanos-ruler-0"}'
Expand Down
4 changes: 2 additions & 2 deletions mixin/alerts/sidecar.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@
{
alert: 'ThanosSidecarUnhealthy',
annotations: {
description: 'Thanos Sidecar {{$labels.instance}}%s is unhealthy for {{$value}} seconds.' % location,
description: 'Thanos Sidecar {{$labels.instance}}%s is unhealthy for more than {{$value}} seconds.' % location,
summary: 'Thanos Sidecar is unhealthy.',
},
expr: |||
time() - max by (%(dimensions)s) (thanos_sidecar_last_heartbeat_success_time_seconds{%(selector)s}) >= 600
time() - max by (%(dimensions)s) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{%(selector)s})) >= 240
||| % thanos.sidecar,
labels: {
severity: 'critical',
Expand Down
2 changes: 1 addition & 1 deletion mixin/runbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
|---|---|---|---|---|
|ThanosSidecarPrometheusDown|Thanos Sidecar cannot connect to Prometheus|Thanos Sidecar {{$labels.instance}} cannot connect to Prometheus.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown)|
|ThanosSidecarBucketOperationsFailed|Thanos Sidecar bucket operations are failing|Thanos Sidecar {{$labels.instance}} bucket operations are failing|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed)|
|ThanosSidecarUnhealthy|Thanos Sidecar is unhealthy.|Thanos Sidecar {{$labels.instance}} is unhealthy for {{$value}} seconds.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy)|
|ThanosSidecarUnhealthy|Thanos Sidecar is unhealthy.|Thanos Sidecar {{$labels.instance}} is unhealthy for more than {{$value}} seconds.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy)|

## thanos-store

Expand Down