Skip to content
This repository has been archived by the owner on Apr 2, 2024. It is now read-only.

Commit

Permalink
Rules+Alert config w/ Alertmanager & cleanup
Browse files Browse the repository at this point in the history
- Recording Rules and Alert example for normal and HA
- Alertmanager for normal and HA
- Fixed up HA to match the normal (non-HA) setup in a bunch of ways
  - scrape_configs
  - database (timescale -> postgres)
  - password
  - db args -> connstring
  • Loading branch information
jamessewell authored and niksajakovljevic committed May 18, 2022
1 parent 022ccba commit 73bae80
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 24 deletions.
10 changes: 10 additions & 0 deletions docker-compose/alerts.yml
@@ -0,0 +1,10 @@
# Alerting rules; this file is listed under rule_files in promscale_prometheus.yml.
groups:
  - name: alerts
    rules:
      # Heartbeat alert: vector(1) always yields a sample, so the alert is
      # active on every evaluation.
      - alert: Watchdog
        annotations:
          description: >
            This is a Watchdog alert meant to ensure that the entire Alerting
            pipeline is functional. It is always firing in normal operation.
          summary: Alerting Watchdog
        expr: vector(1)
10 changes: 10 additions & 0 deletions docker-compose/docker-compose.yaml
Expand Up @@ -18,6 +18,11 @@ services:
volumes:
- ${PWD}/prometheus.yml:/etc/prometheus/prometheus.yml

# Alertmanager service; Promscale's alerting config targets it as alertmanager:9093.
alertmanager:
  image: prom/alertmanager:latest
  ports:
    # Quoted so the mapping is always read as a string.
    - "9093:9093/tcp"

# Promscale connector service; it is started after the `db` service (depends_on).
promscale:
image: timescale/promscale:latest
ports:
Expand All @@ -26,12 +31,17 @@ services:
restart: on-failure
depends_on:
- db
# Mount the rules-evaluation config and the two rule files it lists at the
# container root, so the relative names in rule_files resolve next to it.
volumes:
- ${PWD}/promscale_prometheus.yml:/prometheus.yml
- ${PWD}/rules.yml:/rules.yml
- ${PWD}/alerts.yml:/alerts.yml
environment:
# Metrics are exposed on /metrics-text instead of the default /metrics path.
PROMSCALE_WEB_TELEMETRY_PATH: /metrics-text
PROMSCALE_DB_URI: postgres://postgres:password@db:5432/postgres?sslmode=allow
PROMSCALE_TRACING_OTLP_SERVER_ADDRESS: ":9202"
PROMSCALE_TELEMETRY_TRACE_JAEGER_ENDPOINT: "http://otel-collector:14268/api/traces"
PROMSCALE_TELEMETRY_TRACE_SAMPLING_RATIO: "0.1"
# Points at the promscale_prometheus.yml file mounted above as /prometheus.yml.
PROMSCALE_METRICS_RULES_CONFIG_FILE: /prometheus.yml

otel-collector:
platform: linux/amd64
Expand Down
34 changes: 22 additions & 12 deletions docker-compose/high-availability/docker-compose.yaml
Expand Up @@ -6,9 +6,8 @@ services:
ports:
- 5432:5432/tcp
environment:
  # Exactly one POSTGRES_PASSWORD entry: a repeated key is invalid YAML and
  # most parsers silently keep the last one. The value matches the
  # credentials embedded in the connectors' PROMSCALE_DB_URI.
  POSTGRES_PASSWORD: password
  POSTGRES_USER: postgres
  # NOTE(review): the connectors' PROMSCALE_DB_URI points at the `postgres`
  # database, so this extra database looks unused - confirm before removing.
  POSTGRES_DB: timescale

prometheus1:
image: prom/prometheus:latest
Expand All @@ -24,6 +23,11 @@ services:
volumes:
- ./prometheus2.yml:/etc/prometheus/prometheus.yml:ro

# Alertmanager service for the high-availability example.
alertmanager:
  image: prom/alertmanager:latest
  ports:
    # Quoted so the mapping is always read as a string.
    - "9093:9093/tcp"

promscale-connector1:
image: timescale/promscale:latest
ports:
Expand All @@ -32,29 +36,35 @@ services:
depends_on:
- db
- prometheus1
volumes:
- ${PWD}/../promscale_prometheus.yml:/prometheus.yml
- ${PWD}/../rules.yml:/rules.yml
- ${PWD}/../alerts.yml:/alerts.yml
environment:
  # Environment values are strings; quoting keeps YAML from retyping them.
  PROMSCALE_METRICS_HIGH_AVAILABILITY: "true"
  PROMSCALE_DB_CONNECT_RETRIES: "10"
  # Host, credentials, database and sslmode all live in the URI. The separate
  # PROMSCALE_DB_HOST / PROMSCALE_DB_PASSWORD / PROMSCALE_DB_SSL_MODE entries
  # were dropped: they repeated the URI and the password disagreed with it.
  PROMSCALE_DB_URI: postgres://postgres:password@db:5432/postgres?sslmode=allow
  # Metrics are exposed on /metrics-text; the `promscale` scrape job uses this path.
  PROMSCALE_WEB_TELEMETRY_PATH: /metrics-text
  # Rules/alerting config mounted into the container as /prometheus.yml.
  PROMSCALE_METRICS_RULES_CONFIG_FILE: /prometheus.yml

promscale-connector2:
image: timescale/promscale:latest
ports:
- 9202:9201/tcp
build:
context: .
restart: on-failure
depends_on:
- db
- prometheus2
volumes:
- ${PWD}/../promscale_prometheus.yml:/prometheus.yml
- ${PWD}/../rules.yml:/rules.yml
- ${PWD}/../alerts.yml:/alerts.yml
environment:
  # Environment values are strings; quoting keeps YAML from retyping them.
  PROMSCALE_METRICS_HIGH_AVAILABILITY: "true"
  PROMSCALE_DB_CONNECT_RETRIES: "10"
  # Host, credentials, database and sslmode all live in the URI. The separate
  # PROMSCALE_DB_HOST / PROMSCALE_DB_PASSWORD / PROMSCALE_DB_SSL_MODE entries
  # were dropped: they repeated the URI and the password disagreed with it.
  PROMSCALE_DB_URI: postgres://postgres:password@db:5432/postgres?sslmode=allow
  # Metrics are exposed on /metrics-text; the `promscale` scrape job uses this path.
  PROMSCALE_WEB_TELEMETRY_PATH: /metrics-text
  # Rules/alerting config mounted into the container as /prometheus.yml.
  PROMSCALE_METRICS_RULES_CONFIG_FILE: /prometheus.yml

# Node exporter service; the `node-exporter` scrape job targets node_exporter:9100.
node_exporter:
  image: quay.io/prometheus/node-exporter
  ports:
    - '9100:9100'
15 changes: 10 additions & 5 deletions docker-compose/high-availability/prometheus1.yml
Expand Up @@ -25,10 +25,15 @@ remote_read:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  # metrics_path defaults to '/metrics' and scheme defaults to 'http' unless a job overrides them.
  # Each job_name appears once: the earlier bodiless 'prometheus' entry was
  # removed because it duplicated the job below and had no targets.
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
  - job_name: node-exporter
    static_configs:
      - targets: ['node_exporter:9100']
  # Both Promscale connectors are scraped on the non-default /metrics-text path.
  - job_name: promscale
    metrics_path: '/metrics-text'
    static_configs:
      - targets:
          - 'promscale-connector1:9201'
          - 'promscale-connector2:9201'
18 changes: 11 additions & 7 deletions docker-compose/high-availability/prometheus2.yml
Expand Up @@ -21,14 +21,18 @@ remote_write:
remote_read:
- url: "http://promscale-connector2:9201/read"

# Scrape configuration with three jobs: Prometheus itself, the node exporter,
# and both Promscale connectors.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  # metrics_path defaults to '/metrics' and scheme defaults to 'http' unless a job overrides them.
  # Each job_name appears once: the earlier bodiless 'prometheus' entry was
  # removed because it duplicated the job below and had no targets.
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
  - job_name: node-exporter
    static_configs:
      - targets: ['node_exporter:9100']
  # Both Promscale connectors are scraped on the non-default /metrics-text path.
  - job_name: promscale
    metrics_path: '/metrics-text'
    static_configs:
      - targets:
          - 'promscale-connector1:9201'
          - 'promscale-connector2:9201'

16 changes: 16 additions & 0 deletions docker-compose/promscale_prometheus.yml
@@ -0,0 +1,16 @@
# Files that hold the recording rules and the alerting rules.
rule_files:
  - rules.yml
  - alerts.yml

# Alert delivery: how outgoing alerts are relabelled and where they are sent.
alerting:
  # Set env="production" on every alert before it leaves.
  alert_relabel_configs:
    - action: replace
      target_label: env
      replacement: production
  alertmanagers:
    - static_configs:
        - targets:
            # 9093 is Alertmanager's default port.
            - alertmanager:9093
7 changes: 7 additions & 0 deletions docker-compose/rules.yml
@@ -0,0 +1,7 @@
# Recording rules; this file is listed under rule_files in promscale_prometheus.yml.
groups:
  - name: rules
    rules:
      # 5-minute rate of non-idle CPU seconds, summed over the `mode` and
      # `cpu` labels (all other labels are kept).
      - record: instance_cpu:node_cpu_seconds_not_idle:rate5m
        expr: >
          sum(rate(node_cpu_seconds_total{mode!="idle"}[5m]))
          without (mode,cpu)

0 comments on commit 73bae80

Please sign in to comment.