From 10cedc9220fcc3718e06db9252e052fb6e291ee1 Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Tue, 29 Nov 2022 14:59:33 +0000 Subject: [PATCH 1/2] Fix templating issue Kolla doesn't template these rules files: https://opendev.org/openstack/kolla-ansible/src/branch/master/ansible/roles/prometheus/tasks/config.yml#L42-L56 --- etc/kayobe/kolla/config/prometheus/system.rules | 2 +- etc/kayobe/kolla/globals.yml | 3 --- etc/kayobe/stackhpc-monitoring.yml | 11 +++++++++++ 3 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 etc/kayobe/stackhpc-monitoring.yml diff --git a/etc/kayobe/kolla/config/prometheus/system.rules b/etc/kayobe/kolla/config/prometheus/system.rules index a8fb59aaac..fe3a2b9acb 100644 --- a/etc/kayobe/kolla/config/prometheus/system.rules +++ b/etc/kayobe/kolla/config/prometheus/system.rules @@ -16,7 +16,7 @@ groups: description: "{{ $labels.device }} is {{ $value }}% full." - alert: LowMemory - expr: (node_memory_MemAvailable_bytes / 1024^3) < {{ alertmanager_low_memory_threshold_gib }} + expr: (node_memory_MemAvailable_bytes / 1024^3) < {% endraw %}{{ alertmanager_low_memory_threshold_gib }}{% raw %} for: 1m labels: severity: alert diff --git a/etc/kayobe/kolla/globals.yml b/etc/kayobe/kolla/globals.yml index c740e0e652..774da38baa 100644 --- a/etc/kayobe/kolla/globals.yml +++ b/etc/kayobe/kolla/globals.yml @@ -12,8 +12,5 @@ bifrost_tag: xena-20221128T101757 es_heap_size: 8g prometheus_cmdline_extras: "--storage.tsdb.retention.time=30d" -# Threshold to trigger a LowMemory alert in Gibibytes (GiB). When the amount -# of free memory is lower than this value an alert will be triggered. -alertmanager_low_memory_threshold_gib: 5 ############################################################################# diff --git a/etc/kayobe/stackhpc-monitoring.yml b/etc/kayobe/stackhpc-monitoring.yml new file mode 100644 index 0000000000..43f1f309f4 --- /dev/null +++ b/etc/kayobe/stackhpc-monitoring.yml @@ -0,0 +1,11 @@ +--- +# StackHPC monitoring configuration + +############################################################################### +# Alert configuration + +# Threshold to trigger a LowMemory alert in Gibibytes (GiB). When the amount +# of free memory is lower than this value an alert will be triggered. +alertmanager_low_memory_threshold_gib: 5 + +############################################################################### From 6f9cbaf7a38feaf16904e839ba83b76d02a40c9a Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Tue, 29 Nov 2022 15:13:12 +0000 Subject: [PATCH 2/2] Docs for monitoring configuration --- doc/source/configuration/index.rst | 1 + doc/source/configuration/monitoring.rst | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 doc/source/configuration/monitoring.rst diff --git a/doc/source/configuration/index.rst b/doc/source/configuration/index.rst index 1330457797..c84f735cbf 100644 --- a/doc/source/configuration/index.rst +++ b/doc/source/configuration/index.rst @@ -9,3 +9,4 @@ the various features provided. :maxdepth: 1 release-train + monitoring diff --git a/doc/source/configuration/monitoring.rst b/doc/source/configuration/monitoring.rst new file mode 100644 index 0000000000..7e53629f19 --- /dev/null +++ b/doc/source/configuration/monitoring.rst @@ -0,0 +1,13 @@ +======================== +Monitoring Configuration +======================== + +StackHPC kayobe config includes a reference monitoring stack based on +Prometheus. Whilst this often works out of the box, there are some tunables +which can be customised to adapt the configuration to a particular deployment. + +The configuration options can be found in +``etc/kayobe/stackhpc-monitoring.yml``: + +.. literalinclude:: ../../../etc/kayobe/stackhpc-monitoring.yml + :language: yaml \ No newline at end of file