diff --git a/etc/kayobe/kolla/config/prometheus/system.rules b/etc/kayobe/kolla/config/prometheus/system.rules
index ffc7d25a3..6adf515a6 100644
--- a/etc/kayobe/kolla/config/prometheus/system.rules
+++ b/etc/kayobe/kolla/config/prometheus/system.rules
@@ -78,4 +78,26 @@ groups:
       summary: "Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})"
       description: "{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes."
 
+  # node_timex_offset_seconds is beyond +/-0.05 s and is not shrinking back
+  # towards zero (its 5m derivative has the same sign as the offset, or is 0).
+  - alert: HostClockSkew
+    expr: (node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)
+    for: 2m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Host clock skew (instance {{ $labels.instance }})"
+      description: "Clock skew detected. Clock is out of sync. Ensure NTP is configured correctly on this host."
+
+  # node_timex_sync_status had a minimum of 0 over the last minute while
+  # node_timex_maxerror_seconds is at least 16 s.
+  - alert: HostClockNotSynchronising
+    expr: min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16
+    for: 2m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Host clock not synchronising (instance {{ $labels.instance }})"
+      description: "Clock not synchronising. Ensure NTP is configured on this host."
+
 {% endraw %}
diff --git a/releasenotes/notes/ntp-alerts-0d110b4979457165.yaml b/releasenotes/notes/ntp-alerts-0d110b4979457165.yaml
new file mode 100644
index 000000000..62d932cc2
--- /dev/null
+++ b/releasenotes/notes/ntp-alerts-0d110b4979457165.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    Adds Prometheus alerting rules for host clock skew and NTP sync failures.