Skip to content

Commit

Permalink
Merge pull request #127 from projectsyn/feat/configurable-custom-rules
Browse files Browse the repository at this point in the history
Add component parameter for configuring additional Prometheus rules
  • Loading branch information
simu committed Jun 30, 2023
2 parents 25c5db0 + ab736df commit 2c46612
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 38 deletions.
11 changes: 11 additions & 0 deletions class/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,17 @@ parameters:
CephOSDDiskNotResponding:
for: 5m

# Additional recording/alerting rules. Prefix each key with "record:" or
# "alert:" to select the rule type; the rest of the key is the rule name.
additionalRules:
"alert:RookCephOperatorScaledDown":
expr: kube_deployment_spec_replicas{deployment="rook-ceph-operator", namespace="${rook_ceph:namespace}"} == 0
for: 1h
annotations:
summary: rook-ceph operator scaled to 0 for more than 1 hour.
labels:
severity: warning

node_selector:
node-role.kubernetes.io/storage: ''

Expand Down
95 changes: 64 additions & 31 deletions component/alertrules.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,21 @@ local runbook(alertname) =

local on_openshift =
inv.parameters.facts.distribution == 'openshift4';

local alertpatching =
if on_openshift then
import 'lib/alert-patching.libsonnet'
else
local patchRule(rule) =
local patchRule(rule, patches={}, patch_name=true) =
if !std.objectHas(rule, 'alert') then
rule
else
rule {
alert: 'SYN_%s' % super.alert,
alert:
if patch_name then
'SYN_%s' % super.alert
else
super.alert,
labels+: {
syn: 'true',
syn_component: inv.parameters._instance,
Expand All @@ -85,6 +90,37 @@ local alertpatching =
}
);

// Prometheus object helper library.
//
// On OpenShift 4 this is the imported `lib/prom.libsonnet`. Everywhere else a
// minimal stand-in is used which only provides `generateRules`; `std.trace`
// emits a warning and returns that stand-in object.
local prom =
  if on_openshift then
    import 'lib/prom.libsonnet'
  else
    std.trace(
      'Prometheus object helper library not available on non-OCP4, additional rules may be configured incorrectly',
      {
        // Fallback for `generateRules`.
        //
        // `rules` is an object mapping group names to objects of rules, where
        // each rule key has the form "alert:<name>" or "record:<name>".
        // `name` is accepted for signature compatibility but isn't used by
        // this fallback. Returns an object with the generated rule groups in
        // `spec.groups`.
        generateRules(name, rules): {
          spec: {
            groups: [
              {
                name: group_name,
                rules: [
                  local keyparts = std.splitLimit(rulekey, ':', 1);
                  // Fail with a readable message instead of an index error
                  // on `keyparts[1]` when the key has no "<type>:" prefix.
                  assert std.length(keyparts) == 2 :
                         'additional rule key "%s" must be of the form "alert:<name>" or "record:<name>"' % rulekey;
                  alertpatching.patchRule(
                    rules[group_name][rulekey] {
                      // The prefix becomes the field name (`alert`/`record`),
                      // the remainder of the key becomes the rule name.
                      [keyparts[0]]: keyparts[1],
                    },
                    patches={},
                    patch_name=false,
                  )
                  for rulekey in std.objectFields(rules[group_name])
                  // Skip entries set to `null`; `null { ... }` would fail to
                  // evaluate. NOTE(review): presumably `null` is how users
                  // remove a default rule -- confirm against the OCP4 library.
                  if rules[group_name][rulekey] != null
                ],
              }
              for group_name in std.objectFields(rules)
            ],
          },
        },
      }
    );

local alert_rules_raw = helpers.load_manifest('prometheus-ceph-rules');
assert std.length(alert_rules_raw) >= 1;
local alert_rules_manifests = std.filter(
Expand All @@ -102,40 +138,37 @@ local ignore_groups = std.set([
'ceph-node-alert.rules',
]);

// Hard-coded additional rule group: a single `RookCephOperatorScaledDown`
// alert which fires when the rook-ceph-operator deployment in
// `params.namespace` has had 0 replicas for 1h. The rule is passed through
// `alertpatching.patchRule` before being added to the group.
local additional_rules = [
{
name: 'syn-rook-ceph-additional.alerts',
rules: [
alertpatching.patchRule(
{
alert: 'RookCephOperatorScaledDown',
expr: 'kube_deployment_spec_replicas{deployment="rook-ceph-operator", namespace="%s"} == 0' % params.namespace,
annotations: {
summary: 'rook-ceph operator scaled to 0 for more than 1 hour.',
description: 'TODO',
// Runbook link derived from the alert name via `runbook()`.
runbook_url: runbook('RookCephOperatorScaledDown'),
},
labels: {
severity: 'warning',
},
'for': '1h',
},
),
],
},
];

// Mixin for a rule group (`group + add_runbook_url`).
//
// Adds a `runbook_url` annotation, derived from the alert name via
// `runbook()`, to every alerting rule in `super.rules` which doesn't already
// have one. Rules without an `alert` field (recording rules) are passed
// through unchanged.
local add_runbook_url = {
  rules: [
    // An alerting rule may have no `annotations` field at all, so check for
    // the field before looking inside it; `&&` short-circuits.
    local has_runbook =
      std.objectHas(r, 'annotations') &&
      std.objectHas(r.annotations, 'runbook_url');
    if std.objectHas(r, 'alert') && !has_runbook then
      r {
        // `+:` also works when `annotations` is absent on `r`.
        annotations+: {
          runbook_url: runbook(r.alert),
        },
      }
    else
      r
    for r in super.rules
  ],
};

// Rule groups generated from the user-configurable parameter
// `params.alerts.additionalRules`, with runbook URLs injected into each group
// via `add_runbook_url`. Consumers read the groups from `.spec.groups`.
local additional_rules =
  local group_name = 'syn-rook-ceph-additional.rules';
  // `generateRules` expects an object keyed by group name, so wrap the flat
  // parameter in a single group.
  local generated = prom.generateRules(
    'additional-rules',
    { [group_name]: params.alerts.additionalRules },
  );
  generated {
    spec+: {
      groups: std.map(
        function(grp) grp + add_runbook_url,
        generated.spec.groups
      ),
    },
  };

local alert_rules = [
local gs = std.filter(
function(it) !std.member(ignore_groups, it.name),
Expand All @@ -158,7 +191,7 @@ local alert_rules = [
if std.length(r.rules) > 0 then r
for g in gs
]
) + additional_rules,
) + additional_rules.spec.groups,
},
}
for rule_manifest in alert_rules_manifests
Expand Down
12 changes: 12 additions & 0 deletions docs/modules/ROOT/pages/references/parameters.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,18 @@ The component expects valid partial Prometheus alert rule objects as values.

IMPORTANT: The provided values aren't validated, they're applied to the corresponding upstream alert as-is.

=== `additionalRules`

[horizontal]
type:: dict
default:: https://github.com/projectsyn/component-rook-ceph/blob/master/class/defaults.yml[See `class/defaults.yml`]

This parameter allows users to configure additional alerting and recording rules.
All rules defined in this parameter will be added to rule group `syn-rook-ceph-additional.rules`.

For alerting rules, a runbook URL is injected if the annotation `runbook_url` isn't already set on the rule.
The injected runbook URL is derived from the alert name using pattern `pass:[https://hub.syn.tools/rook-ceph/runbooks/{alertname}.html]`.

== `node_selector`

[horizontal]
Expand Down
5 changes: 2 additions & 3 deletions tests/golden/defaults/rook-ceph/rook-ceph/40_alertrules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -414,11 +414,10 @@ spec:
syn: 'true'
syn_component: rook-ceph
type: ceph_default
- name: syn-rook-ceph-additional.alerts
- name: syn-rook-ceph-additional.rules
rules:
- alert: SYN_RookCephOperatorScaledDown
- alert: RookCephOperatorScaledDown
annotations:
description: TODO
runbook_url: https://hub.syn.tools/rook-ceph/runbooks/RookCephOperatorScaledDown.html
summary: rook-ceph operator scaled to 0 for more than 1 hour.
expr: kube_deployment_spec_replicas{deployment="rook-ceph-operator", namespace="syn-rook-ceph-operator"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -414,11 +414,10 @@ spec:
syn: 'true'
syn_component: rook-ceph
type: ceph_default
- name: syn-rook-ceph-additional.alerts
- name: syn-rook-ceph-additional.rules
rules:
- alert: SYN_RookCephOperatorScaledDown
- alert: RookCephOperatorScaledDown
annotations:
description: TODO
runbook_url: https://hub.syn.tools/rook-ceph/runbooks/RookCephOperatorScaledDown.html
summary: rook-ceph operator scaled to 0 for more than 1 hour.
expr: kube_deployment_spec_replicas{deployment="rook-ceph-operator", namespace="syn-rook-ceph-operator"}
Expand Down
5 changes: 4 additions & 1 deletion tests/openshift4.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ parameters:
source: https://raw.githubusercontent.com/projectsyn/component-storageclass/v1.0.0/lib/storageclass.libsonnet
output_path: vendor/lib/storageclass.libsonnet
- type: https
source: https://raw.githubusercontent.com/appuio/component-openshift4-monitoring/v3.1.0/lib/openshift4-monitoring-alert-patching.libsonnet
source: https://raw.githubusercontent.com/appuio/component-openshift4-monitoring/v3.5.0/lib/openshift4-monitoring-alert-patching.libsonnet
output_path: vendor/lib/alert-patching.libsonnet
- type: https
source: https://raw.githubusercontent.com/appuio/component-openshift4-monitoring/v3.5.0/lib/openshift4-monitoring-prom.libsonnet
output_path: vendor/lib/prom.libsonnet

storageclass:
defaults: {}
Expand Down

0 comments on commit 2c46612

Please sign in to comment.