Skip to content

Commit

Permalink
Added support for custom log alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
bastianeicher committed Jul 10, 2024
1 parent d35cd79 commit 9056e40
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 1 deletion.
8 changes: 8 additions & 0 deletions charts/generic-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,14 @@ app:
| `alerting.logs.levels.lokiConfigMap.label` | `loki_rule` | The label attached to the ConfigMap holding the Loki Rules |
| `alerting.logs.levels.lokiConfigMap.labelValue` | `""` | The value of the label attached to the Loki Rule ConfigMap |
| `alerting.logs.createLokiRule` | `false` | Whether to create a `LokiRule` custom resource instead of a ConfigMap for log alerting |
| `alerting.logs.custom.*.regex` | | Regular expression to use for matching log lines |
| `alerting.logs.custom.*.labelMatchers` | | Loki label matchers to use for matching log lines (e.g., `some_key="some_value"`) |
| `alerting.logs.custom.*.countInterval` | | The time interval in which to count the number of matching log lines |
| `alerting.logs.custom.*.predicate` | __required if used__ | An expression that triggers the alert when the number of matching log lines fulfills it |
| `alerting.logs.custom.*.severity` | `warning` | The severity of the alert |
| `alerting.logs.custom.*.topic` | `logs` | The `topic` label for the alert |
| `alerting.logs.custom.*.summary` | __required if used__ | A short summary of the alert |
| `alerting.logs.custom.*.description` | __required if used__ | A longer description of the alert; can include log labels via templating |
| `sidecars` | `[]` | Additional sidecar containers to be added to the `Pod` |
| `sidecarTemplates` | `[]` | Strings to be templated providing additional sidecar containers to be added to the Pod |
| `initContainers` | `[]` | Additional init containers to be added to the `Pod` |
Expand Down
13 changes: 13 additions & 0 deletions charts/generic-service/ci/custom-alerts-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,16 @@ alerting:
predicate: '> 100'
summary: some metric too high
description: Some metric is {{ $value }}, which is too high.
logs:
custom:
ForbiddenLogLine:
regex: evil
predicate: '> 0'
summary: Forbidden log line seen
description: A forbidden log line was seen.
RequiredLogLine:
regex: good
predicate: '== 0'
countInterval: 24h
summary: Required log line missing
description: A required log line was not seen within the last 24h.
20 changes: 19 additions & 1 deletion charts/generic-service/templates/alerts-logs.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if and (and .Values.alerting.enabled .Values.alerting.logs.levels) (or .Values.alerting.logs.createLokiRule .Values.alerting.logs.lokiConfigMap) }}
{{- if and (and .Values.alerting.enabled (or .Values.alerting.logs.levels .Values.alerting.logs.custom)) (or .Values.alerting.logs.createLokiRule .Values.alerting.logs.lokiConfigMap) }}

{{- if .Values.alerting.logs.createLokiRule }}
apiVersion: logging.opsgy.com/v1beta1
Expand Down Expand Up @@ -31,4 +31,22 @@ data:
annotations: {{- include "generic-service.alert-annotations" $ | nindent 14 }} {{ $level }} logs
description: '{{ $.Release.Name }} reported {{"{{ $value }}"}} {{ $level }} log messages in the last {{ $.Values.alerting.logs.countInterval }}.'
{{- end }}

{{- /*
  Custom log alerts: one Loki rule per entry in .Values.alerting.logs.custom.
  Each rule counts the log lines of this release's container (optionally narrowed
  by extra label matchers and/or a regex line filter) over countInterval and fires
  when that count fulfills the configured predicate.
*/}}
{{- range $name, $props := .Values.alerting.logs.custom }}
          - alert: {{ $name | quote }}
            expr: |
              sum(
                count_over_time(
                  ({
                    namespace="{{ $.Release.Namespace }}", container="{{ include "generic-service.fullname" $ }}"{{ if $props.labelMatchers }}, {{ tpl $props.labelMatchers $ }}{{ end }}
                  }{{ with $props.regex }} |~ {{ . | quote }}{{ end }})
                  [{{ $props.countInterval | default $.Values.alerting.logs.countInterval }}])
              )
              {{ $props.predicate | required "alert predicate required" }}
            for: 1s
            labels:
              {{- include "generic-service.alert-labels" $ | nindent 14 }}
              severity: {{ $props.severity | default "warning" | quote }}
              topic: {{ $props.topic | default "logs" | quote }}
            annotations:
              {{- include "generic-service.alert-annotations" $ | nindent 14 }}
              summary: {{ $props.summary | required "alert summary required" | quote }}
              description: {{ $props.description | required "alert description required" | quote }}
{{- end }}
{{- end }}
45 changes: 45 additions & 0 deletions charts/generic-service/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,51 @@
"type": "boolean",
"default": false,
"description": "Whether to create a LokiRule custom resource instead of a ConfigMap for log alerting"
},
"custom": {
"type": "object",
"additionalProperties": {
"type": "object",
"properties": {
"regex": {
"type": "string",
"description": "Regular expression to use for matching log lines"
},
"labelMatchers": {
"type": "string",
"description": "Loki label matchers to use for matching log lines (e.g., some_key=\"some_value\")"
},
"countInterval": {
"type": "string",
"description": "The time interval in which to count the number of matching log lines"
},
"predicate": {
"type": "string",
"description": "An expression that triggers the alert when the number of matching log lines fulfills it"
},
"severity": {
"type": "string",
"default": "warning",
"description": "The severity of the alert"
},
"topic": {
"type": "string",
"default": "logs",
"description": "The topic label for the alert"
},
"summary": {
"type": "string",
"description": "A short summary of the alert"
},
"description": {
"type": "string",
"description": "A longer description of the alert; can include log labels via templating"
}
},
"required": ["predicate", "summary", "description"],
"additionalProperties": false
},
"description": "Additional custom alerts based on the number of log lines matching a regular expression and/or Loki label matchers"
}
},
"additionalProperties": false
Expand Down
1 change: 1 addition & 0 deletions charts/generic-service/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ alerting:
lokiConfigMap:
label: "loki_rule"
labelValue: ""
custom: {}

initContainers: []
initContainerTemplates: []
Expand Down

0 comments on commit 9056e40

Please sign in to comment.