Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 126 additions & 76 deletions base-helm-configs/grafana/grafana-helm-overrides.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,93 +38,143 @@ grafana.ini:
type: mysql
host: mariadb-cluster.grafana.svc:3306
user: $__file{/etc/secrets/grafana-db/username}
password: $__file{/etc/secrets/grafana-db/password}
password: $__file{/etc/secrets/grafana-db/password}
name: grafana

# Grafana data source provisioning: one Prometheus source and one Loki source.
datasources:
  datasources.yaml:
    # Provisioning keys are case-sensitive, so use `apiVersion` / `isDefault`
    # (same spelling as the `apiVersion: 1` used for contact points in this
    # file). The previous all-lowercase spellings do not match the documented
    # schema.
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        access: proxy
        url: http://kube-prometheus-stack-prometheus.prometheus.svc.cluster.local:9090
        isDefault: true
      - name: Loki
        type: loki
        access: proxy
        # NOTE(review): the namespace placeholder is only resolved if the
        # chart renders this value through `tpl` — confirm for the chart
        # version in use.
        url: http://loki-gateway.{{ $.Release.Namespace }}.svc.cluster.local:80
        editable: false

alerting:
rules.yaml:
groups:
- orgId: 1
name: loki 1 min eval
folder: rules
interval: 1m
rules:
- uid: ba943125-33ca-4e4e-85f8-13359a8e4d65
title: OVN claim storm
condition: B
data:
- refId: A
- orgId: 1
name: loki 1 min eval
folder: rules
interval: 1m
rules:
- uid: ba943125-33ca-4e4e-85f8-13359a8e4d65
title: OVN claim storm
condition: B
data:
- refId: A
queryType: instant
relativeTimeRange:
from: 60
to: 0
datasourceUid: P8E80F9AEF21F6940
model:
editorMode: builder
expr: rate({app="ovs"} |= `binding|INFO|cr-lrp` [1m])
intervalMs: 60000
maxDataPoints: 43200
queryType: instant
refId: A
- refId: B
relativeTimeRange:
from: 60
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 1
- 0
type: gt
operator:
type: and
query:
params: []
reducer:
params: []
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
refId: B
type: threshold
noDataState: OK
execErrState: Error
for: 0s
notifications:
- uid: prom-alertmanager-notification
annotations:
description: >-
Checks app=ovs (ovs-ovn) pod logs for lines with string
'binding|INFO|cr-lrp'
summary: >-
This alerts on rapid port claims for cr-lrp ports on OVN
gateway nodes, which overloads the OVN south database and
interferes with the function of the affected ports.
labels: {}
isPaused: false
# UID generated with 'uuidgen'
- uid: c14dd8fd-54ec-4e15-9813-e02cc3269899
title: Neutron IPAM Duplicate Entry Error
condition: C
data:
- refId: A
queryType: instant
relativeTimeRange:
from: 60
to: 0
# Uses the same Loki datasource as rule ba943125-33ca-4e4e-85f8-13359a8e4d65
datasourceUid: P8E80F9AEF21F6940
model:
expr: rate({app="fluentbit"} |= `Duplicate entry|ERROR` [1m])
queryType: instant
relativeTimeRange:
from: 60
to: 0
datasourceUid: P8E80F9AEF21F6940
model:
editorMode: builder
expr: rate({app="ovs"} |= `binding|INFO|cr-lrp` [1m])
intervalMs: 60000
maxDataPoints: 43200
queryType: instant
refId: A
- refId: B
relativeTimeRange:
from: 60
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 1
- 0
type: gt
operator:
type: and
query:
params: []
reducer:
params: []
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
refId: B
type: threshold
noDataState: OK
execErrState: Error
for: 0s
notifications:
- uid: prom-alertmanager-notification
annotations:
description: >-
Checks app=ovs (ovs-ovn) pod logs for lines with string
'binding|INFO|cr-lrp'
summary: >-
This alerts on rapid port claims for cr-lrp ports on OVN
gateway nodes, which overloads the OVN south database and
interferes with the function of the affected ports.
labels: {}
isPaused: false
refId: A
- refId: B
relativeTimeRange:
# Look back over the past 60 seconds (adjust as needed)
from: 60
# 0 means up to the current time
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 1
- 0
type: gt
operator:
type: and
reducer:
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
expression: A
refId: B
type: threshold
noDataState: OK
execErrState: Error
notifications:
- uid: prom-alertmanager-notification
annotations:
summary: >
Checks for log lines containing 'Duplicate entry|ERROR' in nova logs.
isPaused: false
contactpoints.yaml:
secret:
apiVersion: 1
Expand Down