Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 79 additions & 93 deletions base-helm-configs/grafana/grafana-helm-overrides.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,16 @@
# Set custom_host to the hostname Grafana will be served from.
# The value is substituted into the domain and root_url settings of
# grafana.ini, so it must be a valid domain name.
---
# Hostname referenced as .Values.custom_host by the grafana.ini server
# settings (domain and root_url) further down in this file.
custom_host: "grafana.example.com"

# Persistent storage for Grafana: a PVC on the "general" storage class,
# requested with ReadWriteMany access.
persistence:
  enabled: true
  type: pvc
  storageClassName: general
  accessModes: [ReadWriteMany]

# Node label selector: openstack-control-plane=enabled.
nodeSelector:
  openstack-control-plane: "enabled"

# Ingress is disabled in these overrides.
ingress:
  enabled: false

# Grafana image tag. Declared exactly once (a repeated `tag` key is invalid
# YAML and most parsers silently keep the last one) and quoted so the
# version is always read as a string.
image:
  tag: "10.3.3"
grafana.ini:
paths:
data: /var/lib/grafana/
Expand All @@ -32,99 +24,95 @@ grafana.ini:
grafana_net:
url: https://grafana.net
server:
domain: "{{ .Values.custom_host }}" # Ref: custom_host variable above
root_url: "https://{{ .Values.custom_host }}" # Ref: custom_host variable above
domain: "{{ .Values.custom_host }}"
root_url: https://{{ .Values.custom_host }}
database:
type: mysql
host: mariadb-cluster.grafana.svc:3306
user: $__file{/etc/secrets/grafana-db/username}
password: $__file{/etc/secrets/grafana-db/password}
password: $__file{/etc/secrets/grafana-db/password}
name: grafana

# Grafana data source provisioning: Prometheus (default) and Loki.
# Each data source is listed exactly once.
datasources:
  datasources.yaml:
    # Grafana's provisioning format documents these keys as `apiVersion`
    # and `isDefault`; the all-lowercase spellings are not the documented
    # names. NOTE(review): confirm against the deployed Grafana version that
    # Prometheus now shows up as the default data source.
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        access: proxy
        url: http://kube-prometheus-stack-prometheus.prometheus.svc.cluster.local:9090
        isDefault: true
      - name: Loki
        type: loki
        access: proxy
        # Quoted because the value embeds a template expression.
        url: "http://loki-gateway.{{ $.Release.Namespace }}.svc.cluster.local:80"
        editable: false
alerting:
rules.yaml:
groups:
- orgId: 1
name: loki 1 min eval
folder: rules
interval: 1m
rules:
- uid: ba943125-33ca-4e4e-85f8-13359a8e4d65
title: OVN claim storm
condition: B
data:
- refId: A
- orgId: 1
name: loki 1 min eval
folder: rules
interval: 1m
rules:
- uid: ba943125-33ca-4e4e-85f8-13359a8e4d65
title: OVN claim storm
condition: B
data:
- refId: A
queryType: instant
relativeTimeRange:
from: 60
to: 0
datasourceUid: P8E80F9AEF21F6940
model:
editorMode: builder
expr: rate({app="ovs"} |= `binding|INFO|cr-lrp` [1m])
intervalMs: 60000
maxDataPoints: 43200
queryType: instant
relativeTimeRange:
from: 60
to: 0
datasourceUid: P8E80F9AEF21F6940
model:
editorMode: builder
expr: rate({app="ovs"} |= `binding|INFO|cr-lrp` [1m])
intervalMs: 60000
maxDataPoints: 43200
queryType: instant
refId: A
- refId: B
relativeTimeRange:
from: 60
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 1
- 0
type: gt
operator:
type: and
query:
params: []
reducer:
params: []
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
refId: B
type: threshold
noDataState: OK
execErrState: Error
for: 0s
notifications:
- uid: prom-alertmanager-notification
annotations:
description: >-
Checks app=ovs (ovs-ovn) pod logs for lines with string
'binding|INFO|cr-lrp'
summary: >-
This alerts on rapid port claims for cr-lrp ports on OVN
gateway nodes, which overloads the OVN south database and
interferes with the function of the affected ports.
labels: {}
isPaused: false
refId: A
- refId: B
relativeTimeRange:
from: 60
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 1
- 0
type: gt
operator:
type: and
query:
params: []
reducer:
params: []
type: avg
type: query
datasource:
name: Expression
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
refId: B
type: threshold
noDataState: OK
execErrState: Error
for: 0s
notifications:
- uid: prom-alertmanager-notification
annotations:
description: Checks app=ovs (ovs-ovn) pod logs for lines with string
'binding|INFO|cr-lrp'
summary: This alerts on rapid port claims for cr-lrp ports on OVN gateway nodes,
which overloads the OVN south database and interferes with the
function of the affected ports.
labels: {}
isPaused: false
contactpoints.yaml:
secret:
apiVersion: 1
Expand All @@ -146,13 +134,11 @@ alerting:
group_wait: 1s
group_interval: 1s
repeat_interval: 1s

# Grafana plugin list (Alertmanager data source plugin).
plugins:
  - "camptocamp-prometheus-alertmanager-datasource"

# Mounts the grafana-db secret read-only at /etc/secrets/grafana-db; the
# grafana.ini database section reads its username/password files from there.
extraSecretMounts:
  - name: grafana-db-secret-mount
    secretName: grafana-db
    # File mode for the mounted secret files, written in decimal:
    # 288 decimal == 0440 octal (r--r-----).
    # A bare `440` is read as decimal 440 == octal 0670, which is not the
    # intended mode, and a leading-zero `0440` is octal only under YAML 1.1
    # parsers. The decimal form is unambiguous everywhere.
    defaultMode: 288
    mountPath: /etc/secrets/grafana-db
    readOnly: true
31 changes: 15 additions & 16 deletions base-helm-configs/prometheus/prometheus-helm-overrides.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
---
# Default values for kube-prometheus-stack.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
Expand Down Expand Up @@ -331,13 +332,13 @@ alertmanager:
repeat_interval: 12h
receiver: 'null'
routes:
- receiver: 'null'
matchers:
- alertname = "Watchdog"
- receiver: 'null'
matchers:
- alertname = "Watchdog"
receivers:
- name: 'null'
- name: 'null'
templates:
- '/etc/alertmanager/config/*.tmpl'
- '/etc/alertmanager/config/*.tmpl'

## Alertmanager configuration directives (as string type, preferred over the config hash map)
## stringConfig will be used only, if tplConfig is true
Expand Down Expand Up @@ -743,13 +744,12 @@ alertmanager:
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/storage.md
##
storage:
volumeClaimTemplate:
spec:
storageClassName: general
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 15Gi
volumeClaimTemplate:
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 15Gi

## The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name. string false
##
Expand Down Expand Up @@ -2380,7 +2380,7 @@ prometheusOperator:
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
- ALL

# Security context for patch job container
patchWebhookJob:
Expand All @@ -2389,7 +2389,7 @@ prometheusOperator:
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
- ALL

# Use certmanager to generate webhook certs
certManager:
Expand Down Expand Up @@ -2651,7 +2651,7 @@ prometheusOperator:
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
- ALL

# Enable vertical pod autoscaler support for prometheus-operator
verticalPodAutoscaler:
Expand Down Expand Up @@ -3590,7 +3590,6 @@ prometheus:
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: general
accessModes: ["ReadWriteOnce"]
resources:
requests:
Expand Down