Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate dashboards rules and alerts provisioning #62

Merged
merged 1 commit into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -210,15 +210,15 @@ manifests-common: $(KUSTOMIZE)
manifests-monolithic-mode: $(KUSTOMIZE)
$(info ******************** generates monolithic-mode manifests ********************)
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/logs > kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml
@$(KUSTOMIZE) build kubernetes/monolithic-mode/metrics > kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/metrics > kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/profiles > kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/traces > kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/all-in-one > kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml

manifests-read-write-mode: $(KUSTOMIZE)
$(info ******************** generates read-write-mode manifests ********************)
@$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/logs > kubernetes/read-write-mode/logs/k8s-all-in-one.yaml
@$(KUSTOMIZE) build kubernetes/read-write-mode/metrics > kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/metrics > kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml

manifests-microservices-mode: $(KUSTOMIZE)
$(info ******************** generates microservices-mode manifests ********************)
Expand Down Expand Up @@ -248,6 +248,9 @@ deploy-minio:
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-operator | kubectl apply -f -
kubectl rollout status -n minio-system deployment/minio-operator --watch --timeout=600s
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl apply -f -
@echo "Waiting for Minio to be ready..."
@sleep 20
kubectl rollout status -n minio-system statefulset/codelab-pool-10gb --watch --timeout=600s || true
delete-minio:
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl delete --ignore-not-found -f -

Expand All @@ -261,9 +264,7 @@ deploy-gateway:
deploy-grafana: deploy-prometheus-operator-crds deploy-minio deploy-gateway
$(info ******************** deploy grafana manifests ********************)
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl apply -f -
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana-agent | kubectl apply -f -
delete-grafana:
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana-agent | kubectl delete --ignore-not-found -f -
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl delete --ignore-not-found -f -

define echo_info
Expand All @@ -277,7 +278,8 @@ define config_changes_trigger_pod_restart
$(eval $@_MSG = $(1))
@kubectl rollout restart deployment -n gateway nginx
kubectl rollout status -n gateway deployment/nginx --watch --timeout=600s
@kubectl rollout restart daemonset -n monitoring-system grafana-agent
@echo "Provisioning Grafana dashboards Prometheus rules and alerts..."
@$(KUSTOMIZE) build monitoring-mixins | kubectl apply -f -
kubectl rollout status -n monitoring-system daemonset/grafana-agent --watch --timeout=600s
@$(call echo_info, ${$@_MSG})
endef
Expand Down
2 changes: 0 additions & 2 deletions kubernetes/common/grafana-agent/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ helmCharts:
configMapGenerator:
- name: agent-config
namespace: monitoring-system
options:
disableNameSuffixHash: true
files:
- configs/config.river

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ data:
\"info\")\n\tformat = \"logfmt\"\n}\n"
kind: ConfigMap
metadata:
name: agent-config
name: agent-config-6d74m77mfd
namespace: monitoring-system
---
apiVersion: v1
Expand Down Expand Up @@ -836,7 +836,7 @@ spec:
serviceAccountName: grafana-agent
volumes:
- configMap:
name: agent-config
name: agent-config-6d74m77mfd
name: config
- hostPath:
path: /var/log
Expand Down
74 changes: 66 additions & 8 deletions kubernetes/common/grafana/configs/datasources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,41 +16,99 @@ datasources:
type: prometheus
uid: metrics
access: proxy
url: http://nginx.gateway.svc.cluster.local:8080/prometheus
orgId: 1
url: http://nginx.gateway.svc.cluster.local.:8080/prometheus
basicAuth: false
isDefault: false
isDefault: true
version: 1
editable: true
editable: false
jsonData:
prometheusType: Mimir
exemplarTraceIdDestinations:
- name: traceID
datasourceUid: traces


# Loki for logs
- name: Logs
type: loki
uid: logs
access: proxy
url: http://nginx.gateway.svc.cluster.local:3100
orgId: 1
uid: logs
url: http://nginx.gateway.svc.cluster.local.:3100
basicAuth: false
isDefault: false
version: 1
editable: true
jsonData:
derivedFields:
- datasourceUid: traces
matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)"
name: traceID
url: $${__value.raw}

# https://grafana.com/docs/grafana/latest/datasources/tempo/configure-tempo-data-source/#provision-the-data-source
# Tempo for traces
- name: Traces
type: tempo
access: proxy
orgId: 1
uid: traces
url: http://nginx.gateway.svc.cluster.local:3200
url: http://nginx.gateway.svc.cluster.local.:3200
basicAuth: false
isDefault: false
version: 1
editable: true
apiVersion: 1
jsonData:
search:
hide: false
lokiSearch:
datasourceUid: logs
nodeGraph:
enabled: true
serviceMap:
datasourceUid: metrics
traceQuery:
timeShiftEnabled: true
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
spanBar:
type: 'Tag'
tag: 'http.path'
tracesToMetrics:
datasourceUid: metrics
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'service.name', value: 'service' }]
queries:
- name: '(R) Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))'
- name: '(E) Error Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))'
- name: '(D) Duration'
query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))'
tracesToLogsV2:
datasourceUid: logs
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'app', value: 'app' }]
filterByTraceID: false
filterBySpanID: false
tracesToProfiles:
customQuery: false
datasourceUid: "profiles"
profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
tags: [{ key: 'app', value: 'service_name' }]


# Pyroscope for profiles
- name: Profiles
type: grafana-pyroscope-datasource
uid: profiles
access: proxy
url: http://nginx.gateway.svc.cluster.local:4040
orgId: 1
uid: profiles
url: http://nginx.gateway.svc.cluster.local.:4040
basicAuth: false
isDefault: false
version: 1
Expand Down
76 changes: 67 additions & 9 deletions kubernetes/common/grafana/manifests/k8s-all-in-one.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,41 +182,99 @@ data:
type: prometheus
uid: metrics
access: proxy
url: http://nginx.gateway.svc.cluster.local:8080/prometheus
orgId: 1
url: http://nginx.gateway.svc.cluster.local.:8080/prometheus
basicAuth: false
isDefault: false
isDefault: true
version: 1
editable: true
editable: false
jsonData:
prometheusType: Mimir
exemplarTraceIdDestinations:
- name: traceID
datasourceUid: traces


# Loki for logs
- name: Logs
type: loki
uid: logs
access: proxy
url: http://nginx.gateway.svc.cluster.local:3100
orgId: 1
uid: logs
url: http://nginx.gateway.svc.cluster.local.:3100
basicAuth: false
isDefault: false
version: 1
editable: true
jsonData:
derivedFields:
- datasourceUid: traces
matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)"
name: traceID
url: $${__value.raw}

# https://grafana.com/docs/grafana/latest/datasources/tempo/configure-tempo-data-source/#provision-the-data-source
# Tempo for traces
- name: Traces
type: tempo
access: proxy
orgId: 1
uid: traces
url: http://nginx.gateway.svc.cluster.local:3200
url: http://nginx.gateway.svc.cluster.local.:3200
basicAuth: false
isDefault: false
version: 1
editable: true
apiVersion: 1
jsonData:
search:
hide: false
lokiSearch:
datasourceUid: logs
nodeGraph:
enabled: true
serviceMap:
datasourceUid: metrics
traceQuery:
timeShiftEnabled: true
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
spanBar:
type: 'Tag'
tag: 'http.path'
tracesToMetrics:
datasourceUid: metrics
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'service.name', value: 'service' }]
queries:
- name: '(R) Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))'
- name: '(E) Error Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))'
- name: '(D) Duration'
query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))'
tracesToLogsV2:
datasourceUid: logs
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'app', value: 'app' }]
filterByTraceID: false
filterBySpanID: false
tracesToProfiles:
customQuery: false
datasourceUid: "profiles"
profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
tags: [{ key: 'app', value: 'service_name' }]


# Pyroscope for profiles
- name: Profiles
type: grafana-pyroscope-datasource
uid: profiles
access: proxy
url: http://nginx.gateway.svc.cluster.local:4040
orgId: 1
uid: profiles
url: http://nginx.gateway.svc.cluster.local.:4040
basicAuth: false
isDefault: false
version: 1
Expand All @@ -226,7 +284,7 @@ kind: ConfigMap
metadata:
labels:
grafana_datasource: "1"
name: grafana-datasources-gcc7kf6bh4
name: grafana-datasources-22t6t9c8f8
namespace: monitoring-system
---
apiVersion: v1
Expand Down
1 change: 0 additions & 1 deletion kubernetes/common/memcached/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ helmCharts:

resources:
- namespace.yaml
- ../../../monitoring-mixins/memcached-mixin/deploy

# # only if auth.enabled=true
# secretGenerator:
Expand Down
Loading