diff --git a/chart/docs/upgrades.md b/chart/docs/upgrades.md index e91d0740..051915a9 100644 --- a/chart/docs/upgrades.md +++ b/chart/docs/upgrades.md @@ -8,6 +8,20 @@ Firstly upgrade the helm repo to pull the latest available tobs helm chart. We a helm repo update ``` +## Upgrading to 0.11.0 + +Starting with tobs `0.11.0` we are focusing mostly on reliability improvements. One such improvement is switching the Grafana database back to a dedicated sqlite3 instance instead of sharing TimescaleDB between Grafana and Promscale. Sadly, this change requires manual intervention from end users. If you wish to temporarily keep using TimescaleDB as the Grafana backend, you need to set the following value: + +```yaml +kube-prometheus-stack: + grafana: + timescale: + database: + enabled: true +``` + +Bear in mind that the next tobs release will not support TimescaleDB as a Grafana backend, and you will need to migrate either to sqlite3 or to a separate Grafana instance. + ## Upgrading to 0.10.0 With tobs `0.10.0` release we are starting a process of redesigning tobs. Most notable changes that may require user interaction are listed below. diff --git a/chart/templates/NOTES.txt b/chart/templates/NOTES.txt index c15c058c..1ed726f2 100644 --- a/chart/templates/NOTES.txt +++ b/chart/templates/NOTES.txt @@ -190,17 +190,8 @@ You can access grafana locally by executing: Then you can point your browser to http://127.0.0.1:8080/. -{{- $grafanaPersistence := (or $grafana.persistence.enabled $grafana.timescale.database.enabled) }} -{{- if not $grafanaPersistence }} -WARNING! Persistence is disabled!!! You can lose your data when the Grafana pod is terminated. -{{- end }} -{{- if (and $grafanaPersistence .Release.IsUpgrade) }} -Persistence is enabled, and you did an upgrade. If you don't have the password +Grafana persistence is enabled, and you did an upgrade. If you don't have the password for '{{ $grafana.adminUser }}', it can not be retrieved again, you need to reset it (see next paragraph).
-{{- else }} -Get your '{{ $grafana.adminUser }}' user password by running: - kubectl get secret --namespace {{ .Release.Namespace }} {{ .Release.Name }}-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo -{{- end }} To reset the admin user password you can use grafana-cli from inside the pod by executing: GRAFANA_POD="$(kubectl get pod -o name --namespace {{ .Release.Namespace }} -l app.kubernetes.io/name=grafana)" diff --git a/chart/templates/grafana-datasources-sec.yaml b/chart/templates/grafana-datasources-sec.yaml index be3c90ab..6c4be731 100644 --- a/chart/templates/grafana-datasources-sec.yaml +++ b/chart/templates/grafana-datasources-sec.yaml @@ -6,6 +6,7 @@ {{- $anyDataSources := or $tsdbDatasource.enabled $promEnabled -}} {{ if $anyDataSources -}} +{{ if $grafana.enabled -}} apiVersion: v1 kind: Secret metadata: name: {{ .Release.Name }}-grafana-datasources @@ -47,8 +48,8 @@ stringData: {{ if $isDBURI -}} {{- $hostURL = include "tobs.dburi.host" . -}} {{ else -}} - {{- $host := tpl $grafana.timescale.database.host $ -}} - {{- $port := $grafana.timescale.database.port | int -}} + {{- $host := tpl $grafana.timescale.datasource.host $ -}} + {{- $port := $grafana.timescale.datasource.port | int -}} {{- $hostURL = printf "%s:%d" $host $port -}} {{ end }} - name: Promscale-SQL @@ -69,4 +70,5 @@ stringData: timescaledb: true {{- end -}} {{- end -}} +{{- end -}} {{- end -}} \ No newline at end of file diff --git a/chart/templates/grafana-db-sec.yaml b/chart/templates/grafana-db-sec.yaml index a1ca0677..7e97b7cf 100644 --- a/chart/templates/grafana-db-sec.yaml +++ b/chart/templates/grafana-db-sec.yaml @@ -1,6 +1,5 @@ {{- $grafana := index .Values "kube-prometheus-stack" "grafana" -}} -{{- $isDBURI := (ne .Values.promscale.connection.uri "")}} -{{ if and $grafana.enabled $grafana.timescale.database.enabled -}} +{{ if $grafana.enabled -}} apiVersion: v1 kind: Secret metadata: @@ -10,15 +9,19 @@ metadata: app: {{ template "tobs.fullname" .
}} chart: {{ template "tobs.chart" . }} release: {{ .Release.Name }} + annotations: + "helm.sh/resource-policy": keep type: Opaque +{{- if (($grafana.timescale).database).enabled }} data: -{{ if $isDBURI}} + {{- $isDBURI := (ne .Values.promscale.connection.uri "")}} + {{- if $isDBURI}} GF_DATABASE_HOST: {{ include "tobs.dburi.host" . | b64enc }} GF_DATABASE_NAME: {{ include "tobs.dburi.dbname" . | b64enc }} GF_DATABASE_USER: {{ include "tobs.dburi.user" . | b64enc }} GF_DATABASE_PASSWORD: {{ include "tobs.dburi.password" . | b64enc }} GF_DATABASE_SSL_MODE: {{ include "tobs.dburi.sslmode" . | b64enc }} -{{ else }} + {{- else }} {{- $host := tpl $grafana.timescale.database.host $ -}} {{- $port := $grafana.timescale.database.port | int -}} {{- $fullhost := printf "%s:%d" $host $port }} @@ -27,6 +30,10 @@ data: GF_DATABASE_USER: {{ $grafana.timescale.database.user | b64enc }} GF_DATABASE_PASSWORD: {{ $grafana.timescale.database.pass | b64enc }} GF_DATABASE_SSL_MODE: {{ $grafana.timescale.database.sslMode | b64enc }} -{{ end }} + {{- end }} GF_DATABASE_TYPE: {{ "postgres" | b64enc }} -{{- end -}} \ No newline at end of file +{{- else }} +data: + GF_DATABASE_TYPE: {{ "sqlite3" | b64enc }} +{{- end }} +{{- end }} diff --git a/chart/templates/grafana-db-user-conf.yaml b/chart/templates/grafana-db-user-conf.yaml deleted file mode 100644 index 865f52c7..00000000 --- a/chart/templates/grafana-db-user-conf.yaml +++ /dev/null @@ -1,66 +0,0 @@ -{{- $grafana := index .Values "kube-prometheus-stack" "grafana" -}} -{{- $isDBURI := (ne .Values.promscale.connection.uri "")}} -{{ if $grafana.enabled -}} -{{- $timescaleAsDB := $grafana.timescale.database -}} -{{- $timescaleAsDS := $grafana.timescale.datasource -}} -{{ if or $timescaleAsDB.enabled $timescaleAsDS.enabled -}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Release.Name }}-grafana-db - namespace: {{ template "tobs.namespace" . }} - labels: - app: {{ template "tobs.fullname" . }} - chart: {{ template "tobs.chart" . 
}} - release: {{ .Release.Name }} -data: - {{- $user := ternary (include "tobs.dburi.user" . ) ( index $grafana.timescale.adminUser ) ($isDBURI) }} - add-users.sql: |- - \set ON_ERROR_STOP on - {{ if $timescaleAsDB.enabled -}} - DO $$ - BEGIN - CREATE ROLE {{ $timescaleAsDB.user }} WITH LOGIN PASSWORD '{{ $timescaleAsDB.pass }}'; - EXCEPTION WHEN duplicate_object THEN - RAISE NOTICE 'role {{ $timescaleAsDB.user }} already exists, skipping create'; - END - $$; - GRANT {{ $timescaleAsDB.user }} TO {{ $user }}; - CREATE SCHEMA IF NOT EXISTS {{ $timescaleAsDB.schema }} AUTHORIZATION {{ $timescaleAsDB.user }}; - ALTER ROLE {{ $timescaleAsDB.user }} SET search_path = {{ $timescaleAsDB.schema }}; - {{- end }} - {{ if $timescaleAsDS.enabled -}} - DO $$ - BEGIN - CREATE ROLE prom_reader; - EXCEPTION WHEN duplicate_object THEN - RAISE NOTICE 'role prom_reader already exists, skipping create'; - END - $$; - DO $$ - BEGIN - CREATE ROLE {{ $timescaleAsDS.user }} WITH LOGIN PASSWORD '{{ $timescaleAsDS.pass }}'; - EXCEPTION WHEN duplicate_object THEN - RAISE NOTICE 'role {{ $timescaleAsDS.user }} already exists, skipping create'; - END - $$; - GRANT prom_reader TO {{ $timescaleAsDS.user }}; - {{- end }} - wait-for-ts.sh: |- - echo Checking if ${PGHOST} is up - NUM_TIMES_FAILED=0; - until nslookup ${PGHOST}; - do - echo Num times failed: ${NUM_TIMES_FAILED} - NUM_TIMES_FAILED=$(( $NUM_TIMES_FAILED + 1 )); - if [ "${NUM_TIMES_FAILED}" -gt "5" ] - then - echo Failed 5 times while waiting for ${PGHOST}, exiting - exit 1; - fi - echo Waiting 5 seconds; - sleep 5; - echo Checking if ${PGHOST} is up - done -{{- end -}} -{{- end -}} \ No newline at end of file diff --git a/chart/templates/grafana-db-user-job.yaml b/chart/templates/grafana-db-user-job.yaml deleted file mode 100644 index 1c97fc71..00000000 --- a/chart/templates/grafana-db-user-job.yaml +++ /dev/null @@ -1,62 +0,0 @@ -{{- $grafana := index .Values "kube-prometheus-stack" "grafana" -}} -{{- $isDBURI := (ne 
.Values.promscale.connection.uri "")}} -{{- if and $grafana.enabled $grafana.timescale.database.enabled -}} -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ .Release.Name }}-grafana-db - namespace: {{ template "tobs.namespace" . }} - labels: - app: {{ template "tobs.fullname" . }} - chart: {{ template "tobs.chart" . }} - release: {{ .Release.Name }} - heritage: {{ .Release.Service }} -spec: - template: - spec: - containers: - - name: {{ $.Chart.Name }}-grafana-db - image: postgres:12-alpine - volumeMounts: - - name: sql-volume - mountPath: /add-users.sql - subPath: add-users.sql - env: - - name: PGPORT - value: {{ ternary (include "tobs.dburi.port" . ) ($grafana.timescale.database.port | quote ) ($isDBURI) }} - - name: PGUSER - value: {{ ternary (include "tobs.dburi.user" . ) ($grafana.timescale.adminUser ) ($isDBURI) }} - {{ if $isDBURI }} - - name: PGPASSWORD - value: {{ include "tobs.dburi.password" . }} - {{ else }} - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: {{ tpl $grafana.timescale.adminPassSecret $ }} - key: PROMSCALE_DB_PASSWORD - {{ end }} - - name: PGHOST - value: {{ ternary (include "tobs.dburi.hostwithoutport" . ) ( tpl $grafana.timescale.database.host $ ) ($isDBURI) }} - command: [ 'psql', '-d', {{ ternary (include "tobs.dburi.dbname" . ) ($grafana.timescale.database.dbName ) ($isDBURI) }}, '-f', '/add-users.sql' ] - {{ if .Values.grafanaDBJob.resources }} - resources: - {{ toYaml .Values.grafanaDBJob.resources | nindent 14 }} - {{ end }} - restartPolicy: OnFailure - volumes: - - name: sql-volume - configMap: - name: {{ $.Release.Name }}-grafana-db - initContainers: - - name: init-db - image: busybox:1.28 - volumeMounts: - - name: sql-volume - mountPath: /wait-for-ts.sh - subPath: wait-for-ts.sh - env: - - name: PGHOST - value: {{ ternary (include "tobs.dburi.hostwithoutport" . 
) ( tpl $grafana.timescale.database.host $ ) ($isDBURI) }} - command: [ 'sh', '/wait-for-ts.sh' ] -{{- end -}} \ No newline at end of file diff --git a/chart/values.yaml b/chart/values.yaml index b30b2dac..a499cd7e 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -394,8 +394,14 @@ kube-prometheus-stack: - dashboards/apm-service-dependencies-upstream.json - dashboards/apm-service-overview.json - dashboards/promscale.json + adminUser: admin + # To configure password externally refer to https://github.com/grafana/helm-charts/blob/6578497320d3c4672bab3a3c7fd38dffba1c9aba/charts/grafana/values.yaml#L340-L345 adminPassword: "" - envFromSecret: "{{ .Release.Name }}-grafana-db" + persistence: + type: pvc + enabled: true + accessModes: + - ReadWriteOnce prometheus: datasource: enabled: true @@ -405,7 +411,7 @@ kube-prometheus-stack: url: "http://{{ .Release.Name }}-promscale-connector.{{ .Release.Namespace }}.svc:9201" timescale: database: - enabled: true + enabled: false host: *dbHost port: 5432 user: grafanadb diff --git a/cli/tests/helm-tests/helm_test.go b/cli/tests/helm-tests/helm_test.go index 5a2a6ab5..2f033ad0 100644 --- a/cli/tests/helm-tests/helm_test.go +++ b/cli/tests/helm-tests/helm_test.go @@ -147,7 +147,7 @@ func TestExportValueFromChart(t *testing.T) { if !ok { t.Fatal("failed to get expected value string from export chart value field") } - if v != "50Mi" { + if v != "2Gi" { t.Fatal("failed to verify exportChartValue") } } diff --git a/cli/tests/testdata/e2e-values.yaml b/cli/tests/testdata/e2e-values.yaml index 8f804c0d..83fc0c30 100644 --- a/cli/tests/testdata/e2e-values.yaml +++ b/cli/tests/testdata/e2e-values.yaml @@ -18,11 +18,11 @@ timescaledb-single: # disable the chart if an existing TimescaleDB instance is used enabled: &dbEnabled true - + # override default helm chart image to use one with newer promscale_extension image: repository: timescale/timescaledb-ha - tag: pg14.2-ts2.6.1-p4 + tag: pg14.3-ts2.7.0-p0 pullPolicy: IfNotPresent # 
create only a ClusterIP service @@ -60,7 +60,8 @@ promscale: openTelemetry: enabled: &otelEnabled true # to pass extra args - extraArgs: [] + extraArgs: + - "--metrics.high-availability=true" extraEnv: - name: "TOBS_TELEMETRY_INSTALLED_BY" @@ -121,7 +122,7 @@ promscale: requests: # By default this should be enough for a cluster # with only a few pods - memory: 50Mi + memory: 2Gi cpu: 10m # Enabling Kube-Prometheus will install @@ -137,6 +138,11 @@ kube-prometheus-stack: evaluationInterval: "1m" # Prometheus metric retention retention: 1d + # Number of replicas of each shard to deploy for a Prometheus deployment. + replicas: 2 + replicaExternalLabelName: "__replica__" + # Promscale requires a cluster label to be present for high availability mode. + prometheusExternalLabelName: "cluster" # The remote_read spec configuration for Prometheus. # ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotereadspec remoteRead: @@ -363,6 +369,11 @@ kube-prometheus-stack: # https://github.com/grafana/helm-charts/blob/main/charts/grafana/README.md grafana: enabled: true + # TODO(paulfantom): remove with kube-prometheus bump + image: + repository: grafana/grafana + tag: 8.5.5 + pullPolicy: IfNotPresent sidecar: datasources: enabled: true @@ -387,8 +398,14 @@ kube-prometheus-stack: - dashboards/apm-service-dependencies-upstream.json - dashboards/apm-service-overview.json - dashboards/promscale.json + adminUser: admin + # To configure password externally refer to https://github.com/grafana/helm-charts/blob/6578497320d3c4672bab3a3c7fd38dffba1c9aba/charts/grafana/values.yaml#L340-L345 adminPassword: "" - envFromSecret: "{{ .Release.Name }}-grafana-db" + persistence: + type: pvc + enabled: true + accessModes: + - ReadWriteOnce prometheus: datasource: enabled: true @@ -398,7 +415,7 @@ kube-prometheus-stack: url: "http://{{ .Release.Name }}-promscale-connector.{{ .Release.Namespace }}.svc:9201" timescale: database: - enabled: true + 
enabled: false host: *dbHost port: 5432 user: grafanadb @@ -477,7 +494,7 @@ opentelemetryOperator: tls: insecure: true prometheusremotewrite: - endpoint: "{{ .Release.Name }}-promscale-connector.{{ .Release.Namespace }}.svc:9201/write" + endpoint: "http://{{ .Release.Name }}-promscale-connector.{{ .Release.Namespace }}.svc:9201/write" tls: insecure: true