diff --git a/.github/workflows/generate.yml b/.github/workflows/generate.yml
index b1457e30..8286d31c 100644
--- a/.github/workflows/generate.yml
+++ b/.github/workflows/generate.yml
@@ -57,6 +57,11 @@ jobs:
         run: |
           make helm-tests
 
+      - name: Dependency-Tests
+        working-directory: ${{env.working-directory}}
+        run: |
+          go test -v ./tests/dependency-tests
+
       - name: E2E-Tests
         working-directory: ${{env.working-directory}}
         run: |
diff --git a/.github/workflows/go-schedule.yml b/.github/workflows/go-schedule.yml
index 0f120424..be242489 100644
--- a/.github/workflows/go-schedule.yml
+++ b/.github/workflows/go-schedule.yml
@@ -57,6 +57,11 @@ jobs:
         run: |
           make helm-tests
 
+      - name: Dependency-Tests
+        working-directory: ${{env.working-directory}}
+        run: |
+          go test -v ./tests/dependency-tests
+
       - name: E2E-Tests
         working-directory: ${{env.working-directory}}
         run: |
diff --git a/chart/Chart.yaml b/chart/Chart.yaml
index 48f22d24..ebb10fb0 100644
--- a/chart/Chart.yaml
+++ b/chart/Chart.yaml
@@ -18,5 +18,5 @@ dependencies:
   repository: https://charts.timescale.com
 - name: kube-prometheus-stack
   condition: kube-prometheus-stack.enabled
-  version: 15.2.4
+  version: 18.0.0
   repository: https://prometheus-community.github.io/helm-charts
diff --git a/chart/docs/ha-setup.md b/chart/docs/ha-setup.md
deleted file mode 100644
index 80af1028..00000000
--- a/chart/docs/ha-setup.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# Prometheus High-Availability
-
-**Note**: This is unnecessary if using the tobs CLI, To enable Prometheus high-availability with tobs CLI use `--enable-prometheus-ha`.
-
-The following steps will explain how to enable Prometheus high-availability with Promscale when using tobs helm chart (without tobs CLI).
-
-Update the tobs `values.yaml` with below HA configuration.
-
-Increase the TimescaleDB connection pool i.e.
-
-```
-timescaledb-single:
-  patroni:
-    bootstrap:
-      dcs:
-        postgresql:
-          parameters:
-            max_connections: 400
-```
-
-Update the Promscale configuration to enable HA mode and increase the replicas to 3:
-
-```
-promscale:
-  replicaCount: 3
-  args:
-  - --high-availability
-```
-
-Update Prometheus configuration to send prometheus pod name with `__replica__` and prometheus cluster name as `cluster` labelSets in the form of external labels and run Prometheus as 3 replicas for HA.
-
-```
-kube-prometheus-stack:
-  prometheus:
-    prometheusSpec:
-      replicaExternalLabelName: "__replica__"
-      prometheusExternalLabelName: "cluster"
-      replicas: 3
-```
\ No newline at end of file
diff --git a/chart/docs/values-config.md b/chart/docs/values-config.md
new file mode 100644
index 00000000..f803a560
--- /dev/null
+++ b/chart/docs/values-config.md
@@ -0,0 +1,130 @@
+# Tobs helm values config
+
+## Prometheus High-Availability
+
+**Note**: This is unnecessary if you are using the tobs CLI. To enable Prometheus high-availability with the tobs CLI, use `--enable-prometheus-ha`.
+
+The following steps explain how to enable Prometheus high-availability with Promscale when using the tobs helm chart (without the tobs CLI).
+
+Update the tobs `values.yaml` with the HA configuration below.
+
+Increase the TimescaleDB connection pool:
+
+```
+timescaledb-single:
+  patroni:
+    bootstrap:
+      dcs:
+        postgresql:
+          parameters:
+            max_connections: 400
+```
+
+Update the Promscale configuration to enable HA mode and increase the replicas to 3:
+
+```
+promscale:
+  replicaCount: 3
+  args:
+    - --high-availability
+```
+
+Update the Prometheus configuration to run 3 replicas for HA and to attach two external labels: the Prometheus pod name as `__replica__` and the Prometheus cluster name as `cluster`:
+
+```
+kube-prometheus-stack:
+  prometheus:
+    prometheusSpec:
+      replicaExternalLabelName: "__replica__"
+      prometheusExternalLabelName: "cluster"
+      replicas: 3
+```
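+
+For reference, the three settings above combine into a single `values.yaml`; a minimal sketch (adjust `max_connections` and the replica counts to your sizing):
+
+```
+timescaledb-single:
+  patroni:
+    bootstrap:
+      dcs:
+        postgresql:
+          parameters:
+            max_connections: 400
+
+promscale:
+  replicaCount: 3
+  args:
+    - --high-availability
+
+kube-prometheus-stack:
+  prometheus:
+    prometheusSpec:
+      replicaExternalLabelName: "__replica__"
+      prometheusExternalLabelName: "cluster"
+      replicas: 3
+```
+
+Apply it with `helm upgrade <release-name> timescale/tobs -f values.yaml`; the `timescale` repository alias and the release name are assumptions, substitute your own.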
+
+## Multi-cluster support
+
+![multi-cluster setup diagram](./../../docs/assets/multi-cluster.png)
+
+Tobs multi-cluster support lets you install a data aggregation cluster that collects observability data coming from different observer clusters.
+
+With tobs you can deploy both the observer clusters and the data aggregation cluster.
+
+### Deploying the data aggregation cluster
+
+The data aggregation cluster acts as the central observability cluster: it stores and visualises the data flowing in from the observer clusters.
+
+Steps to deploy the data aggregation cluster (a combined sketch follows the list):
+
+* Enable a LoadBalancer service for Promscale so that observer clusters can remote-write and remote-read data:
+
+```
+promscale:
+  service:
+    loadBalancer:
+      enabled: true
+```
+
+* Add an external `cluster` label so that Grafana dashboards can distinguish data per cluster:
+
+```
+kube-prometheus-stack:
+  prometheus:
+    prometheusSpec:
+      externalLabels:
+        cluster: <cluster-name>
+```
+
+* Enable multi-cluster support in the Grafana dashboards:
+
+```
+kube-prometheus-stack:
+  grafana:
+    sidecar:
+      dashboards:
+        multicluster:
+          global:
+            enabled: true
+```
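+
+Putting the three steps together, a sketch of the aggregation-cluster `values.yaml` (`<cluster-name>` is a placeholder for the name this cluster should report):
+
+```
+promscale:
+  service:
+    loadBalancer:
+      enabled: true
+
+kube-prometheus-stack:
+  prometheus:
+    prometheusSpec:
+      externalLabels:
+        cluster: <cluster-name>
+  grafana:
+    sidecar:
+      dashboards:
+        multicluster:
+          global:
+            enabled: true
+```
+
+Observer clusters need the external address of the Promscale LoadBalancer service; once deployed, it can be found with `kubectl get svc -n <namespace>` on the aggregation cluster.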
+
+### Deploying the observer cluster
+
+An observer cluster forwards its metrics to the centralised data aggregation cluster, which ingests and visualises the metrics flowing in from the different observer clusters.
+
+Steps to install an observer cluster:
+
+* Disable TimescaleDB:
+
+```
+timescaledb-single:
+  enabled: false
+```
+
+* Disable Promscale:
+
+```
+promscale:
+  enabled: false
+```
+
+* Disable Grafana:
+
+```
+kube-prometheus-stack:
+  grafana:
+    enabled: false
+```
+
+* Configure Prometheus to remote-write to the Promscale LoadBalancer service in the data aggregation cluster, and add an external label carrying the current cluster name so that data can be distinguished per cluster in Grafana (a concrete example follows this block):
+
+```
+kube-prometheus-stack:
+  prometheus:
+    prometheusSpec:
+      externalLabels:
+        cluster: <cluster-name>
+      remoteRead:
+        - url: "<promscale-loadbalancer-address>/read"
+          readRecent: true
+      remoteWrite:
+        - url: "<promscale-loadbalancer-address>/write"
+```
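+
+For example, if the Promscale LoadBalancer in the data aggregation cluster is reachable at `203.0.113.10` (an illustrative address; Promscale listens on port 9201 by default), an observer named `observer-1` would use:
+
+```
+kube-prometheus-stack:
+  prometheus:
+    prometheusSpec:
+      externalLabels:
+        cluster: observer-1
+      remoteRead:
+        - url: "http://203.0.113.10:9201/read"
+          readRecent: true
+      remoteWrite:
+        - url: "http://203.0.113.10:9201/write"
+```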
"https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/"+KubePrometheusCRDVersion+"/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml", + "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/"+KubePrometheusCRDVersion+"/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml", } -var crdNames = []string{ +var kubePrometheusCRDNames = []string{ "alertmanagerconfigs.monitoring.coreos.com", "alertmanagers.monitoring.coreos.com", "podmonitors.monitoring.coreos.com", @@ -492,11 +494,11 @@ var crdNames = []string{ } func createCRDS() error { - err := k8s.CreateCRDS(crdURLs) + err := k8s.CreateCRDS(kubePrometheusCRDURLs) if err != nil { return err } - fmt.Println("Successfully created CRDs: ", crdNames) + fmt.Println("Successfully created CRDs: ", kubePrometheusCRDNames) return nil } diff --git a/cli/tests/dependency-tests/main_test.go b/cli/tests/dependency-tests/main_test.go new file mode 100644 index 00000000..cca54024 --- /dev/null +++ b/cli/tests/dependency-tests/main_test.go @@ -0,0 +1,64 @@ +package dependency_tests + +import ( + "fmt" + "io/ioutil" + "log" + "net/http" + "testing" + + "github.com/timescale/tobs/cli/cmd/upgrade" + "github.com/timescale/tobs/cli/pkg/helm" + "sigs.k8s.io/yaml" +) + +func TestMain(m *testing.M) { + validateKubePrometheusVersions() +} + +func validateKubePrometheusVersions() { + // Get existing tobs helm chart metadata + b, err := ioutil.ReadFile("./../../../chart/Chart.yaml") + if err != nil { + log.Fatal(err) + } + existingTobsChart := &helm.ChartMetadata{} + err = yaml.Unmarshal(b, existingTobsChart) + if err != nil { + log.Fatal(err) + } + var kubePrometheusVersion string + for _, i := range existingTobsChart.Dependencies { + if i.Name == "kube-prometheus-stack" { + kubePrometheusVersion = i.Version + break + } + } + + // Get upstream kube-prometheus chart metadata using kube-prometheus version used in tobs local Chart.yaml + resp, err := http.Get("https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-" + kubePrometheusVersion + "/charts/kube-prometheus-stack/Chart.yaml") + if err != nil { + log.Fatalf("failed to get the kube-prometheus CHart.yaml info %v", err) + } + defer resp.Body.Close() + + bodyBytes, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Fatal(err) + } + + upstreamKPChart := &helm.ChartMetadata{} + err = yaml.Unmarshal(bodyBytes, upstreamKPChart) + if err != nil { + log.Fatal(err) + } + + upstreamKPChart.AppVersion = "v" + upstreamKPChart.AppVersion + // validate existing tobs kube-prometheus helm chart version & CRDs version with upstream version and CRDs that are being used + if upstreamKPChart.Version != kubePrometheusVersion || upstreamKPChart.AppVersion != upgrade.KubePrometheusCRDVersion { + log.Fatalf("failed to validate tobs kube-prometheus helm chart version and CRDs version with upstream versions."+ + "Mismatch in validation, tobs Kube-Prometheus version: %s, tobs kube-prometheus CRD version: %s, "+ + "upstream kube-prometheus CRD version: %s", kubePrometheusVersion, upgrade.KubePrometheusCRDVersion, upstreamKPChart.AppVersion) + } + fmt.Println("successfully validated kube-prometheus CRD versions with upstream versions.") +} diff --git a/docs/assets/multi-cluster.png b/docs/assets/multi-cluster.png new file mode 100644 index 00000000..744aa205 Binary files /dev/null and b/docs/assets/multi-cluster.png differ