# Bump kube-prometheus-stack for multi-cluster support #162
New file (130 lines added):
# Tobs helm values config

## Prometheus High-Availability

**Note**: This is unnecessary when using the tobs CLI. To enable Prometheus high-availability with the tobs CLI, use `--enable-prometheus-ha`.

The following steps explain how to enable Prometheus high-availability with Promscale when using the tobs helm chart directly (without the tobs CLI).

Update the tobs `values.yaml` with the HA configuration below.

Increase the TimescaleDB connection pool, i.e.

```
timescaledb-single:
  patroni:
    bootstrap:
      dcs:
        postgresql:
          parameters:
            max_connections: 400
```

Update the Promscale configuration to enable HA mode and increase the replicas to 3:

```
promscale:
  replicaCount: 3
  args:
    - --high-availability
```

Update the Prometheus configuration to send the Prometheus pod name as the `__replica__` label and the Prometheus cluster name as the `cluster` label (in the form of external labels), and run Prometheus with 3 replicas for HA:

```
kube-prometheus-stack:
  prometheus:
    prometheusSpec:
      replicaExternalLabelName: "__replica__"
      prometheusExternalLabelName: "cluster"
      replicas: 3
```
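Taken together, the three fragments above belong in a single `values.yaml`. A sketch of the merged HA configuration (key names exactly as in the fragments above):

```
timescaledb-single:
  patroni:
    bootstrap:
      dcs:
        postgresql:
          parameters:
            max_connections: 400

promscale:
  replicaCount: 3
  args:
    - --high-availability

kube-prometheus-stack:
  prometheus:
    prometheusSpec:
      replicaExternalLabelName: "__replica__"
      prometheusExternalLabelName: "cluster"
      replicas: 3
```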
## Multi-cluster support
> **Review comment:** can we have a diagram for this section showing the various clusters etc.

<img src="./../../docs/assets/multi-cluster.png" alt="multi-cluster setup diagram" width="800"/>

> **Review comment:** I believe you forgot to add this image in the PR?
>
> **Reply:** Added it to the PR, here is the file: https://github.com/timescale/tobs/pull/162/files#diff-1a7cd77999e454c5a336fcb163eb5108d70057981aa40d648c619671eba20b3e
In tobs you can enable multi-cluster support to install a data aggregation cluster that collects observability data coming from different observer clusters.

With tobs you can deploy both the observer clusters and the data aggregation cluster.

### Deploying the data aggregation cluster

The data aggregation cluster acts as the central observability cluster, which stores and visualises the data flowing in from observer clusters.

Steps to deploy the data aggregation cluster:

* Enable a LoadBalancer service for Promscale so that observer clusters can remote-write and remote-read data.

```
promscale:
  service:
    loadBalancer:
      enabled: true
```

* Add an external cluster label to differentiate the visualisation in Grafana dashboards on a per-cluster level.

```
kube-prometheus-stack:
  prometheus:
    prometheusSpec:
      externalLabels:
        cluster: <clusterName>
```

* Enable multi-cluster support in Grafana dashboards.

```
kube-prometheus-stack:
  grafana:
    sidecar:
      dashboards:
        multicluster:
          global:
            enabled: true
```
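The three fragments above live in one `values.yaml` for the data aggregation cluster. Merged, a sketch looks like this (with `<clusterName>` still to be filled in):

```
promscale:
  service:
    loadBalancer:
      enabled: true

kube-prometheus-stack:
  prometheus:
    prometheusSpec:
      externalLabels:
        cluster: <clusterName>
  grafana:
    sidecar:
      dashboards:
        multicluster:
          global:
            enabled: true
```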
### Deploying the observer cluster

The observer cluster forwards its metrics to the data-aggregation/centralised monitoring cluster, which supports ingesting and visualising metrics flowing in from the different observer clusters.

Steps to install an observer cluster:

* Disable TimescaleDB

```
timescaledb-single:
  enabled: false
```

* Disable Promscale

```
promscale:
  enabled: false
```

* Disable Grafana

```
kube-prometheus-stack:
  grafana:
    enabled: false
```

* Configure Prometheus remote-write to the Promscale LoadBalancer service in the data aggregation cluster, and add an external label carrying the current cluster name to differentiate the visualisation on a per-cluster basis in Grafana.

```
kube-prometheus-stack:
  prometheus:
    prometheusSpec:
      externalLabels:
        cluster: <clusterName>
      remoteRead:
        - url: "<PROMSCALE_SERVICE_ENDPOINT_OF_DATA_AGGREGATION_CLUSTER>/read"
          readRecent: true
      remoteWrite:
        - url: "<PROMSCALE_ENDPOINT>/write"
```
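Merging the four fragments above gives a complete observer-cluster `values.yaml` sketch (placeholders left exactly as in the fragments):

```
timescaledb-single:
  enabled: false

promscale:
  enabled: false

kube-prometheus-stack:
  grafana:
    enabled: false
  prometheus:
    prometheusSpec:
      externalLabels:
        cluster: <clusterName>
      remoteRead:
        - url: "<PROMSCALE_SERVICE_ENDPOINT_OF_DATA_AGGREGATION_CLUSTER>/read"
          readRecent: true
      remoteWrite:
        - url: "<PROMSCALE_ENDPOINT>/write"
```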
New file (64 lines added):

```go
package dependency_tests

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"testing"

	"github.com/timescale/tobs/cli/cmd/upgrade"
	"github.com/timescale/tobs/cli/pkg/helm"
	"sigs.k8s.io/yaml"
)

func TestMain(m *testing.M) {
	validateKubePrometheusVersions()
}

func validateKubePrometheusVersions() {
	// Get existing tobs helm chart metadata
	b, err := ioutil.ReadFile("./../../../chart/Chart.yaml")
	if err != nil {
		log.Fatal(err)
	}
	existingTobsChart := &helm.ChartMetadata{}
	err = yaml.Unmarshal(b, existingTobsChart)
	if err != nil {
		log.Fatal(err)
	}
	var kubePrometheusVersion string
	for _, i := range existingTobsChart.Dependencies {
		if i.Name == "kube-prometheus-stack" {
			kubePrometheusVersion = i.Version
			break
		}
	}

	// Get upstream kube-prometheus chart metadata using the kube-prometheus version pinned in the local tobs Chart.yaml
	resp, err := http.Get("https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-" + kubePrometheusVersion + "/charts/kube-prometheus-stack/Chart.yaml")
	if err != nil {
		log.Fatalf("failed to get the kube-prometheus Chart.yaml info %v", err)
	}
	defer resp.Body.Close()

	bodyBytes, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	upstreamKPChart := &helm.ChartMetadata{}
	err = yaml.Unmarshal(bodyBytes, upstreamKPChart)
	if err != nil {
		log.Fatal(err)
	}

	upstreamKPChart.AppVersion = "v" + upstreamKPChart.AppVersion
	// validate the kube-prometheus helm chart version & CRD version used by tobs against the upstream versions
	if upstreamKPChart.Version != kubePrometheusVersion || upstreamKPChart.AppVersion != upgrade.KubePrometheusCRDVersion {
		log.Fatalf("failed to validate tobs kube-prometheus helm chart version and CRDs version with upstream versions. "+
			"Mismatch in validation, tobs kube-prometheus version: %s, tobs kube-prometheus CRD version: %s, "+
			"upstream kube-prometheus CRD version: %s", kubePrometheusVersion, upgrade.KubePrometheusCRDVersion, upstreamKPChart.AppVersion)
	}
	fmt.Println("successfully validated kube-prometheus CRD versions with upstream versions.")
}
```
> **Review comment:** hmm. I think there are two different types of config here. Type 1 is ok here. I'd move the type 2 stuff into a new top-level /docs directory. It does not belong under /chart, and I'd add new links from the front-page readme here.

> **Reply:** So currently using the CLI we offer only abstract features with flags like HA, integration with forge, etc. All these features can also be configured within `values.yaml`. All the flexibility lies in `values.yaml`, so the intention behind creating this file is to document all possible configurations across the different components in tobs. Yes, I agree with you: only config related to `values.yaml` will reside in this file. Anything about the CLI will go in a new file in the high-level root directory of this repo.