From 1be990b956c838c1efe4f6d5b6ad44a8c59e5f6d Mon Sep 17 00:00:00 2001 From: MasterPtato <23087326+MasterPtato@users.noreply.github.com> Date: Tue, 30 Apr 2024 17:05:53 +0000 Subject: [PATCH] fix: move grafana to its own helm chart (#741) ## Changes --- infra/tf/k8s_infra/cockroachdb.tf | 9 +- infra/tf/k8s_infra/grafana.tf | 125 ++++++++++++++++++++++++- infra/tf/k8s_infra/prometheus.tf | 75 +-------------- infra/tf/k8s_infra/traefik_tunnel.tf | 10 +- infra/tf/tls/root_ca.tf | 2 +- infra/tf/tls/tunnel_server.tf | 8 +- infra/tf/tls/vars.tf | 10 ++ lib/bolt/core/src/dep/terraform/gen.rs | 4 +- 8 files changed, 154 insertions(+), 89 deletions(-) diff --git a/infra/tf/k8s_infra/cockroachdb.tf b/infra/tf/k8s_infra/cockroachdb.tf index e381b5aa8e..7f48e1d590 100644 --- a/infra/tf/k8s_infra/cockroachdb.tf +++ b/infra/tf/k8s_infra/cockroachdb.tf @@ -103,7 +103,14 @@ data "kubernetes_secret" "crdb_ca" { } resource "kubernetes_config_map" "crdb_ca" { - for_each = local.cockroachdb_k8s ? toset(["rivet-service", "bolt", "prometheus"]) : toset([]) + for_each = toset( + local.cockroachdb_k8s ? + flatten([ + ["rivet-service", "bolt"], + var.prometheus_enabled ? ["grafana"] : [] + ]) + : [] + ) metadata { name = "crdb-ca" diff --git a/infra/tf/k8s_infra/grafana.tf b/infra/tf/k8s_infra/grafana.tf index 38cf626c7e..248ba37fc1 100644 --- a/infra/tf/k8s_infra/grafana.tf +++ b/infra/tf/k8s_infra/grafana.tf @@ -1,4 +1,12 @@ locals { + service_grafana = lookup(var.services, "grafana", { + count = 1 + resources = { + cpu = 500 + memory = 512 + } + }) + grafana_dashboards = { for f in fileset("${path.module}/grafana_dashboards/", "*.json"): "${trimsuffix(f, ".json")}" => { @@ -7,12 +15,124 @@ locals { } } +resource "kubernetes_namespace" "grafana" { + count = var.prometheus_enabled ? 1 : 0 + + metadata { + name = "grafana" + } +} + +resource "helm_release" "grafana" { + count = var.prometheus_enabled ? 1 : 0 + depends_on = [helm_release.vpa] + + name = "grafana" + namespace = kubernetes_namespace.grafana.0.metadata.0.name + repository = "https://grafana.github.io/helm-charts" + chart = "grafana" + version = "7.3.9" + values = [yamlencode({ + "grafana.ini" = { + auth = { + disable_login_form = true + } + "auth.anonymous" = { + enabled = true + org_role = "Admin" + } + } + + resources = var.limit_resources ? { + limits = { + memory = "${local.service_grafana.resources.memory}Mi" + cpu = "${local.service_grafana.resources.cpu}m" + } + } : null + + datasources = { + "datasources.yaml" = { + apiVersion = 1 + + datasources = [ + { + name = "Prometheus" + type = "prometheus" + uid = "prometheus" + url = "http://prometheus-kube-prometheus-prometheus.prometheus:9090/" + access = "proxy" + isDefault = true + jsonData = { + httpMethod = "POST" + # prometheus.prometheusSpec.scrapeInterval + timeInterval = "30s" + } + }, + { + name = "Loki" + type = "loki" + uid = "loki" + url = "http://loki-gateway.loki.svc.cluster.local:80/" + access = "proxy" + jsonData = {} + }, + { + name = "CockroachDB" + type = "postgres" + uid = "crdb" + url = local.crdb_host + user = module.crdb_user_grafana_secrets.values["crdb/user/grafana/username"] + secureJsonData = { + password = module.crdb_user_grafana_secrets.values["crdb/user/grafana/password"] + } + jsonData = { + sslmode = "verify-ca" + sslRootCertFile = "/local/crdb/ca.crt" + } + secret = true + } + ] + } + } + + extraConfigmapMounts = [ + # TLS Cert for postgres datasource + { + name = kubernetes_config_map.crdb_ca["grafana"].metadata.0.name + configMap = "crdb-ca" + mountPath = "/local/crdb/ca.crt" + subPath = "ca.crt" + readOnly = true + } + ] + + sidecar = { + dashboards = { + enabled = true + } + } + + serviceMonitor = { + enabled = true + path = "/metrics" + labels = {} + + interval = "" + scheme = "http" + tlsConfig = {} + scrapeTimeout = "15s" + + relabelings = [] + } + })] +} + resource "kubernetes_config_map" "grafana_dashboard" { for_each = var.prometheus_enabled ? local.grafana_dashboards : {} metadata { - namespace = kubernetes_namespace.prometheus.0.metadata.0.name - name = "prometheus-rivet-${each.key}" + namespace = kubernetes_namespace.grafana.0.metadata.0.name + name = "grafana-rivet-${each.key}" labels = { grafana_dashboard = "1" } @@ -22,4 +142,3 @@ resource "kubernetes_config_map" "grafana_dashboard" { "${each.key}.json" = each.value.body } } - diff --git a/infra/tf/k8s_infra/prometheus.tf b/infra/tf/k8s_infra/prometheus.tf index 78c248df68..e8dd15b03f 100644 --- a/infra/tf/k8s_infra/prometheus.tf +++ b/infra/tf/k8s_infra/prometheus.tf @@ -43,14 +43,6 @@ locals { } }) - service_grafana = lookup(var.services, "grafana", { - count = 1 - resources = { - cpu = 500 - memory = 512 - } - }) - has_slack_receiver = ( module.alertmanager_secrets.values["alertmanager/slack/url"] != "" && module.alertmanager_secrets.values["alertmanager/slack/channel"] != "" @@ -325,72 +317,9 @@ resource "helm_release" "prometheus" { } } + # Configured in grafana.tf grafana = { - "grafana.ini" = { - auth = { - disable_login_form = true - } - "auth.anonymous" = { - enabled = true - org_role = "Admin" - } - } - - resources = var.limit_resources ? { - limits = { - memory = "${local.service_grafana.resources.memory}Mi" - cpu = "${local.service_grafana.resources.cpu}m" - } - } : null - - additionalDataSources = [ - { - name = "Loki" - type = "loki" - uid = "loki" - url = "http://loki-gateway.loki.svc.cluster.local:80/" - access = "proxy" - jsonData = {} - }, - { - name = "CockroachDB" - type = "postgres" - uid = "crdb" - url = local.crdb_host - user = module.crdb_user_grafana_secrets.values["crdb/user/grafana/username"] - secureJsonData = { - password = module.crdb_user_grafana_secrets.values["crdb/user/grafana/password"] - } - jsonData = { - sslmode = "verify-ca" - sslRootCertFile = "/local/crdb/ca.crt" - } - } - ] - - extraConfigmapMounts = [ - # TLS Cert for postgres datasource - { - name = "crdb-ca" - configMap = "crdb-ca" - mountPath = "/local/crdb/ca.crt" - subPath = "ca.crt" - readOnly = true - } - ] - - serviceMonitor = { - enabled = true - path = "/metrics" - labels = {} - - interval = "" - scheme = "http" - tlsConfig = {} - scrapeTimeout = "15s" - - relabelings = [] - } + enabled = false } extraManifests = flatten([ diff --git a/infra/tf/k8s_infra/traefik_tunnel.tf b/infra/tf/k8s_infra/traefik_tunnel.tf index 5683bfa349..7793a4b248 100644 --- a/infra/tf/k8s_infra/traefik_tunnel.tf +++ b/infra/tf/k8s_infra/traefik_tunnel.tf @@ -1,7 +1,7 @@ locals { # Specify what services to expose via the tunnel server tunnel_services = merge(flatten([ - [{ + var.edge_enabled ? [{ "api-internal" = { service = "rivet-api-internal-monolith" service_namespace = kubernetes_namespace.rivet_service.metadata[0].name @@ -30,13 +30,7 @@ locals { service_namespace = kubernetes_namespace.nomad.0.metadata[0].name service_port = 4647 } - - "api-internal" = { - service = "rivet-api-internal-monolith" - service_namespace = kubernetes_namespace.rivet_service.metadata[0].name - service_port = 80 - } - }], + }] : [], var.prometheus_enabled ? [{ "vector" = { service = "vector" diff --git a/infra/tf/tls/root_ca.tf b/infra/tf/tls/root_ca.tf index a7e087dc35..32b7488ca2 100644 --- a/infra/tf/tls/root_ca.tf +++ b/infra/tf/tls/root_ca.tf @@ -25,7 +25,7 @@ resource "tls_self_signed_cert" "root_ca" { } resource "kubernetes_secret" "ingress_tls_ca_cert_locally_signed" { - for_each = toset(["traefik-tunnel"]) + for_each = toset(var.edge_enabled ? ["traefik-tunnel"] : []) metadata { name = "ingress-tls-ca-cert-locally-signed" diff --git a/infra/tf/tls/tunnel_server.tf b/infra/tf/tls/tunnel_server.tf index 101eed2e48..b54927dfe7 100644 --- a/infra/tf/tls/tunnel_server.tf +++ b/infra/tf/tls/tunnel_server.tf @@ -29,7 +29,13 @@ resource "tls_locally_signed_cert" "locally_signed_tunnel_server" { resource "kubernetes_secret" "ingress_tls_cert_tunnel_server" { type = "kubernetes.io/tls" - for_each = toset(["traefik-tunnel", "nomad", "rivet-service", "vector"]) + for_each = toset(flatten( + [ + ["rivet-service"], + var.edge_enabled ? ["traefik-tunnel", "nomad"] : [], + var.prometheus_enabled ? ["vector"] : [], + ] + )) metadata { name = "ingress-tls-cert-tunnel-server" diff --git a/infra/tf/tls/vars.tf b/infra/tf/tls/vars.tf index 5aa9a064bc..6f9bd1f3c4 100644 --- a/infra/tf/tls/vars.tf +++ b/infra/tf/tls/vars.tf @@ -15,6 +15,16 @@ variable "domain_job" { type = string } +# MARK: Nomad +variable "edge_enabled" { + type = bool +} + +# MARK: Prometheus +variable "prometheus_enabled" { + type = bool +} + # MARK: K8s variable "kubeconfig_path" { type = string diff --git a/lib/bolt/core/src/dep/terraform/gen.rs b/lib/bolt/core/src/dep/terraform/gen.rs index 80255b54a3..a8e19d348f 100644 --- a/lib/bolt/core/src/dep/terraform/gen.rs +++ b/lib/bolt/core/src/dep/terraform/gen.rs @@ -232,7 +232,7 @@ async fn vars(ctx: &ProjectContext) { // Edge nodes vars.insert( "edge_enabled".into(), - json!(&!config.rivet.provisioning.is_some()), + json!(config.rivet.provisioning.is_some()), ); // Tunnels @@ -248,7 +248,7 @@ async fn vars(ctx: &ProjectContext) { "grafana", json!({ "name": "Grafana", - "service": "http://prometheus-grafana.prometheus.svc.cluster.local:80", + "service": "http://grafana.grafana.svc.cluster.local:80", "access_groups": access.as_ref().map(|x| vec![x.groups.engineering.clone()]).unwrap_or_default(), "service_tokens": access.as_ref().map(|x| vec![x.services.grafana.clone()]).unwrap_or_default(), }),