diff --git a/.dockerignore b/.dockerignore index d83bad898a..379491ca22 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,8 +4,8 @@ # Dockerfiles to. # Git -**/.git/ -**/.gitignore +# **/.git/ +# **/.gitignore **/.DS_Store **/symbolCache.db diff --git a/infra/default-builds/dockerfiles/test-mm-lobby-echo/Dockerfile b/infra/default-builds/dockerfiles/test-mm-lobby-echo/Dockerfile index ca3678e616..b052729ee0 100644 --- a/infra/default-builds/dockerfiles/test-mm-lobby-echo/Dockerfile +++ b/infra/default-builds/dockerfiles/test-mm-lobby-echo/Dockerfile @@ -1,4 +1,4 @@ -FROM clux/muslrust:1.75.0 AS build +FROM clux/muslrust:1.77.2 AS build RUN cargo new --bin /app WORKDIR /app COPY Cargo.toml ./ diff --git a/infra/tf/cockroachdb_managed/main.tf b/infra/tf/cockroachdb_managed/main.tf index 74a4155886..b41d59a767 100644 --- a/infra/tf/cockroachdb_managed/main.tf +++ b/infra/tf/cockroachdb_managed/main.tf @@ -54,7 +54,10 @@ data "cockroach_cluster_cert" "main" { } resource "kubernetes_config_map" "crdb_ca" { - for_each = toset(["rivet-service", "bolt"]) + for_each = toset(flatten([ + ["rivet-service", "bolt"], + var.prometheus_enabled ? ["grafana"] : [] + ])) metadata { name = "crdb-ca" diff --git a/infra/tf/cockroachdb_managed/vars.tf b/infra/tf/cockroachdb_managed/vars.tf index edc72063a9..3352f580d2 100644 --- a/infra/tf/cockroachdb_managed/vars.tf +++ b/infra/tf/cockroachdb_managed/vars.tf @@ -17,3 +17,7 @@ variable "cockroachdb_request_unit_limit" { variable "cockroachdb_storage_limit" { type = string } + +variable "prometheus_enabled" { + type = bool +} diff --git a/infra/tf/grafana/grafana.tf b/infra/tf/grafana/grafana.tf new file mode 100644 index 0000000000..1051062ddb --- /dev/null +++ b/infra/tf/grafana/grafana.tf @@ -0,0 +1,142 @@ +locals { + service_grafana = lookup(var.services, "grafana", { + count = 1 + resources = { + cpu = 500 + memory = 512 + } + }) + + grafana_dashboards = { + for f in fileset("${path.module}/grafana_dashboards/", "*.json"): + "${trimsuffix(f, ".json")}" => { + body = file("${path.module}/grafana_dashboards/${f}") + } + } + + crdb_host = "${try(data.terraform_remote_state.cockroachdb_k8s.outputs.host, data.terraform_remote_state.cockroachdb_managed.outputs.host)}:${try(data.terraform_remote_state.cockroachdb_k8s.outputs.port, data.terraform_remote_state.cockroachdb_managed.outputs.port)}" +} + +module "crdb_user_grafana_secrets" { + source = "../modules/secrets" + + keys = [ "crdb/user/grafana/username", "crdb/user/grafana/password" ] +} + +resource "helm_release" "grafana" { + name = "grafana" + namespace = "grafana" + repository = "https://grafana.github.io/helm-charts" + chart = "grafana" + version = "7.3.9" + values = [yamlencode({ + "grafana.ini" = { + auth = { + disable_login_form = true + } + "auth.anonymous" = { + enabled = true + org_role = "Admin" + } + } + + resources = var.limit_resources ? { + limits = { + memory = "${local.service_grafana.resources.memory}Mi" + cpu = "${local.service_grafana.resources.cpu}m" + } + } : null + + datasources = { + "datasources.yaml" = { + apiVersion = 1 + + datasources = [ + { + name = "Prometheus" + type = "prometheus" + uid = "prometheus" + url = "http://prometheus-kube-prometheus-prometheus.prometheus:9090/" + access = "proxy" + isDefault = true + jsonData = { + httpMethod = "POST" + # prometheus.prometheusSpec.scrapeInterval + timeInterval = "30s" + } + }, + { + name = "Loki" + type = "loki" + uid = "loki" + url = "http://loki-gateway.loki.svc.cluster.local:80/" + access = "proxy" + jsonData = {} + }, + { + name = "CockroachDB" + type = "postgres" + uid = "crdb" + url = local.crdb_host + user = module.crdb_user_grafana_secrets.values["crdb/user/grafana/username"] + secureJsonData = { + password = module.crdb_user_grafana_secrets.values["crdb/user/grafana/password"] + } + jsonData = { + sslmode = "verify-ca" + sslRootCertFile = "/local/crdb/ca.crt" + } + secret = true + } + ] + } + } + + extraConfigmapMounts = [ + # TLS Cert for postgres datasource + { + name = "crdb-ca" + configMap = "crdb-ca" + mountPath = "/local/crdb/ca.crt" + subPath = "ca.crt" + readOnly = true + } + ] + + sidecar = { + dashboards = { + enabled = true + searchNamespace = ["grafana", "prometheus"] + } + } + + serviceMonitor = { + enabled = true + path = "/metrics" + labels = {} + + interval = "" + scheme = "http" + tlsConfig = {} + scrapeTimeout = "15s" + + relabelings = [] + } + })] +} + +resource "kubernetes_config_map" "grafana_dashboard" { + for_each = local.grafana_dashboards + + metadata { + namespace = "grafana" + name = "grafana-rivet-${each.key}" + labels = { + grafana_dashboard = "1" + } + } + + data = { + "${each.key}.json" = each.value.body + } +} diff --git a/infra/tf/k8s_infra/grafana_dashboards/cache.json b/infra/tf/grafana/grafana_dashboards/cache.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/cache.json rename to infra/tf/grafana/grafana_dashboards/cache.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-api.json b/infra/tf/grafana/grafana_dashboards/chirp-api.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-api.json rename to infra/tf/grafana/grafana_dashboards/chirp-api.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-operation.json b/infra/tf/grafana/grafana_dashboards/chirp-operation.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-operation.json rename to infra/tf/grafana/grafana_dashboards/chirp-operation.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-perf-spans.json b/infra/tf/grafana/grafana_dashboards/chirp-perf-spans.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-perf-spans.json rename to infra/tf/grafana/grafana_dashboards/chirp-perf-spans.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-service.json b/infra/tf/grafana/grafana_dashboards/chirp-service.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-service.json rename to infra/tf/grafana/grafana_dashboards/chirp-service.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/node-exporter-full.json b/infra/tf/grafana/grafana_dashboards/node-exporter-full.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/node-exporter-full.json rename to infra/tf/grafana/grafana_dashboards/node-exporter-full.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/node-exporter-multiple.json b/infra/tf/grafana/grafana_dashboards/node-exporter-multiple.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/node-exporter-multiple.json rename to infra/tf/grafana/grafana_dashboards/node-exporter-multiple.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/provisioning.json b/infra/tf/grafana/grafana_dashboards/provisioning.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/provisioning.json rename to infra/tf/grafana/grafana_dashboards/provisioning.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/resource-allocations.json b/infra/tf/grafana/grafana_dashboards/resource-allocations.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/resource-allocations.json rename to infra/tf/grafana/grafana_dashboards/resource-allocations.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/rivet-logs.json b/infra/tf/grafana/grafana_dashboards/rivet-logs.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/rivet-logs.json rename to infra/tf/grafana/grafana_dashboards/rivet-logs.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/rivet-sql.json b/infra/tf/grafana/grafana_dashboards/rivet-sql.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/rivet-sql.json rename to infra/tf/grafana/grafana_dashboards/rivet-sql.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/traefik-services.json b/infra/tf/grafana/grafana_dashboards/traefik-services.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/traefik-services.json rename to infra/tf/grafana/grafana_dashboards/traefik-services.json diff --git a/infra/tf/grafana/providers.tf b/infra/tf/grafana/providers.tf new file mode 100644 index 0000000000..f4a763006c --- /dev/null +++ b/infra/tf/grafana/providers.tf @@ -0,0 +1,10 @@ +provider "kubernetes" { + config_path = var.kubeconfig_path +} + +provider "helm" { + kubernetes { + config_path = var.kubeconfig_path + } +} + diff --git a/infra/tf/grafana/vars.tf b/infra/tf/grafana/vars.tf new file mode 100644 index 0000000000..e338af62c0 --- /dev/null +++ b/infra/tf/grafana/vars.tf @@ -0,0 +1,23 @@ +variable "namespace" { + type = string +} + +# MARK: Services +variable "services" { + type = map(object({ + count = number + resources = object({ + cpu = number + memory = number + }) + })) +} + +# MARK: K8s +variable "kubeconfig_path" { + type = string +} + +variable "limit_resources" { + type = bool +} diff --git a/infra/tf/k8s_infra/grafana.tf b/infra/tf/k8s_infra/grafana.tf index 248ba37fc1..2a3346ee3e 100644 --- a/infra/tf/k8s_infra/grafana.tf +++ b/infra/tf/k8s_infra/grafana.tf @@ -1,20 +1,3 @@ -locals { - service_grafana = lookup(var.services, "grafana", { - count = 1 - resources = { - cpu = 500 - memory = 512 - } - }) - - grafana_dashboards = { - for f in fileset("${path.module}/grafana_dashboards/", "*.json"): - "${trimsuffix(f, ".json")}" => { - body = file("${path.module}/grafana_dashboards/${f}") - } - } -} - resource "kubernetes_namespace" "grafana" { count = var.prometheus_enabled ? 1 : 0 @@ -23,122 +6,3 @@ resource "kubernetes_namespace" "grafana" { } } -resource "helm_release" "grafana" { - count = var.prometheus_enabled ? 1 : 0 - depends_on = [helm_release.vpa] - - name = "grafana" - namespace = kubernetes_namespace.grafana.0.metadata.0.name - repository = "https://grafana.github.io/helm-charts" - chart = "grafana" - version = "7.3.9" - values = [yamlencode({ - "grafana.ini" = { - auth = { - disable_login_form = true - } - "auth.anonymous" = { - enabled = true - org_role = "Admin" - } - } - - resources = var.limit_resources ? { - limits = { - memory = "${local.service_grafana.resources.memory}Mi" - cpu = "${local.service_grafana.resources.cpu}m" - } - } : null - - datasources = { - "datasources.yaml" = { - apiVersion = 1 - - datasources = [ - { - name = "Prometheus" - type = "prometheus" - uid = "prometheus" - url = "http://prometheus-kube-prometheus-prometheus.prometheus:9090/" - access = "proxy" - isDefault = true - jsonData = { - httpMethod = "POST" - # prometheus.prometheusSpec.scrapeInterval - timeInterval = "30s" - } - }, - { - name = "Loki" - type = "loki" - uid = "loki" - url = "http://loki-gateway.loki.svc.cluster.local:80/" - access = "proxy" - jsonData = {} - }, - { - name = "CockroachDB" - type = "postgres" - uid = "crdb" - url = local.crdb_host - user = module.crdb_user_grafana_secrets.values["crdb/user/grafana/username"] - secureJsonData = { - password = module.crdb_user_grafana_secrets.values["crdb/user/grafana/password"] - } - jsonData = { - sslmode = "verify-ca" - sslRootCertFile = "/local/crdb/ca.crt" - } - secret = true - } - ] - } - } - - extraConfigmapMounts = [ - # TLS Cert for postgres datasource - { - name = kubernetes_config_map.crdb_ca["grafana"].metadata.0.name - configMap = "crdb-ca" - mountPath = "/local/crdb/ca.crt" - subPath = "ca.crt" - readOnly = true - } - ] - - sidecar = { - dashboards = { - enabled = true - } - } - - serviceMonitor = { - enabled = true - path = "/metrics" - labels = {} - - interval = "" - scheme = "http" - tlsConfig = {} - scrapeTimeout = "15s" - - relabelings = [] - } - })] -} - -resource "kubernetes_config_map" "grafana_dashboard" { - for_each = var.prometheus_enabled ? local.grafana_dashboards : {} - - metadata { - namespace = kubernetes_namespace.grafana.0.metadata.0.name - name = "grafana-rivet-${each.key}" - labels = { - grafana_dashboard = "1" - } - } - - data = { - "${each.key}.json" = each.value.body - } -} diff --git a/infra/tf/k8s_infra/prometheus.tf b/infra/tf/k8s_infra/prometheus.tf index e8dd15b03f..f9135dddcb 100644 --- a/infra/tf/k8s_infra/prometheus.tf +++ b/infra/tf/k8s_infra/prometheus.tf @@ -63,8 +63,6 @@ locals { ] }] : [] ]) - - crdb_host = "${try(data.terraform_remote_state.cockroachdb_k8s.outputs.host, data.terraform_remote_state.cockroachdb_managed.outputs.host)}:${try(data.terraform_remote_state.cockroachdb_k8s.outputs.port, data.terraform_remote_state.cockroachdb_managed.outputs.port)}" } module "alertmanager_secrets" { @@ -320,6 +318,7 @@ resource "helm_release" "prometheus" { # Configured in grafana.tf grafana = { enabled = false + forceDeployDashboards = true } extraManifests = flatten([ diff --git a/infra/tf/k8s_infra/traefik.tf b/infra/tf/k8s_infra/traefik.tf index 3e5565dabf..d2a4af33b5 100644 --- a/infra/tf/k8s_infra/traefik.tf +++ b/infra/tf/k8s_infra/traefik.tf @@ -8,6 +8,7 @@ module "traefik_secrets" { source = "../modules/secrets" keys = [ + "rivet/api_route/token", "rivet/api_traefik_provider/token", ] } @@ -64,7 +65,9 @@ resource "helm_release" "traefik" { } : null additionalArguments = [ - "--providers.http.endpoint=http://rivet-api-internal-monolith.rivet-service.svc.cluster.local/traefik-provider/config/core?token=${module.traefik_secrets.values["rivet/api_traefik_provider/token"]}", + # "--providers.http.endpoint=http://rivet-api-internal-monolith.rivet-service.svc.cluster.local/traefik-provider/config/core?token=${module.traefik_secrets.values["rivet/api_traefik_provider/token"]}", + # LEGACY: + "--providers.http.endpoint=http://rivet-api-route.rivet-service.svc.cluster.local/traefik/config/core?token=${module.traefik_secrets.values["rivet/api_route/token"]}", "--providers.http.pollInterval=2.5s", # See docs/infrastructure/TIMEOUTS.md "--entryPoints.web.transport.lifeCycle.graceTimeOut=60s", diff --git a/infra/tf/k8s_infra/traefik_tunnel.tf b/infra/tf/k8s_infra/traefik_tunnel.tf index 7793a4b248..01f95c442a 100644 --- a/infra/tf/k8s_infra/traefik_tunnel.tf +++ b/infra/tf/k8s_infra/traefik_tunnel.tf @@ -7,6 +7,12 @@ locals { service_namespace = kubernetes_namespace.rivet_service.metadata[0].name service_port = 80 }, + # LEGACY: Route to api-route + "api-route" = { + service = "rivet-api-route" + service_namespace = kubernetes_namespace.rivet_service.metadata[0].name + service_port = 80 + }, # LEGACY: Addresses a random Nomad server. "nomad" = { service = "nomad-server" diff --git a/lib/bolt/core/src/dep/cargo/cli.rs b/lib/bolt/core/src/dep/cargo/cli.rs index cf0741d5b5..88b01759bd 100644 --- a/lib/bolt/core/src/dep/cargo/cli.rs +++ b/lib/bolt/core/src/dep/cargo/cli.rs @@ -146,9 +146,9 @@ pub async fn build<'a, T: AsRef>(ctx: &ProjectContext, opts: BuildOpts<'a, r#" # syntax=docker/dockerfile:1.2 - FROM rust:1.72-slim + FROM rust:1.77.2-slim - RUN apt-get update && apt-get install -y protobuf-compiler pkg-config libssl-dev g++ + RUN apt-get update && apt-get install -y protobuf-compiler pkg-config libssl-dev g++ git RUN apt-get install --yes libpq-dev wget RUN wget https://github.com/mozilla/sccache/releases/download/v0.2.15/sccache-v0.2.15-x86_64-unknown-linux-musl.tar.gz \ diff --git a/lib/bolt/core/src/dep/terraform/remote_states.rs b/lib/bolt/core/src/dep/terraform/remote_states.rs index cad0450c9c..6676ab23ac 100644 --- a/lib/bolt/core/src/dep/terraform/remote_states.rs +++ b/lib/bolt/core/src/dep/terraform/remote_states.rs @@ -21,7 +21,6 @@ pub fn dependency_graph(_ctx: &ProjectContext) -> HashMap<&'static str, Vec vec![ RemoteStateBuilder::default().plan_id("cockroachdb_k8s").build().unwrap(), - RemoteStateBuilder::default().plan_id("cockroachdb_managed").build().unwrap(), ], "cockroachdb_managed" => vec![ RemoteStateBuilder::default().plan_id("k8s_cluster_aws").build().unwrap(), @@ -35,6 +34,10 @@ pub fn dependency_graph(_ctx: &ProjectContext) -> HashMap<&'static str, Vec vec![ RemoteStateBuilder::default().plan_id("dns").build().unwrap(), ], + "grafana" => vec![ + RemoteStateBuilder::default().plan_id("cockroachdb_k8s").build().unwrap(), + RemoteStateBuilder::default().plan_id("cockroachdb_managed").build().unwrap(), + ], } } diff --git a/lib/bolt/core/src/tasks/infra/mod.rs b/lib/bolt/core/src/tasks/infra/mod.rs index c78c5166a2..d5ee08ba88 100644 --- a/lib/bolt/core/src/tasks/infra/mod.rs +++ b/lib/bolt/core/src/tasks/infra/mod.rs @@ -126,6 +126,15 @@ pub fn build_plan( } } + // Kubernetes + plan.push(PlanStep { + name_id: "k8s-infra", + kind: PlanStepKind::Terraform { + plan_id: "k8s_infra".into(), + needs_destroy: false, + }, + }); + // CockroachDB match ctx.ns().cockroachdb.provider { ns::CockroachDBProvider::Kubernetes {} => { @@ -148,15 +157,6 @@ pub fn build_plan( } } - // Kubernetes - plan.push(PlanStep { - name_id: "k8s-infra", - kind: PlanStepKind::Terraform { - plan_id: "k8s_infra".into(), - needs_destroy: false, - }, - }); - if ctx.tls_enabled() { // TLS plan.push(PlanStep { @@ -225,6 +225,13 @@ pub fn build_plan( // Vector if ctx.ns().prometheus.is_some() { + plan.push(PlanStep { + name_id: "grafana", + kind: PlanStepKind::Terraform { + plan_id: "grafana".into(), + needs_destroy: false, + }, + }); plan.push(PlanStep { name_id: "vector", kind: PlanStepKind::Terraform { diff --git a/lib/job-runner/Dockerfile b/lib/job-runner/Dockerfile index 65df97b356..e29d332fac 100644 --- a/lib/job-runner/Dockerfile +++ b/lib/job-runner/Dockerfile @@ -1,4 +1,4 @@ -FROM clux/muslrust:1.73.0 +FROM clux/muslrust:1.77.2 WORKDIR /app COPY Cargo.toml Cargo.lock . diff --git a/svc/api/status/src/route/matchmaker.rs b/svc/api/status/src/route/matchmaker.rs index 584f6fe462..66c08779c0 100644 --- a/svc/api/status/src/route/matchmaker.rs +++ b/svc/api/status/src/route/matchmaker.rs @@ -147,10 +147,51 @@ pub async fn status( } }; - let port_default = unwrap!(res.lobby.ports.get("default")); + // Test connection, defer error + let lobby_id = res.lobby.lobby_id.clone(); + let test_res = tokio::time::timeout( + Duration::from_secs(15), + test_lobby_connection(res.lobby, res.player), + ) + .await; + + // Shut down lobby regardless of connection status + // + // This way if the connection fails to connect, we still clean up the lobby instead of spamming + // lobbies with unconnected players + msg!([ctx] mm::msg::lobby_stop(lobby_id) { + lobby_id: Some(lobby_id.into()), + }) + .await?; + + // Unwrap res + match test_res { + Ok(Ok(())) => {} + Ok(Err(err)) => { + return Err(err); + } + Err(_) => { + bail_with!( + INTERNAL_STATUS_CHECK_FAILED, + error = "test lobby connection timed out" + ) + } + } + // if let Err(err) = test_res { + // return Err(err); + // } + + Ok(serde_json::json!({})) +} + +async fn test_lobby_connection( + lobby: Box, + player: Box, +) -> GlobalResult<()> { + let port_default = unwrap!(lobby.ports.get("default")); let host = unwrap_ref!(port_default.host); let hostname = &port_default.hostname; - let token = &res.player.token; + let token = &player.token; // Look up IP for GG nodes let gg_ips = lookup_dns(hostname).await?; @@ -200,7 +241,7 @@ pub async fn status( ) })?; - Ok(serde_json::json!({})) + Ok(()) } /// Returns the IP addresses for a given hostname. diff --git a/svc/pkg/cluster/worker/src/workers/server_dns_create.rs b/svc/pkg/cluster/worker/src/workers/server_dns_create.rs index 1c62f40506..b7ce990cbb 100644 --- a/svc/pkg/cluster/worker/src/workers/server_dns_create.rs +++ b/svc/pkg/cluster/worker/src/workers/server_dns_create.rs @@ -140,7 +140,7 @@ async fn inner( [ctx, @tx tx] " UPDATE db_cluster.servers_cloudflare - SET dns_record_id = $2 + SET secondary_dns_record_id = $2 WHERE server_id = $1 AND destroy_ts IS NULL