From 5da29a4d718d0254c00814b80a130118387df61d Mon Sep 17 00:00:00 2001
From: NathanFlurry
Date: Fri, 31 May 2024 21:58:45 +0000
Subject: [PATCH] chore(infra): disable vpa for prometheus & traffic server (#817)

Currently VPAs cause these issues:
- Autoscaling doesn't play nice with Karpenter
- Prometheus takes a long time to replay the WAL, which causes frequent Prometheus outages
- Prometheus is better managed manually, since the load varies dramatically
---
 infra/tf/k8s_infra/prometheus.tf     | 39 --------------
 infra/tf/k8s_infra/traffic_server.tf | 80 ++++++++++++++--------------
 2 files changed, 40 insertions(+), 79 deletions(-)

diff --git a/infra/tf/k8s_infra/prometheus.tf b/infra/tf/k8s_infra/prometheus.tf
index 2a86398dca..1d85d5019e 100644
--- a/infra/tf/k8s_infra/prometheus.tf
+++ b/infra/tf/k8s_infra/prometheus.tf
@@ -301,44 +301,5 @@ resource "helm_release" "prometheus" {
 			enabled = false
 			forceDeployDashboards = true
 		}
-
-		extraManifests = flatten([
-			# Autoscale if resource limits enabled
-			#
-			# See PR for official support: https://github.com/prometheus-community/helm-charts/pull/3097
-			var.limit_resources ? [{
-				apiVersion = "autoscaling.k8s.io/v1"
-				kind = "VerticalPodAutoscaler"
-				metadata = {
-					name = "prometheus-prometheus-kube-prometheus-vpa"
-					# namespace = kubernetes_namespace.prometheus.metadata.0.name
-				}
-				spec = {
-					targetRef = {
-						apiVersion = "apps/v1"
-						kind = "StatefulSet"
-						name = "prometheus-prometheus-kube-prometheus-prometheus"
-					}
-					updatePolicy = {
-						updateMode = "Auto"
-					}
-					resourcePolicy = {
-						containerPolicies = [
-							{
-								containerName = "prometheus"
-								minAllowed = {
-									cpu = "100m"
-									memory = "500Mi"
-								}
-								maxAllowed = {
-									cpu = "4"
-									memory = "8Gi"
-								}
-							}
-						]
-					}
-				}
-			}] : []
-		])
 	})]
 }
diff --git a/infra/tf/k8s_infra/traffic_server.tf b/infra/tf/k8s_infra/traffic_server.tf
index c37ae945df..e4144cb53b 100644
--- a/infra/tf/k8s_infra/traffic_server.tf
+++ b/infra/tf/k8s_infra/traffic_server.tf
@@ -309,43 +309,43 @@ resource "kubectl_manifest" "traffic_server_transport" {
 	})
 }
 
-resource "kubectl_manifest" "traffic_server_vpa" {
-	count = var.limit_resources ? 1 : 0
-	depends_on = [helm_release.traefik, helm_release.vpa]
-
-	yaml_body = yamlencode({
-		apiVersion = "autoscaling.k8s.io/v1"
-		kind = "VerticalPodAutoscaler"
-
-		metadata = {
-			name = "traffic-server-vpa"
-			namespace = kubernetes_namespace.traffic_server.metadata.0.name
-		}
-
-		spec = {
-			targetRef = {
-				apiVersion = "apps/v1"
-				kind = "StatefulSet"
-				name = kubernetes_stateful_set.traffic_server.metadata.0.name
-			}
-			updatePolicy = {
-				updateMode = "Auto"
-			}
-			resourcePolicy = {
-				containerPolicies = [
-					{
-						containerName = kubernetes_stateful_set.traffic_server.spec.0.template.0.spec.0.container.0.name
-						minAllowed = {
-							cpu = "500m"
-							memory = "500Mi"
-						}
-						maxAllowed = {
-							cpu = "8"
-							memory = "16Gi"
-						}
-					}
-				]
-			}
-		}
-	})
-}
+# resource "kubectl_manifest" "traffic_server_vpa" {
+# 	count = var.limit_resources ? 1 : 0
+# 	depends_on = [helm_release.traefik, helm_release.vpa]
+#
+# 	yaml_body = yamlencode({
+# 		apiVersion = "autoscaling.k8s.io/v1"
+# 		kind = "VerticalPodAutoscaler"
+#
+# 		metadata = {
+# 			name = "traffic-server-vpa"
+# 			namespace = kubernetes_namespace.traffic_server.metadata.0.name
+# 		}
+#
+# 		spec = {
+# 			targetRef = {
+# 				apiVersion = "apps/v1"
+# 				kind = "StatefulSet"
+# 				name = kubernetes_stateful_set.traffic_server.metadata.0.name
+# 			}
+# 			updatePolicy = {
+# 				updateMode = "Auto"
+# 			}
+# 			resourcePolicy = {
+# 				containerPolicies = [
+# 					{
+# 						containerName = kubernetes_stateful_set.traffic_server.spec.0.template.0.spec.0.container.0.name
+# 						minAllowed = {
+# 							cpu = "500m"
+# 							memory = "500Mi"
+# 						}
+# 						maxAllowed = {
+# 							cpu = "8"
+# 							memory = "16Gi"
+# 						}
+# 					}
+# 				]
+# 			}
+# 		}
+# 	})
+# }