Skip to content

Commit

Permalink
chore(infra): disable vpa for prometheus & traffic server (#817)
Browse files Browse the repository at this point in the history
Currently, Vertical Pod Autoscalers (VPAs) cause these issues:

- Autoscaling doesn't play nice with Karpenter
- Prometheus takes a long time to replay the WAL after each restart, so VPA-triggered restarts cause frequent Prometheus outages
- Prometheus is better managed manually, since the load varies dramatically
  • Loading branch information
NathanFlurry committed May 31, 2024
1 parent c52517b commit 2c1656c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 79 deletions.
39 changes: 0 additions & 39 deletions infra/tf/k8s_infra/prometheus.tf
Original file line number Diff line number Diff line change
Expand Up @@ -301,44 +301,5 @@ resource "helm_release" "prometheus" {
enabled = false
forceDeployDashboards = true
}

extraManifests = flatten([
# Autoscale if resource limits enabled
#
# See PR for official support: https://github.com/prometheus-community/helm-charts/pull/3097
var.limit_resources ? [{
apiVersion = "autoscaling.k8s.io/v1"
kind = "VerticalPodAutoscaler"
metadata = {
name = "prometheus-prometheus-kube-prometheus-vpa"
# namespace = kubernetes_namespace.prometheus.metadata.0.name
}
spec = {
targetRef = {
apiVersion = "apps/v1"
kind = "StatefulSet"
name = "prometheus-prometheus-kube-prometheus-prometheus"
}
updatePolicy = {
updateMode = "Auto"
}
resourcePolicy = {
containerPolicies = [
{
containerName = "prometheus"
minAllowed = {
cpu = "100m"
memory = "500Mi"
}
maxAllowed = {
cpu = "4"
memory = "8Gi"
}
}
]
}
}
}] : []
])
})]
}
80 changes: 40 additions & 40 deletions infra/tf/k8s_infra/traffic_server.tf
Original file line number Diff line number Diff line change
Expand Up @@ -309,43 +309,43 @@ resource "kubectl_manifest" "traffic_server_transport" {
})
}

# Vertical Pod Autoscaler (VPA) manifest targeting the traffic server StatefulSet.
# Only created when var.limit_resources is true (count is 0 otherwise).
resource "kubectl_manifest" "traffic_server_vpa" {
count = var.limit_resources ? 1 : 0
# Apply only after the Traefik and VPA Helm releases.
# NOTE(review): presumably helm_release.vpa installs the VerticalPodAutoscaler CRD — confirm.
depends_on = [helm_release.traefik, helm_release.vpa]

# The manifest is written as an HCL object and serialized to YAML.
yaml_body = yamlencode({
apiVersion = "autoscaling.k8s.io/v1"
kind = "VerticalPodAutoscaler"

metadata = {
name = "traffic-server-vpa"
# Same namespace as the traffic server itself.
namespace = kubernetes_namespace.traffic_server.metadata.0.name
}

spec = {
# Workload whose pods this VPA manages.
targetRef = {
apiVersion = "apps/v1"
kind = "StatefulSet"
name = kubernetes_stateful_set.traffic_server.metadata.0.name
}
updatePolicy = {
# NOTE(review): "Auto" presumably lets the VPA apply its recommendations
# itself, which may restart pods — confirm against the VPA docs.
updateMode = "Auto"
}
resourcePolicy = {
containerPolicies = [
{
# Name of the first container declared in the StatefulSet's pod template.
containerName = kubernetes_stateful_set.traffic_server.spec.0.template.0.spec.0.container.0.name
# Lower bound on the resources the VPA may assign.
minAllowed = {
cpu = "500m"
memory = "500Mi"
}
# Upper bound on the resources the VPA may assign.
maxAllowed = {
cpu = "8"
memory = "16Gi"
}
}
]
}
}
})
}
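# NOTE: The traffic server VPA is disabled (#817) because autoscaling doesn't play nice with Karpenter.
# resource "kubectl_manifest" "traffic_server_vpa" {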
# count = var.limit_resources ? 1 : 0
# depends_on = [helm_release.traefik, helm_release.vpa]
#
# yaml_body = yamlencode({
# apiVersion = "autoscaling.k8s.io/v1"
# kind = "VerticalPodAutoscaler"
#
# metadata = {
# name = "traffic-server-vpa"
# namespace = kubernetes_namespace.traffic_server.metadata.0.name
# }
#
# spec = {
# targetRef = {
# apiVersion = "apps/v1"
# kind = "StatefulSet"
# name = kubernetes_stateful_set.traffic_server.metadata.0.name
# }
# updatePolicy = {
# updateMode = "Auto"
# }
# resourcePolicy = {
# containerPolicies = [
# {
# containerName = kubernetes_stateful_set.traffic_server.spec.0.template.0.spec.0.container.0.name
# minAllowed = {
# cpu = "500m"
# memory = "500Mi"
# }
# maxAllowed = {
# cpu = "8"
# memory = "16Gi"
# }
# }
# ]
# }
# }
# })
# }

0 comments on commit 2c1656c

Please sign in to comment.