From 09762453914a679a4a60fbbd18513762f31193f2 Mon Sep 17 00:00:00 2001
From: NathanFlurry
Date: Sat, 1 Jun 2024 08:33:50 +0000
Subject: [PATCH] fix(infra): upgrade karpenter to 0.32 & disable compaction (#834)

## Changes

- Upgrade `terraform-aws-modules/eks/aws` from 19.16.0 to 20.12.0 and the Karpenter Helm chart from v0.31.0 to v0.32.10
- Move aws-auth ConfigMap management into the standalone `aws-auth` sub-module and set `authentication_mode = "API_AND_CONFIG_MAP"`
- Replace the alpha `Provisioner`/`AWSNodeTemplate` manifests with v1beta1 `NodePool`/`EC2NodeClass`
- Disable compaction of non-empty nodes (`consolidationPolicy = "WhenEmpty"`, `expireAfter = "Never"`)
- Bump the AWS provider from 5.16.0 to 5.52.0

---
 infra/tf/k8s_cluster_aws/eks.tf       |  71 +++++++++----
 infra/tf/k8s_cluster_aws/karpenter.tf | 140 ++++++++++++++++----
 infra/tf/k8s_cluster_aws/main.tf      |   2 +-
 3 files changed, 139 insertions(+), 74 deletions(-)

diff --git a/infra/tf/k8s_cluster_aws/eks.tf b/infra/tf/k8s_cluster_aws/eks.tf
index 20254b4393..a7262b9a7d 100644
--- a/infra/tf/k8s_cluster_aws/eks.tf
+++ b/infra/tf/k8s_cluster_aws/eks.tf
@@ -2,7 +2,7 @@
 module "eks" {
 	source  = "terraform-aws-modules/eks/aws"
-	version = "19.16.0"
+	version = "20.12.0"
 
 	cluster_name = local.name
 	cluster_version = local.cluster_version
@@ -83,30 +83,11 @@ module "eks" {
 	create_cluster_security_group = false
 	create_node_security_group = false
 
-	manage_aws_auth_configmap = true
-	aws_auth_roles = [
-		# Allow users to assume the admin role
-		{
-			rolearn = aws_iam_role.eks_admin.arn
-			username = local.eks_admin_username
-			groups = [
-				"system:masters"
-			]
-		},
-		# We need to add in the Karpenter node IAM role for nodes launched by Karpenter
-		{
-			rolearn = module.karpenter.role_arn
-			username = "system:node:{{EC2PrivateDNSName}}"
-			groups = [
-				"system:bootstrappers",
-				"system:nodes",
-			]
-		},
-	]
-
 	# Enable root account to manage KMS
 	kms_key_enable_default_policy = true
 
+	authentication_mode = "API_AND_CONFIG_MAP"
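+	# Possible follow-up (sketch only, not wired up in this change): with API
+	# authentication enabled, the admin role could eventually be granted through
+	# an EKS access entry instead of the aws-auth ConfigMap, using the v20
+	# module's `access_entries` input:
+	#
+	# access_entries = {
+	# 	admin = {
+	# 		principal_arn = aws_iam_role.eks_admin.arn
+	# 		policy_associations = {
+	# 			admin = {
+	# 				policy_arn   = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy"
+	# 				access_scope = { type = "cluster" }
+	# 			}
+	# 		}
+	# 	}
+	# }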
"Karpenter IAM role for service account" + tags = local.tags } @@ -20,7 +29,7 @@ resource "helm_release" "karpenter" { name = "karpenter" repository = "oci://public.ecr.aws/karpenter" chart = "karpenter" - version = "v0.31.0" + version = "v0.32.10" values = [yamlencode({ controller = { @@ -37,85 +46,114 @@ resource "helm_release" "karpenter" { serviceAccount = { annotations = { - "eks.amazonaws.com/role-arn" = module.karpenter.irsa_arn + "eks.amazonaws.com/role-arn" = module.karpenter.iam_role_arn } } settings = { - aws = { - clusterName = module.eks.cluster_name - clusterEndpoint = module.eks.cluster_endpoint - defaultInstanceProfile = module.karpenter.instance_profile_name - interruptionQueueName = module.karpenter.queue_name - } + clusterName = module.eks.cluster_name + clusterEndpoint = module.eks.cluster_endpoint + interruptionQueue = module.karpenter.queue_name } })] } -resource "kubectl_manifest" "karpenter_provisioner" { +resource "kubectl_manifest" "karpenter_node_class" { depends_on = [helm_release.karpenter] yaml_body = yamlencode({ - apiVersion = "karpenter.sh/v1alpha5" - kind = "Provisioner" + apiVersion = "karpenter.k8s.aws/v1beta1" + kind = "EC2NodeClass" metadata = { name = "default" } spec = { - requirements = [ - # See how Karpenter selects instance types: - # https://karpenter.sh/v0.31/faq/#how-does-karpenter-dynamically-select-instance-types - + amiFamily = "AL2" + role = module.karpenter.node_iam_role_name + subnetSelectorTerms = [ { - key = "kubernetes.io/os" - operator = "In" - values = ["linux"] - }, - { - key = "topology.kubernetes.io/zone" - operator = "In" - values = local.azs - }, - { - key = "karpenter.sh/capacity-type" - operator = "In" - values = ["on-demand"] - }, + tags = { + "karpenter.sh/discovery" = module.eks.cluster_name + } + } ] - limits = { - resources = { - cpu = 1000 - memory = "1000Gi" + securityGroupSelectorTerms = [ + { + tags = { + "karpenter.sh/discovery" = module.eks.cluster_name + } } - } - providerRef = { - name = "default" - } - consolidation = { - enabled = true + ] + tags = { + "karpenter.sh/discovery" = module.eks.cluster_name } } }) } -resource "kubectl_manifest" "karpenter_node_template" { - depends_on = [helm_release.karpenter] +resource "kubectl_manifest" "karpenter_node_pool" { + depends_on = [helm_release.karpenter, kubectl_manifest.karpenter_node_class] yaml_body = yamlencode({ - apiVersion = "karpenter.k8s.aws/v1alpha1" - kind = "AWSNodeTemplate" + apiVersion = "karpenter.sh/v1beta1" + kind = "NodePool" metadata = { name = "default" } spec = { - subnetSelector = { - "karpenter.sh/discovery" = module.eks.cluster_name + template = { + spec = { + nodeClassRef = { + name = "default" + } + requirements = [ + # See recommended requirements: + # https://karpenter.sh/v0.37/concepts/nodepools/#capacity-type + + { + key = "topology.kubernetes.io/zone" + operator = "In" + values = local.azs + }, + { + key = "kubernetes.io/arch" + operator = "In" + values = ["amd64"] + }, + { + key = "kubernetes.io/os" + operator = "In" + values = ["linux"] + }, + { + key = "karpenter.sh/capacity-type" + operator = "In" + values = ["on-demand"] + }, + { + key = "karpenter.k8s.aws/instance-category" + operator = "In" + values = ["c", "m", "r"] + }, + { + key = "karpenter.k8s.aws/instance-generation" + operator = "Gt" + values = ["2"] + } + ] + } } - securityGroupSelector = { - "karpenter.sh/discovery" = module.eks.cluster_name + limits = { + cpu = 1000 + memory = "1000Gi" } - tags = { - "karpenter.sh/discovery" = module.eks.cluster_name + disruption = { + # 
 			}
 		}
 	})
diff --git a/infra/tf/k8s_cluster_aws/main.tf b/infra/tf/k8s_cluster_aws/main.tf
index d988480f6c..3795162dac 100644
--- a/infra/tf/k8s_cluster_aws/main.tf
+++ b/infra/tf/k8s_cluster_aws/main.tf
@@ -2,7 +2,7 @@ terraform {
 	required_providers {
 		aws = {
 			source = "hashicorp/aws"
-			version = "5.16.0"
+			version = "5.52.0"
 		}
 		# TODO Revert to gavinbunney/kubectl once https://github.com/gavinbunney/terraform-provider-kubectl/issues/270 is resolved
 		kubectl = {