fix(infra): upgrade karpenter to 0.32 & disable compaction #834

Merged
infra/tf/k8s_cluster_aws/eks.tf (49 additions & 22 deletions)
@@ -2,7 +2,7 @@

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "19.16.0"
version = "20.12.0"

cluster_name = local.name
cluster_version = local.cluster_version
@@ -83,30 +83,11 @@ module "eks" {
create_cluster_security_group = false
create_node_security_group = false

manage_aws_auth_configmap = true
aws_auth_roles = [
# Allow users to assume the admin role
{
rolearn = aws_iam_role.eks_admin.arn
username = local.eks_admin_username
groups = [
"system:masters"
]
},
# We need to add in the Karpenter node IAM role for nodes launched by Karpenter
{
rolearn = module.karpenter.role_arn
username = "system:node:{{EC2PrivateDNSName}}"
groups = [
"system:bootstrappers",
"system:nodes",
]
},
]

# Enable root account to manage KMS
kms_key_enable_default_policy = true

authentication_mode = "API_AND_CONFIG_MAP"

fargate_profiles = {
karpenter = {
selectors = [
@@ -128,3 +109,49 @@
})
}

# TODO (one possible sequence is sketched after this file's diff):
# terraform state rm 'module.eks.kubernetes_config_map_v1_data.aws_auth[0]'
# terraform state rm 'module.eks.kubernetes_config_map.aws_auth[0]'
# removed {
# from = module.eks.kubernetes_config_map_v1_data.aws_auth[0]
# lifecycle {
# destroy = false
# }
# }
#
# removed {
# from = module.eks.kubernetes_config_map.aws_auth[0]
# lifecycle {
# destroy = false
# }
# }

module "aws_auth" {
depends_on = [module.eks]

source = "terraform-aws-modules/eks/aws//modules/aws-auth"
version = "~> 20.0"

manage_aws_auth_configmap = true

aws_auth_roles = [
# Allow users to assume the admin role
{
rolearn = aws_iam_role.eks_admin.arn
username = local.eks_admin_username
groups = [
"system:masters"
]
},
# We need to add in the Karpenter node IAM role for nodes launched by Karpenter
{
rolearn = module.karpenter.iam_role_arn
username = "system:node:{{EC2PrivateDNSName}}"
groups = [
"system:bootstrappers",
"system:nodes",
]
},
]
}

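One way to resolve the TODO left in eks.tf, sketched here rather than validated as part of this PR: drop the ConfigMap resources tracked under the old module.eks addresses from state, then apply so the new aws_auth submodule (with manage_aws_auth_configmap = true) takes over the existing aws-auth ConfigMap. The commented-out removed blocks in the TODO are the declarative equivalent (Terraform 1.7+), but since they stay commented out, the CLI route below is the one this PR actually describes.

# Forget the old entries so the aws_auth submodule can adopt the ConfigMap
terraform state rm 'module.eks.kubernetes_config_map_v1_data.aws_auth[0]'
terraform state rm 'module.eks.kubernetes_config_map.aws_auth[0]'

# Review the plan: the aws_auth module should only want to (re)manage the
# ConfigMap data, not touch the cluster itself
terraform plan
terraform apply
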
infra/tf/k8s_cluster_aws/karpenter.tf (89 additions & 51 deletions)
@@ -1,15 +1,24 @@
# TODO: Wait until fargate is up
module "karpenter" {
source = "terraform-aws-modules/eks/aws//modules/karpenter"
version = "19.16.0"
version = "20.12.0"

cluster_name = module.eks.cluster_name
irsa_oidc_provider_arn = module.eks.oidc_provider_arn

policies = {
node_iam_role_additional_policies = {
AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}

# IRSA backwards compatibility
enable_irsa = true
create_instance_profile = true
create_iam_role = true
iam_role_name = "KarpenterIRSA-${module.eks.cluster_name}"
iam_role_description = "Karpenter IAM role for service account"
iam_policy_name = "KarpenterIRSA-${module.eks.cluster_name}"
iam_policy_description = "Karpenter IAM policy for service account"

tags = local.tags
}

@@ -20,7 +29,7 @@ resource "helm_release" "karpenter" {
name = "karpenter"
repository = "oci://public.ecr.aws/karpenter"
chart = "karpenter"
version = "v0.31.0"
version = "v0.32.10"

values = [yamlencode({
controller = {
@@ -37,85 +46,114 @@

serviceAccount = {
annotations = {
"eks.amazonaws.com/role-arn" = module.karpenter.irsa_arn
"eks.amazonaws.com/role-arn" = module.karpenter.iam_role_arn
}
}

settings = {
aws = {
clusterName = module.eks.cluster_name
clusterEndpoint = module.eks.cluster_endpoint
defaultInstanceProfile = module.karpenter.instance_profile_name
interruptionQueueName = module.karpenter.queue_name
}
clusterName = module.eks.cluster_name
clusterEndpoint = module.eks.cluster_endpoint
interruptionQueue = module.karpenter.queue_name
}
})]
}

resource "kubectl_manifest" "karpenter_provisioner" {
resource "kubectl_manifest" "karpenter_node_class" {
depends_on = [helm_release.karpenter]

yaml_body = yamlencode({
apiVersion = "karpenter.sh/v1alpha5"
kind = "Provisioner"
apiVersion = "karpenter.k8s.aws/v1beta1"
kind = "EC2NodeClass"
metadata = {
name = "default"
}
spec = {
requirements = [
# See how Karpenter selects instance types:
# https://karpenter.sh/v0.31/faq/#how-does-karpenter-dynamically-select-instance-types

amiFamily = "AL2"
role = module.karpenter.node_iam_role_name
subnetSelectorTerms = [
{
key = "kubernetes.io/os"
operator = "In"
values = ["linux"]
},
{
key = "topology.kubernetes.io/zone"
operator = "In"
values = local.azs
},
{
key = "karpenter.sh/capacity-type"
operator = "In"
values = ["on-demand"]
},
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
}
}
]
limits = {
resources = {
cpu = 1000
memory = "1000Gi"
securityGroupSelectorTerms = [
{
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
}
}
}
providerRef = {
name = "default"
}
consolidation = {
enabled = true
]
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
}
}
})
}

resource "kubectl_manifest" "karpenter_node_template" {
depends_on = [helm_release.karpenter]
resource "kubectl_manifest" "karpenter_node_pool" {
depends_on = [helm_release.karpenter, kubectl_manifest.karpenter_node_class]

yaml_body = yamlencode({
apiVersion = "karpenter.k8s.aws/v1alpha1"
kind = "AWSNodeTemplate"
apiVersion = "karpenter.sh/v1beta1"
kind = "NodePool"
metadata = {
name = "default"
}
spec = {
subnetSelector = {
"karpenter.sh/discovery" = module.eks.cluster_name
template = {
spec = {
nodeClassRef = {
name = "default"
}
requirements = [
# See recommended requirements:
# https://karpenter.sh/v0.37/concepts/nodepools/#capacity-type

{
key = "topology.kubernetes.io/zone"
operator = "In"
values = local.azs
},
{
key = "kubernetes.io/arch"
operator = "In"
values = ["amd64"]
},
{
key = "kubernetes.io/os"
operator = "In"
values = ["linux"]
},
{
key = "karpenter.sh/capacity-type"
operator = "In"
values = ["on-demand"]
},
{
key = "karpenter.k8s.aws/instance-category"
operator = "In"
values = ["c", "m", "r"]
},
{
key = "karpenter.k8s.aws/instance-generation"
operator = "Gt"
values = ["2"]
}
]
}
}
securityGroupSelector = {
"karpenter.sh/discovery" = module.eks.cluster_name
limits = {
cpu = 1000
memory = "1000Gi"
}
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
disruption = {
# Never kill pods that are currently running
consolidationPolicy = "WhenEmpty"
consolidateAfter = "30s"
# Don't kill nodes arbitrarily
expireAfter = "Never"
# TODO: If switching to WhenUnderutilized, add `budgets` here (see the sketch after this file's diff)
}
}
})
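The disruption block above is the "disable compaction" half of this PR: with consolidationPolicy = "WhenEmpty" and expireAfter = "Never", Karpenter only reclaims nodes that are already empty and never rotates nodes out from under running pods. If the TODO about WhenUnderutilized is ever picked up, a rough sketch of what the block could look like follows; the field names follow the upstream NodePool v1beta1 spec, and disruption budgets arrived in a Karpenter minor later than the v0.32.10 pinned here, so the chart version would need another bump first.

      disruption = {
        consolidationPolicy = "WhenUnderutilized"
        # Recycle nodes periodically instead of keeping them forever
        expireAfter = "720h"
        budgets = [
          # Never disrupt more than 10% of nodes at once
          { nodes = "10%" },
          # Block voluntary disruption entirely during UTC working hours
          {
            nodes    = "0"
            schedule = "0 9 * * mon-fri"
            duration = "8h"
          }
        ]
      }
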
infra/tf/k8s_cluster_aws/main.tf (1 addition & 1 deletion)
@@ -2,7 +2,7 @@ terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "5.16.0"
version = "5.52.0"
}
# TODO Revert to gavinbunney/kubectl once https://github.com/gavinbunney/terraform-provider-kubectl/issues/270 is resolved
kubectl = {