diff --git a/infra/aws/terraform/prow-build-cluster/resources/karpenter/ec2nodeclass.yaml b/infra/aws/terraform/prow-build-cluster/resources/karpenter/ec2nodeclass.yaml
new file mode 100644
index 00000000000..df8de9b4f59
--- /dev/null
+++ b/infra/aws/terraform/prow-build-cluster/resources/karpenter/ec2nodeclass.yaml
@@ -0,0 +1,21 @@
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+  name: default
+spec:
+  amiFamily: Bottlerocket
+  ## this needs an update!
+  role: "Karpenter-prow-build-cluster-20240527081538530000000004"
+  subnetSelectorTerms:
+    - tags:
+        karpenter.sh/discovery: "prow-build-cluster"
+  securityGroupSelectorTerms:
+    - tags:
+        Name: "prow-build-cluster-node"
+  amiSelectorTerms:
+    - id: ami-066c62bb333a37820
+  tags:
+    Cluster: prow-build-cluster
+    Name: build-karpenter
+  detailedMonitoring: true
diff --git a/infra/aws/terraform/prow-build-cluster/resources/karpenter/nodepool.yaml b/infra/aws/terraform/prow-build-cluster/resources/karpenter/nodepool.yaml
new file mode 100644
index 00000000000..6fe59eec14b
--- /dev/null
+++ b/infra/aws/terraform/prow-build-cluster/resources/karpenter/nodepool.yaml
@@ -0,0 +1,53 @@
+---
+apiVersion: karpenter.sh/v1beta1
+kind: NodePool
+metadata:
+  name: default
+spec:
+  template:
+    spec:
+      requirements:
+        - key: topology.kubernetes.io/zone
+          operator: In
+          values: ["us-east-2a", "us-east-2b", "us-east-2c"]
+        # https://karpenter.sh/docs/reference/instance-types/#r5adxlarge
+        - key: kubernetes.io/arch
+          operator: In
+          values: ["amd64"]
+        - key: kubernetes.io/os
+          operator: In
+          values: ["linux"]
+        - key: karpenter.sh/capacity-type
+          operator: NotIn
+          values: ["spot"]
+        - key: "karpenter.k8s.aws/instance-family"
+          operator: In
+          values: ["r5ad"]
+        - key: karpenter.k8s.aws/instance-category
+          operator: In
+          values: ["r"]
+        - key: "karpenter.k8s.aws/instance-size"
+          operator: In
+          values: ["2xlarge"]
+        - key: karpenter.k8s.aws/instance-generation
+          operator: Gt
+          values: ["2"]
+      nodeClassRef:
+        apiVersion: karpenter.k8s.aws/v1beta1
+        kind: EC2NodeClass
+        name: default
+  limits:
+    cpu: 7500
+  disruption:
+    # Describes which types of Nodes Karpenter should consider for consolidation
+    # If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost
+    # If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods
+    consolidationPolicy: WhenEmpty
+    # The amount of time Karpenter should wait after discovering a consolidation decision
+    # This value can currently only be set when the consolidationPolicy is 'WhenEmpty'
+    # You can choose to disable consolidation entirely by setting the string value 'Never' here
+    consolidateAfter: 30s
+    # The amount of time a Node can live on the cluster before being removed
+    # Avoiding long-running Nodes helps to reduce security vulnerabilities as well as to reduce the chance of issues that can plague Nodes with long uptimes such as file fragmentation or memory leaks from system processes
+    # You can choose to disable expiration entirely by setting the string value 'Never' here
+    expireAfter: 720h