diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml index 638e0fb..d6df430 100644 --- a/.github/workflows/terraform.yml +++ b/.github/workflows/terraform.yml @@ -23,4 +23,4 @@ jobs: # Checks that all Terraform configuration files adhere to a canonical format - name: Terraform Format - run: terraform fmt --recursive -check + run: terraform fmt -recursive -check diff --git a/.gitignore b/.gitignore index 681f2bc..0002a35 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Local .terraform directories -**/.terraform/* +**/.terraform +**.lock.hcl # .tfstate files *.tfstate @@ -8,6 +9,9 @@ # Crash log files crash.log +# Ignore test examples directory +examples/test + # Ignore any .tfvars files that are generated automatically for each Terraform run. Most # .tfvars files are managed as part of configuration and so should be included in # version control. @@ -35,3 +39,5 @@ override.tf.json *.pem rke2.yaml admin.conf + +**.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index e968cd4..e5d91b6 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,7 @@ Optional policies have the option of being created by default, but are specified |------|---------| | aws | n/a | | random | n/a | -| template | n/a | +| cloudinit | n/a | ## Inputs @@ -177,6 +177,7 @@ Optional policies have the option of being created by default, but are specified | unique\_suffix | Enables/disables generation of a unique suffix to cluster name | `bool` | `true` | yes | | vpc\_id | VPC ID to create resources in | `string` | n/a | yes | | wait_for_capacity_timeout | How long Terraform should wait for ASG instances to be healthy before timing out. | `string` | `"10m"` | no | +| metadata_options | Instance Metadata Options | `map` |
{
http_endpoint: "enabled",
http_tokens: "required",
http_put_response_hop_limit: 2,
instance_metadata_tags: "disabled"}
| no | ## Outputs diff --git a/data.tf b/data.tf index 69860b6..6e13919 100644 --- a/data.tf +++ b/data.tf @@ -11,7 +11,7 @@ module "init" { agent = false } -data "template_cloudinit_config" "this" { +data "cloudinit_config" "this" { gzip = true base64_encode = true @@ -20,7 +20,8 @@ data "template_cloudinit_config" "this" { filename = "cloud-config.yaml" content_type = "text/cloud-config" content = templatefile("${path.module}/modules/nodepool/files/cloud-config.yaml", { - ssh_authorized_keys = var.ssh_authorized_keys + ssh_authorized_keys = var.ssh_authorized_keys + extra_cloud_config_config = var.extra_cloud_config_config }) } diff --git a/examples/quickstart/main.tf b/examples/quickstart/main.tf index bff80ec..462b16a 100644 --- a/examples/quickstart/main.tf +++ b/examples/quickstart/main.tf @@ -4,12 +4,13 @@ provider "aws" { locals { cluster_name = "quickstart" - aws_region = "us-gov-west-1" + aws_region = "us-gov-east-1" tags = { "terraform" = "true", "env" = "quickstart", } + server_iam_role = "K8sUnrestrictedCloudProviderRole" } # Query for defaults @@ -53,32 +54,31 @@ data "aws_ami" "rhel8" { # Server # module "rke2" { - source = "../.." - + source = "../.." 
cluster_name = local.cluster_name vpc_id = data.aws_vpc.default.id subnets = [data.aws_subnet.default.id] ami = data.aws_ami.rhel8.image_id ssh_authorized_keys = [tls_private_key.ssh.public_key_openssh] + iam_instance_profile = local.server_iam_role controlplane_internal = false # Note this defaults to best practice of true, but is explicitly set to public for demo purposes + tags = local.tags - tags = local.tags } # # Generic Agent Pool # module "agents" { - source = "../../modules/agent-nodepool" - + source = "../../modules/agent-nodepool" name = "generic" vpc_id = data.aws_vpc.default.id subnets = [data.aws_subnet.default.id] ami = data.aws_ami.rhel8.image_id ssh_authorized_keys = [tls_private_key.ssh.public_key_openssh] tags = local.tags + cluster_data = module.rke2.cluster_data - cluster_data = module.rke2.cluster_data } # For demonstration only, lock down ssh access in production diff --git a/main.tf b/main.tf index 7ed72ba..e4b044f 100644 --- a/main.tf +++ b/main.tf @@ -16,6 +16,7 @@ locals { cluster_sg = aws_security_group.cluster.id token = module.statestore.token } + target_group_arns = module.cp_lb.target_group_arns } resource "random_string" "uid" { @@ -24,7 +25,7 @@ resource "random_string" "uid" { special = false lower = true upper = false - number = true + numeric = true } # @@ -46,7 +47,7 @@ module "statestore" { # Controlplane Load Balancer # module "cp_lb" { - source = "./modules/elb" + source = "./modules/nlb" name = local.uname vpc_id = var.vpc_id subnets = var.subnets @@ -185,13 +186,16 @@ module "servers" { instance_type = var.instance_type block_device_mappings = var.block_device_mappings extra_block_device_mappings = var.extra_block_device_mappings - vpc_security_group_ids = concat([aws_security_group.server.id, aws_security_group.cluster.id], var.extra_security_group_ids) + vpc_security_group_ids = concat([aws_security_group.server.id, aws_security_group.cluster.id, module.cp_lb.security_group], var.extra_security_group_ids) spot = var.spot 
- load_balancers = [module.cp_lb.name] + #load_balancers = [module.cp_lb.name] + target_group_arns = local.target_group_arns wait_for_capacity_timeout = var.wait_for_capacity_timeout + metadata_options = var.metadata_options + associate_public_ip_address = var.associate_public_ip_address # Overrideable variables - userdata = data.template_cloudinit_config.this.rendered + userdata = data.cloudinit_config.this.rendered iam_instance_profile = var.iam_instance_profile == "" ? module.iam[0].iam_instance_profile : var.iam_instance_profile # Don't allow something not recommended within etcd scaling, set max deliberately and only control desired diff --git a/modules/agent-nodepool/README.md b/modules/agent-nodepool/README.md index 24ca61d..f2e49f8 100644 --- a/modules/agent-nodepool/README.md +++ b/modules/agent-nodepool/README.md @@ -28,7 +28,7 @@ | tags | Map of additional tags to add to all resources created | `map(string)` | `{}` | no | | vpc\_id | VPC ID to create resources in | `string` | n/a | yes | | wait_for_capacity_timeout | How long Terraform should wait for ASG instances to be healthy before timing out. | `string` | `"10m"` | no | -## Outputs +| metadata_options | Instance Metadata Options | `map` |
{
http_endpoint: "enabled",
http_tokens: "required",
http_put_response_hop_limit: 2,
instance_metadata_tags: "disabled"}
| no | | Name | Description | |------|-------------| diff --git a/modules/agent-nodepool/data.tf b/modules/agent-nodepool/data.tf index 92ac7d1..71db84d 100644 --- a/modules/agent-nodepool/data.tf +++ b/modules/agent-nodepool/data.tf @@ -37,7 +37,8 @@ data "aws_iam_policy_document" "aws_autoscaler" { "autoscaling:DescribeTags", "autoscaling:SetDesiredCapacity", "autoscaling:TerminateInstanceInAutoScalingGroup", - "ec2:DescribeLaunchTemplateVersions" + "ec2:DescribeLaunchTemplateVersions", + "ec2:DescribeInstanceTypes" ] } } diff --git a/modules/agent-nodepool/files/cloud-config.yaml b/modules/agent-nodepool/files/cloud-config.yaml new file mode 100644 index 0000000..8f1ebb7 --- /dev/null +++ b/modules/agent-nodepool/files/cloud-config.yaml @@ -0,0 +1,13 @@ +#cloud-config +ssh_authorized_keys: +%{ for _ in ssh_authorized_keys } + - ${_} +%{ endfor } + +users: + - default + - name: rke2 + homedir: /var/lib/rancher/rke2 + system: true + +${extra_cloud_config_config} \ No newline at end of file diff --git a/modules/agent-nodepool/main.tf b/modules/agent-nodepool/main.tf index 02518be..81c1c3c 100644 --- a/modules/agent-nodepool/main.tf +++ b/modules/agent-nodepool/main.tf @@ -69,7 +69,7 @@ module "init" { agent = true } -data "template_cloudinit_config" "init" { +data "cloudinit_config" "init" { gzip = true base64_encode = true @@ -77,8 +77,9 @@ data "template_cloudinit_config" "init" { part { filename = "cloud-config.yaml" content_type = "text/cloud-config" - content = templatefile("${path.module}/../nodepool/files/cloud-config.yaml", { - ssh_authorized_keys = var.ssh_authorized_keys + content = templatefile("${path.module}/files/cloud-config.yaml", { + ssh_authorized_keys = var.ssh_authorized_keys, + extra_cloud_config_config = var.extra_cloud_config_config }) } @@ -116,11 +117,12 @@ module "nodepool" { block_device_mappings = var.block_device_mappings extra_block_device_mappings = var.extra_block_device_mappings vpc_security_group_ids = 
concat([var.cluster_data.cluster_sg], var.extra_security_group_ids) - userdata = data.template_cloudinit_config.init.rendered + userdata = data.cloudinit_config.init.rendered iam_instance_profile = var.iam_instance_profile == "" ? module.iam[0].iam_instance_profile : var.iam_instance_profile asg = var.asg spot = var.spot wait_for_capacity_timeout = var.wait_for_capacity_timeout + metadata_options = var.metadata_options tags = merge({ "Role" = "agent", diff --git a/modules/agent-nodepool/variables.tf b/modules/agent-nodepool/variables.tf index a8e89c6..ec71d9b 100644 --- a/modules/agent-nodepool/variables.tf +++ b/modules/agent-nodepool/variables.tf @@ -61,6 +61,12 @@ variable "block_device_mappings" { } } +variable "extra_cloud_config_config" { + description = "extra config to append to cloud-config" + type = string + default = "" +} + variable "extra_block_device_mappings" { description = "Used to specify additional block device mapping configurations" type = list(map(string)) @@ -95,6 +101,17 @@ variable "extra_security_group_ids" { default = [] } +variable "metadata_options" { + type = map(any) + default = { + http_endpoint = "enabled" + http_tokens = "required" # IMDS-v2 + http_put_response_hop_limit = 2 # allow pods to use IMDS as well + instance_metadata_tags = "disabled" + } + description = "Instance Metadata Options" +} + # # RKE2 Variables # diff --git a/modules/common/download.sh b/modules/common/download.sh index 470500c..427b6bc 100644 --- a/modules/common/download.sh +++ b/modules/common/download.sh @@ -47,7 +47,7 @@ do_download() { get_installer case $ID in - centos) + centos | rocky) yum install -y unzip install_awscli @@ -74,6 +74,7 @@ do_download() { 7*) info "Identified RHEL 7" + rpm --import http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-7 yum install -y http://mirror.centos.org/centos/7/extras/x86_64/Packages/container-selinux-2.119.2-1.911c772.el7_8.noarch.rpm INSTALL_RKE2_METHOD='yum' INSTALL_RKE2_TYPE="${type}" ./install.sh ;; diff --git 
a/modules/nlb/main.tf b/modules/nlb/main.tf new file mode 100644 index 0000000..21187d3 --- /dev/null +++ b/modules/nlb/main.tf @@ -0,0 +1,98 @@ +locals { + # Handle case where target group/load balancer name exceeds 32 character limit without creating illegal names + controlplane_name = "${substr(var.name, 0, 23)}-rke2-cp" + server_name = "${substr(var.name, 0, 18)}-rke2-server" + supervisor_name = "${substr(var.name, 0, 15)}-rke2-supervisor" +} + +resource "aws_security_group" "controlplane" { + name = local.controlplane_name + description = "${local.controlplane_name} sg" + vpc_id = var.vpc_id + + tags = merge({}, var.tags) +} + +resource "aws_security_group_rule" "apiserver" { + from_port = var.cp_port + to_port = var.cp_port + protocol = "tcp" + security_group_id = aws_security_group.controlplane.id + type = "ingress" + + cidr_blocks = var.cp_ingress_cidr_blocks +} + +resource "aws_security_group_rule" "supervisor" { + from_port = var.cp_supervisor_port + to_port = var.cp_supervisor_port + protocol = "tcp" + security_group_id = aws_security_group.controlplane.id + type = "ingress" + + cidr_blocks = var.cp_supervisor_ingress_cidr_blocks +} + +resource "aws_security_group_rule" "egress" { + from_port = "0" + to_port = "0" + protocol = "-1" + security_group_id = aws_security_group.controlplane.id + type = "egress" + + cidr_blocks = ["0.0.0.0/0"] +} + +resource "aws_lb_listener" "apiserver" { + load_balancer_arn = aws_lb.controlplane.arn + port = var.cp_port + protocol = "TCP" + + default_action { + type = "forward" + target_group_arn = aws_lb_target_group.apiserver.arn + } +} + +resource "aws_lb_target_group" "apiserver" { + name = "${local.controlplane_name}-${var.cp_port}" + port = var.cp_port + protocol = "TCP" + vpc_id = var.vpc_id +} + +resource "aws_lb_listener" "supervisor" { + load_balancer_arn = aws_lb.controlplane.arn + port = var.cp_supervisor_port + protocol = "TCP" + + default_action { + type = "forward" + target_group_arn = 
aws_lb_target_group.supervisor.arn + } +} + +resource "aws_lb_target_group" "supervisor" { + name = "${local.controlplane_name}-${var.cp_supervisor_port}" + port = var.cp_supervisor_port + protocol = "TCP" + vpc_id = var.vpc_id +} + +resource "aws_lb" "controlplane" { + name = local.controlplane_name + + internal = var.internal + load_balancer_type = "network" + subnets = var.subnets + + enable_cross_zone_load_balancing = var.enable_cross_zone_load_balancing + + access_logs { + # the bucket name isn't allowed to be empty in this block, so use its default value as the flag + bucket = var.access_logs_bucket + enabled = var.access_logs_bucket != "disabled" + } + + tags = merge({}, var.tags) +} \ No newline at end of file diff --git a/modules/nlb/outputs.tf b/modules/nlb/outputs.tf new file mode 100644 index 0000000..e0bc452 --- /dev/null +++ b/modules/nlb/outputs.tf @@ -0,0 +1,23 @@ +output "dns" { + value = aws_lb.controlplane.dns_name +} + +output "id" { + value = aws_lb.controlplane.id +} + +output "arn" { + value = aws_lb.controlplane.arn +} + +output "name" { + value = aws_lb.controlplane.name +} + +output "security_group" { + value = aws_security_group.controlplane.id +} + +output "target_group_arns" { + value = [aws_lb_target_group.apiserver.arn, aws_lb_target_group.supervisor.arn] +} \ No newline at end of file diff --git a/modules/nlb/variables.tf b/modules/nlb/variables.tf new file mode 100644 index 0000000..99fbede --- /dev/null +++ b/modules/nlb/variables.tf @@ -0,0 +1,51 @@ +variable "name" { + type = string +} + +variable "vpc_id" { + type = string +} + +variable "subnets" { + type = list(string) +} + +variable "internal" { + default = true + type = bool +} + +variable "enable_cross_zone_load_balancing" { + default = true + type = bool +} + +variable "cp_port" { + type = number + default = 6443 +} + +variable "cp_ingress_cidr_blocks" { + type = list(string) + default = ["0.0.0.0/0"] +} + +variable "cp_supervisor_port" { + type = number + default = 9345 
+} + +variable "cp_supervisor_ingress_cidr_blocks" { + type = list(string) + default = ["0.0.0.0/0"] +} + +variable "tags" { + type = map(string) + default = {} +} + +variable "access_logs_bucket" { + type = string + default = "disabled" +} \ No newline at end of file diff --git a/modules/nodepool/files/cloud-config.yaml b/modules/nodepool/files/cloud-config.yaml index 984cbf6..cd81814 100644 --- a/modules/nodepool/files/cloud-config.yaml +++ b/modules/nodepool/files/cloud-config.yaml @@ -9,3 +9,5 @@ users: - name: rke2 homedir: /var/lib/rancher/rke2 system: true + +${extra_cloud_config_config} diff --git a/modules/nodepool/main.tf b/modules/nodepool/main.tf index 3c441b9..921c2d6 100644 --- a/modules/nodepool/main.tf +++ b/modules/nodepool/main.tf @@ -11,11 +11,23 @@ resource "aws_security_group" "this" { # Launch template # resource "aws_launch_template" "this" { - name = "${var.name}-rke2-nodepool" - image_id = var.ami - instance_type = var.instance_type - user_data = var.userdata - vpc_security_group_ids = concat([aws_security_group.this.id], var.vpc_security_group_ids) + name = "${var.name}-rke2-nodepool" + image_id = var.ami + instance_type = var.instance_type + user_data = var.userdata + + metadata_options { + http_endpoint = var.metadata_options["http_endpoint"] + http_tokens = var.metadata_options["http_tokens"] + http_put_response_hop_limit = var.metadata_options["http_put_response_hop_limit"] + instance_metadata_tags = var.metadata_options["instance_metadata_tags"] + } + + network_interfaces { + associate_public_ip_address = var.associate_public_ip_address + delete_on_termination = true + security_groups = var.vpc_security_group_ids + } block_device_mappings { device_name = lookup(var.block_device_mappings, "device_name", "/dev/sda1") diff --git a/modules/nodepool/variables.tf b/modules/nodepool/variables.tf index 0cfcb6f..c14689b 100644 --- a/modules/nodepool/variables.tf +++ b/modules/nodepool/variables.tf @@ -88,7 +88,24 @@ variable "spot" { type = 
bool } +variable "associate_public_ip_address" { + default = false + type = bool +} + variable "min_elb_capacity" { type = number default = null } + +variable "metadata_options" { + type = map(any) + description = "Instance Metadata Options" +} + +variable "extra_cloud_config_config" { + description = "extra config to append to cloud-config" + type = string + default = "" +} + diff --git a/modules/statestore/main.tf b/modules/statestore/main.tf index 0a886e8..99b14e9 100644 --- a/modules/statestore/main.tf +++ b/modules/statestore/main.tf @@ -20,7 +20,7 @@ resource "aws_s3_bucket_server_side_encryption_configuration" "ssec" { } } -resource "aws_s3_bucket_object" "token" { +resource "aws_s3_object" "token" { bucket = aws_s3_bucket.bucket.id key = "token" content_type = "text/plain" @@ -33,7 +33,7 @@ data "aws_iam_policy_document" "getter" { effect = "Allow" actions = ["s3:GetObject"] resources = [ - "${aws_s3_bucket.bucket.arn}/${aws_s3_bucket_object.token.id}", + "${aws_s3_bucket.bucket.arn}/${aws_s3_object.token.id}", ] } } diff --git a/modules/statestore/outputs.tf b/modules/statestore/outputs.tf index 96420dd..90fcc6c 100644 --- a/modules/statestore/outputs.tf +++ b/modules/statestore/outputs.tf @@ -1,9 +1,9 @@ output "bucket" { - value = aws_s3_bucket_object.token.bucket + value = aws_s3_object.token.bucket } output "token_object" { - value = aws_s3_bucket_object.token.id + value = aws_s3_object.token.id } output "kubeconfig_put_policy" { @@ -12,8 +12,8 @@ output "kubeconfig_put_policy" { output "token" { value = { - bucket = aws_s3_bucket_object.token.bucket - object = aws_s3_bucket_object.token.id + bucket = aws_s3_object.token.bucket + object = aws_s3_object.token.id policy_document = data.aws_iam_policy_document.getter.json bucket_arn = aws_s3_bucket.bucket.arn } diff --git a/modules/userdata/data.tf b/modules/userdata/data.tf deleted file mode 100644 index 180d205..0000000 --- a/modules/userdata/data.tf +++ /dev/null @@ -1,16 +0,0 @@ -data "template_file" 
"init" { - template = file("${path.module}/files/rke2-init.sh") - - vars = { - type = var.agent ? "agent" : "server" - - server_url = var.server_url - token_bucket = var.token_bucket - token_object = var.token_object - config = var.config - ccm = var.ccm - - pre_userdata = var.pre_userdata - post_userdata = var.post_userdata - } -} \ No newline at end of file diff --git a/modules/userdata/files/rke2-init.sh b/modules/userdata/files/rke2-init.sh index f143309..12728ce 100644 --- a/modules/userdata/files/rke2-init.sh +++ b/modules/userdata/files/rke2-init.sh @@ -38,7 +38,8 @@ append_config() { # The most simple "leader election" you've ever seen in your life elect_leader() { # Fetch other running instances in ASG - instance_id=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) + TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") + instance_id=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-id) asg_name=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$instance_id" --query 'AutoScalingInstances[*].AutoScalingGroupName' --output text) instances=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name "$asg_name" --query 'AutoScalingGroups[*].Instances[?HealthStatus==`Healthy`].InstanceId' --output text) @@ -112,7 +113,8 @@ local_cp_api_wait() { fetch_token() { info "Fetching rke2 join token..." - aws configure set default.region "$(curl -s http://169.254.169.254/latest/meta-data/placement/region)" + TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") + aws configure set default.region "$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/placement/region)" # Validate aws caller identity, fatal if not valid if ! 
aws sts get-caller-identity 2>/dev/null; then diff --git a/modules/userdata/outputs.tf b/modules/userdata/outputs.tf index ee2aacc..0bb4e77 100644 --- a/modules/userdata/outputs.tf +++ b/modules/userdata/outputs.tf @@ -1,3 +1,14 @@ output "templated" { - value = data.template_file.init.rendered + value = templatefile("${path.module}/files/rke2-init.sh", { + type = var.agent ? "agent" : "server" + + server_url = var.server_url + token_bucket = var.token_bucket + token_object = var.token_object + config = var.config + ccm = var.ccm + + pre_userdata = var.pre_userdata + post_userdata = var.post_userdata + }) } \ No newline at end of file diff --git a/variables.tf b/variables.tf index ddfa1e8..41dc778 100644 --- a/variables.tf +++ b/variables.tf @@ -70,7 +70,7 @@ variable "extra_block_device_mappings" { variable "servers" { description = "Number of servers to create" type = number - default = 1 + default = 3 } variable "spot" { @@ -118,6 +118,17 @@ variable "controlplane_access_logs_bucket" { default = "disabled" } +variable "metadata_options" { + type = map(any) + default = { + http_endpoint = "enabled" + http_tokens = "required" # IMDS-v2 + http_put_response_hop_limit = 2 # allow pods to use IMDS as well + instance_metadata_tags = "disabled" + } + description = "Instance Metadata Options" +} + # # RKE2 Variables # @@ -162,3 +173,15 @@ variable "wait_for_capacity_timeout" { type = string default = "10m" } + +variable "associate_public_ip_address" { + default = false + type = bool +} + +variable "extra_cloud_config_config" { + description = "extra config to append to cloud-config" + type = string + default = "" +} +