data/aws/vpc: Only create subnet infrastructure for zones with Machine(Set)s

This commit updates our Terraform variables to include the worker
availability zones, and then switches on those (and the master zones)
in Terraform to avoid creating subnet infrastructure (NAT gateways,
routes, etc.) in zones that have no Machine(Set)s.  This helps address
limit issues in high-zone regions like us-east-1, as seen in the
limits.md change.  Note that without a reduction in our default
MachineSet creation, the installer defaults will still not work on
us-east-1 without a limit bump.

The drawback is that users are now on the hook to provision their own
subnets (and supporting infrastructure) if they decide to grow into a
new zone as a day-2 Machine(Set) operation; our
user-provided-infrastructure docs should give them sufficient
grounding to do so.  It's possible that in the future the machine-API
or another infrastructure operator could dynamically provision subnets
in zones that were not populated at install time, but I can't hazard a
guess as to how likely that is.

The HCL functions for combining the zone lists are documented in [1,2].

[1]: https://www.terraform.io/docs/configuration-0-11/interpolation.html#concat-list1-list2-
[2]: https://www.terraform.io/docs/configuration-0-11/interpolation.html#distinct-list-
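
As a minimal sketch of that combination (the zone lists here are
hypothetical, not values from this commit):

    # Terraform 0.11: concat() joins the two lists; distinct() then
    # drops duplicates, keeping the first occurrence of each zone.
    locals {
      example_master_zones = ["us-east-1a", "us-east-1b", "us-east-1c"]
      example_worker_zones = ["us-east-1a", "us-east-1d"]

      # Evaluates to ["us-east-1a", "us-east-1b", "us-east-1c", "us-east-1d"]
      example_all_zones = "${distinct(concat(local.example_master_zones, local.example_worker_zones))}"
    }

The same expression, over the real master and worker zone variables,
is what main.tf passes to the vpc module's availability_zones input
below.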
wking committed Mar 28, 2019
1 parent 6da4cbe · commit 644f705
Showing 10 changed files with 82 additions and 54 deletions.
7 changes: 4 additions & 3 deletions data/data/aws/main.tf
@@ -72,9 +72,10 @@ module "dns" {
 module "vpc" {
   source = "./vpc"
 
-  cidr_block = "${var.machine_cidr}"
-  cluster_id = "${var.cluster_id}"
-  region     = "${var.aws_region}"
+  cidr_block         = "${var.machine_cidr}"
+  cluster_id         = "${var.cluster_id}"
+  region             = "${var.aws_region}"
+  availability_zones = "${distinct(concat(var.aws_master_availability_zones, var.aws_worker_availability_zones))}"
 
   tags = "${local.tags}"
 }
5 changes: 5 additions & 0 deletions data/data/aws/variables-aws.tf
@@ -62,3 +62,8 @@ variable "aws_master_availability_zones" {
   type        = "list"
   description = "The availability zones in which to create the masters. The length of this list must match master_count."
 }
+
+variable "aws_worker_availability_zones" {
+  type        = "list"
+  description = "The availability zones to provision for workers. Worker instances are created by the machine-API operator, but this variable controls their supporting infrastructure (subnets, routing, etc.)."
+}
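
For illustration, these list variables could be set by hand like this
(hypothetical zone names; in practice the installer derives the values
from the master Machines and the worker MachineSets):

    aws_master_availability_zones = ["us-east-1a", "us-east-1b", "us-east-1c"]
    aws_worker_availability_zones = ["us-east-1a", "us-east-1d"]

With the distinct(concat(...)) expression in main.tf, that pair yields
subnet infrastructure in four zones rather than all six in us-east-1.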
11 changes: 2 additions & 9 deletions data/data/aws/vpc/common.tf
@@ -1,17 +1,10 @@
 # Canonical internal state definitions for this module.
 # read only: only locals and data source definitions allowed. No resources or module blocks in this file
-// Fetch a list of available AZs
-data "aws_availability_zones" "azs" {
-  state = "available"
-}
 
-// Only reference data sources which are gauranteed to exist at any time (above) in this locals{} block
+// Only reference data sources which are guaranteed to exist at any time (above) in this locals{} block
 locals {
-  // List of possible AZs for each type of subnet
-  new_subnet_azs = "${data.aws_availability_zones.azs.names}"
-
   // How many AZs to create subnets in
-  new_az_count = "${length(local.new_subnet_azs)}"
+  new_az_count = "${length(var.availability_zones)}"
 
   // The VPC ID to use to build the rest of the vpc data sources
   vpc_id = "${aws_vpc.new_vpc.id}"
4 changes: 2 additions & 2 deletions data/data/aws/vpc/outputs.tf
@@ -3,11 +3,11 @@ output "vpc_id" {
 }
 
 output "az_to_private_subnet_id" {
-  value = "${zipmap(local.new_subnet_azs, local.private_subnet_ids)}"
+  value = "${zipmap(var.availability_zones, local.private_subnet_ids)}"
 }
 
 output "az_to_public_subnet_id" {
-  value = "${zipmap(local.new_subnet_azs, local.public_subnet_ids)}"
+  value = "${zipmap(var.availability_zones, local.public_subnet_ids)}"
 }
 
 output "public_subnet_ids" {
5 changes: 5 additions & 0 deletions data/data/aws/vpc/variables.tf
@@ -1,3 +1,8 @@
+variable "availability_zones" {
+  type        = "list"
+  description = "The availability zones in which to provision subnets."
+}
+
 variable "cidr_block" {
   type = "string"
 }
6 changes: 3 additions & 3 deletions data/data/aws/vpc/vpc-private.tf
@@ -3,7 +3,7 @@ resource "aws_route_table" "private_routes" {
   vpc_id = "${data.aws_vpc.cluster_vpc.id}"
 
   tags = "${merge(map(
-    "Name","${var.cluster_id}-private-${local.new_subnet_azs[count.index]}",
+    "Name","${var.cluster_id}-private-${var.availability_zones[count.index]}",
   ), var.tags)}"
 }

@@ -22,10 +22,10 @@ resource "aws_subnet" "private_subnet" {
 
   cidr_block = "${cidrsubnet(local.new_private_cidr_range, 3, count.index)}"
 
-  availability_zone = "${local.new_subnet_azs[count.index]}"
+  availability_zone = "${var.availability_zones[count.index]}"
 
   tags = "${merge(map(
-    "Name", "${var.cluster_id}-private-${local.new_subnet_azs[count.index]}",
+    "Name", "${var.cluster_id}-private-${var.availability_zones[count.index]}",
     "kubernetes.io/role/internal-elb", "",
   ), var.tags)}"
 }
8 changes: 4 additions & 4 deletions data/data/aws/vpc/vpc-public.tf
@@ -31,10 +31,10 @@ resource "aws_subnet" "public_subnet" {
 
   cidr_block = "${cidrsubnet(local.new_public_cidr_range, 3, count.index)}"
 
-  availability_zone = "${local.new_subnet_azs[count.index]}"
+  availability_zone = "${var.availability_zones[count.index]}"
 
   tags = "${merge(map(
-    "Name", "${var.cluster_id}-public-${local.new_subnet_azs[count.index]}",
+    "Name", "${var.cluster_id}-public-${var.availability_zones[count.index]}",
   ), var.tags)}"
 }

@@ -49,7 +49,7 @@ resource "aws_eip" "nat_eip" {
   vpc = true
 
   tags = "${merge(map(
-    "Name", "${var.cluster_id}-eip-${local.new_subnet_azs[count.index]}",
+    "Name", "${var.cluster_id}-eip-${var.availability_zones[count.index]}",
   ), var.tags)}"
 
   # Terraform does not declare an explicit dependency towards the internet gateway.
@@ -64,6 +64,6 @@ resource "aws_nat_gateway" "nat_gw" {
   subnet_id = "${aws_subnet.public_subnet.*.id[count.index]}"
 
   tags = "${merge(map(
-    "Name", "${var.cluster_id}-nat-${local.new_subnet_azs[count.index]}",
+    "Name", "${var.cluster_id}-nat-${var.availability_zones[count.index]}",
   ), var.tags)}"
 }
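
All of the per-zone resources in this module hang off Terraform's
count; as a schematic sketch of that pattern (abbreviated, not the
module's exact code):

    # One instance per configured zone; count.index picks the zone for
    # each instance, so trimming the zone list trims the resources.
    resource "aws_eip" "example" {
      count = "${length(var.availability_zones)}"
      vpc   = true
    }

This is why limiting availability_zones to the Machine(Set) zones
directly reduces the NAT gateway, EIP, and route counts.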
24 changes: 13 additions & 11 deletions docs/user/aws/limits.md
@@ -23,25 +23,27 @@ limit.
 
 ## Elastic Network Interfaces (ENI)
 
-The default installation creates 21 + the number of availability zones of ENIs (e.g. us-east-1 = 21 + 6 = 27 ENIs).
+The default installation creates 21 + the number of availability zones of ENIs (e.g. 21 + 3 = 24 ENIs for a three-zone cluster).
 The default limit per region is 350. Additional ENIs are created for additional machines and elastic load balancers
 created by cluster usage and deployed workloads. A service limit increase here may be required to satisfy the needs of
 additional clusters and deployed workloads.
 
 ## Elastic IP (EIP)
 
-For a single, default cluster, your account will have the needed capacity limits required. There is one exception,
-"EC2-VPC Elastic IPs". The installer creates a public and private subnet for each
-[availability zone within a region][availability-zones] to provision the cluster in a highly available configuration. In
-each private subnet, a separate [NAT Gateway][nat-gateways] is created and requires a separate [elastic IP][elastic-ip].
-The default limit of 5 is sufficient for most regions and a single cluster. For the us-east-1 region, a higher limit is
-required. For multiple clusters, a higher limit is required. Please see [this map][az-map] for a current region map with
-availability zone count. We recommend selecting regions with 3 or more availability zones.
+By default, the installer distributes control-plane and compute machines across [all availability zones within a region][availability-zones] to provision the cluster in a highly available configuration.
+Please see [this map][az-map] for a current region map with availability zone count.
+We recommend selecting regions with 3 or more availability zones.
+You can [provide an install-config](../overview.md#multiple-invocations) to [configure](customization.md) the installer to use specific zones to override that default.
 
-### Example: Using N. Virginia (us-east-1)
+The installer creates a public and private subnet for each configured availability zone.
+In each private subnet, a separate [NAT Gateway][nat-gateways] is created and requires a separate [EC2-VPC Elastic IP (EIP)][elastic-ip].
+The default limit of 5 is sufficient for a single cluster, unless you have configured your cluster to use more than five zones.
+For multiple clusters, a higher limit will likely be required (and will certainly be required to support more than five clusters, even if they are each single-zone clusters).
 
-To use N. Virginia (us-east-1) for a new cluster, please submit a limit increase for VPC Elastic IPs similar to the
-following in the support dashboard (to create more than one cluster, a higher limit will be necessary):
+### Example: Using North Virginia (us-east-1)
+
+North Virginia (us-east-1) has six availability zones, so a higher limit is required unless you configure your cluster to use fewer zones.
+To support the default, all-zone installation, please submit a limit increase for VPC Elastic IPs similar to the following in the support dashboard (to create more than one cluster, a higher limit will be necessary):
 
 ![Increase Elastic IP limit in AWS](images/support_increase_elastic_ip.png)
14 changes: 12 additions & 2 deletions pkg/asset/cluster/tfvars.go
@@ -61,6 +61,7 @@ func (t *TerraformVariables) Dependencies() []asset.Asset {
 		&bootstrap.Bootstrap{},
 		&machine.Master{},
 		&machines.Master{},
+		&machines.Worker{},
 	}
 }

@@ -71,8 +72,9 @@ func (t *TerraformVariables) Generate(parents asset.Parents) error {
 	bootstrapIgnAsset := &bootstrap.Bootstrap{}
 	masterIgnAsset := &machine.Master{}
 	mastersAsset := &machines.Master{}
+	workersAsset := &machines.Worker{}
 	rhcosImage := new(rhcos.Image)
-	parents.Get(clusterID, installConfig, bootstrapIgnAsset, masterIgnAsset, mastersAsset, rhcosImage)
+	parents.Get(clusterID, installConfig, bootstrapIgnAsset, masterIgnAsset, mastersAsset, workersAsset, rhcosImage)
 
 	platform := installConfig.Config.Platform.Name()
 	switch platform {
@@ -117,7 +119,15 @@
 		for i, m := range masters {
 			masterConfigs[i] = m.Spec.ProviderSpec.Value.Object.(*awsprovider.AWSMachineProviderConfig)
 		}
-		data, err := awstfvars.TFVars(masterConfigs)
+		workers, err := workersAsset.MachineSets()
+		if err != nil {
+			return err
+		}
+		workerConfigs := make([]*awsprovider.AWSMachineProviderConfig, len(workers))
+		for i, m := range workers {
+			workerConfigs[i] = m.Spec.Template.Spec.ProviderSpec.Value.Object.(*awsprovider.AWSMachineProviderConfig)
+		}
+		data, err := awstfvars.TFVars(masterConfigs, workerConfigs)
 		if err != nil {
 			return errors.Wrapf(err, "failed to get %s Terraform variables", platform)
 		}
52 changes: 32 additions & 20 deletions pkg/tfvars/aws/aws.go
@@ -11,29 +11,40 @@ import (
 )
 
 type config struct {
-	AMI                   string            `json:"aws_ami"`
-	ExtraTags             map[string]string `json:"aws_extra_tags,omitempty"`
-	BootstrapInstanceType string            `json:"aws_bootstrap_instance_type,omitempty"`
-	MasterInstanceType    string            `json:"aws_master_instance_type,omitempty"`
-	AvailabilityZones     []string          `json:"aws_master_availability_zones"`
-	IOPS                  int64             `json:"aws_master_root_volume_iops"`
-	Size                  int64             `json:"aws_master_root_volume_size,omitempty"`
-	Type                  string            `json:"aws_master_root_volume_type,omitempty"`
-	Region                string            `json:"aws_region,omitempty"`
+	AMI                     string            `json:"aws_ami"`
+	ExtraTags               map[string]string `json:"aws_extra_tags,omitempty"`
+	BootstrapInstanceType   string            `json:"aws_bootstrap_instance_type,omitempty"`
+	MasterInstanceType      string            `json:"aws_master_instance_type,omitempty"`
+	MasterAvailabilityZones []string          `json:"aws_master_availability_zones"`
+	WorkerAvailabilityZones []string          `json:"aws_worker_availability_zones"`
+	IOPS                    int64             `json:"aws_master_root_volume_iops"`
+	Size                    int64             `json:"aws_master_root_volume_size,omitempty"`
+	Type                    string            `json:"aws_master_root_volume_type,omitempty"`
+	Region                  string            `json:"aws_region,omitempty"`
 }
 
 // TFVars generates AWS-specific Terraform variables launching the cluster.
-func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) {
+func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig, workerConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) {
 	masterConfig := masterConfigs[0]
 
 	tags := make(map[string]string, len(masterConfig.Tags))
 	for _, tag := range masterConfig.Tags {
 		tags[tag.Name] = tag.Value
 	}
 
-	availabilityZones := make([]string, len(masterConfigs))
+	masterAvailabilityZones := make([]string, len(masterConfigs))
 	for i, c := range masterConfigs {
-		availabilityZones[i] = c.Placement.AvailabilityZone
+		masterAvailabilityZones[i] = c.Placement.AvailabilityZone
 	}
 
+	exists := struct{}{}
+	availabilityZoneMap := map[string]struct{}{}
+	for _, c := range workerConfigs {
+		availabilityZoneMap[c.Placement.AvailabilityZone] = exists
+	}
+	workerAvailabilityZones := make([]string, 0, len(availabilityZoneMap))
+	for zone := range availabilityZoneMap {
+		workerAvailabilityZones = append(workerAvailabilityZones, zone)
+	}
+
 	if len(masterConfig.BlockDevices) == 0 {
@@ -60,14 +71,15 @@ func TFVars(masterConfigs []*v1beta1.AWSMachineProviderConfig) ([]byte, error) {
 	instanceClass := defaults.InstanceClass(masterConfig.Placement.Region)
 
 	cfg := &config{
-		Region:                masterConfig.Placement.Region,
-		ExtraTags:             tags,
-		AMI:                   *masterConfig.AMI.ID,
-		AvailabilityZones:     availabilityZones,
-		BootstrapInstanceType: fmt.Sprintf("%s.large", instanceClass),
-		MasterInstanceType:    masterConfig.InstanceType,
-		Size:                  *rootVolume.EBS.VolumeSize,
-		Type:                  *rootVolume.EBS.VolumeType,
+		Region:                  masterConfig.Placement.Region,
+		ExtraTags:               tags,
+		AMI:                     *masterConfig.AMI.ID,
+		MasterAvailabilityZones: masterAvailabilityZones,
+		WorkerAvailabilityZones: workerAvailabilityZones,
+		BootstrapInstanceType:   fmt.Sprintf("%s.large", instanceClass),
+		MasterInstanceType:      masterConfig.InstanceType,
+		Size:                    *rootVolume.EBS.VolumeSize,
+		Type:                    *rootVolume.EBS.VolumeType,
 	}
 
 	if rootVolume.EBS.Iops != nil {