From adaa14e752c795fcb3303d7f058ed0d014441074 Mon Sep 17 00:00:00 2001
From: Adam Savitzky
Date: Mon, 24 Oct 2016 12:11:46 -0700
Subject: [PATCH 1/5] Attach EBS volumes to etcd nodes for persistence

If you ever lose your etcd cluster for whatever reason, or if you should
ever need to restart it, you should be able to recover your state.

Mentioned in this issue: https://github.com/kz8s/tack/issues/75
---
 modules/etcd/ec2.tf | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/modules/etcd/ec2.tf b/modules/etcd/ec2.tf
index f9b28b1..9f6b4cb 100644
--- a/modules/etcd/ec2.tf
+++ b/modules/etcd/ec2.tf
@@ -1,3 +1,30 @@
+resource "aws_ebs_volume" "etcd" { # persistent data volume for etcd
+  count = "${ length( split(",", var.etcd-ips) ) }" # one volume per etcd instance (same count as aws_instance.etcd)
+
+  availability_zone = "${ element( split(",", var.azs), 0 ) }" # NOTE(review): all volumes land in the first AZ; must equal the etcd instances' AZ - confirm
+
+  type = "gp2"
+  size = 100
+
+  tags {
+    builtWith = "terraform"
+    Cluster = "${ var.name }"
+    depends-id = "${ var.depends-id }"
+    KubernetesCluster = "${ var.name }"
+    Name = "etcd${ count.index + 1 }-${ var.name }"
+    role = "etcd,apiserver"
+    version = "${ var.coreos-hyperkube-tag }"
+  }
+}
+
+resource "aws_volume_attachment" "etcd" { # pairs volume N with etcd instance N
+  count = "${ length( split(",", var.etcd-ips) ) }" # must match both the volume and the instance count
+
+  device_name = "/dev/xvdf"
+  volume_id = "${ element(aws_ebs_volume.etcd.*.id, count.index) }"
+  instance_id = "${ element(aws_instance.etcd.*.id, count.index) }"
+}
+
 resource "aws_instance" "etcd" {
   count = "${ length( split(",", var.etcd-ips) ) }"
 
@@ -32,5 +59,9 @@ resource "aws_instance" "etcd" {
 }
 
 resource "null_resource" "dummy_dependency" {
-  depends_on = [ "aws_instance.etcd" ]
+  depends_on = [
+    "aws_instance.etcd",
+    "aws_ebs_volume.etcd",
+    "aws_volume_attachment.etcd"
+  ]
 }

From c217df161a98b44786bcdfa6ebc340498908241d Mon Sep 17 00:00:00 2001
From: Adam Savitzky
Date: Mon, 24 Oct 2016 13:16:59 -0700
Subject: [PATCH 2/5] Add snapshotting module

---
 modules.tf                       | 11 ++++++
 modules/iam/io.tf                |  1 +
 modules/iam/snapshot.tf          | 62 ++++++++++++++++++++++++++++++++
modules/snapshot/cloudwatch.tf | 11 ++++++ modules/snapshot/io.tf | 5 +++ modules/snapshot/lambda.tf | 37 +++++++++++++++++++ modules/snapshot/snapshot.py.tpl | 15 ++++++++ 7 files changed, 142 insertions(+) create mode 100644 modules/iam/snapshot.tf create mode 100644 modules/snapshot/cloudwatch.tf create mode 100644 modules/snapshot/io.tf create mode 100644 modules/snapshot/lambda.tf create mode 100644 modules/snapshot/snapshot.py.tpl diff --git a/modules.tf b/modules.tf index 18a2c23..d0e9c20 100644 --- a/modules.tf +++ b/modules.tf @@ -122,6 +122,17 @@ module "worker" { worker-name = "general" } +module "snapshot" { + source = "./modules/snapshot" + + bucket-prefix = "${ var.s3-bucket }" + name = "${ var.name }" + iam-role-snapshot-arn = "${ module.iam.iam-role-snapshot-arn }" + name = "${ var.name }" + security-groups = "${ module.security.etcd-id },${ module.security.worker-id }" + subnet-ids = "${ module.vpc.subnet-ids-private },${ module.vpc.subnet-ids-public }" +} + /* module "worker2" { source = "./modules/worker" diff --git a/modules/iam/io.tf b/modules/iam/io.tf index 05726e9..2166999 100644 --- a/modules/iam/io.tf +++ b/modules/iam/io.tf @@ -5,5 +5,6 @@ variable "name" {} output "depends-id" { value = "${ null_resource.dummy_dependency.id }" } output "aws-iam-role-etcd-id" { value = "${ aws_iam_role.master.id }" } output "aws-iam-role-worker-id" { value = "${ aws_iam_role.worker.id }" } +output "iam-role-snapshot-arn" { value = "${ aws_iam_role.snapshot.arn }" } output "instance-profile-name-master" { value = "${ aws_iam_instance_profile.master.name }" } output "instance-profile-name-worker" { value = "${ aws_iam_instance_profile.worker.name }" } diff --git a/modules/iam/snapshot.tf b/modules/iam/snapshot.tf new file mode 100644 index 0000000..d5485e9 --- /dev/null +++ b/modules/iam/snapshot.tf @@ -0,0 +1,62 @@ +resource "aws_iam_role" "snapshot" { + name = "snapshot" + assume_role_policy = < Date: Mon, 24 Oct 2016 13:20:50 -0700 Subject: [PATCH 3/5] 
Remove unneeded vars --- modules.tf | 2 -- modules/snapshot/io.tf | 1 - 2 files changed, 3 deletions(-) diff --git a/modules.tf b/modules.tf index d0e9c20..b716cb3 100644 --- a/modules.tf +++ b/modules.tf @@ -125,8 +125,6 @@ module "worker" { module "snapshot" { source = "./modules/snapshot" - bucket-prefix = "${ var.s3-bucket }" - name = "${ var.name }" iam-role-snapshot-arn = "${ module.iam.iam-role-snapshot-arn }" name = "${ var.name }" security-groups = "${ module.security.etcd-id },${ module.security.worker-id }" diff --git a/modules/snapshot/io.tf b/modules/snapshot/io.tf index 63f13d0..998a046 100644 --- a/modules/snapshot/io.tf +++ b/modules/snapshot/io.tf @@ -1,4 +1,3 @@ -variable "bucket-prefix" {} variable "iam-role-snapshot-arn" {} variable "name" {} variable "security-groups" {} From 0653e766dd995b86584999537426a118af518eed Mon Sep 17 00:00:00 2001 From: Adam Savitzky Date: Mon, 24 Oct 2016 14:25:02 -0700 Subject: [PATCH 4/5] Add info about ebs volumes to readme --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index afdf266..c189867 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,8 @@ creation * Bastion Host * Multi-AZ Auto-Scaling Worker Nodes * [NAT Gateway](http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/vpc-nat-gateway.html) +* Cluster state persisted using EBS +* Automatic snapshotting of all EBS volumes, including dynamically generated persistent volumes ### CoreOS (1122.3.0, 1185.2.0, 1192.2.0) * etcd DNS Discovery Bootstrap @@ -107,6 +109,7 @@ Terraform v0.7.7 - Route 53 internal zone for VPC - Etcd cluster bootstrapped from Route 53 - High Availability Kubernetes configuration (masters running on etcd nodes) +- EBS volumes for etcd cluster with automatic snapshots - Autoscaling worker node group across subnets in selected region - kube-system namespace and addons: DNS, UI, Dashboard From 8e73f42685b89f7144ad69c4649a5bf534c275d8 Mon Sep 17 00:00:00 2001 From: Adam Savitzky Date: Tue, 25 Oct 2016 
09:52:10 -0700 Subject: [PATCH 5/5] Mount /dev/xvdf in cloud-config for etcd --- modules/etcd/cloud-config.tf | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/modules/etcd/cloud-config.tf b/modules/etcd/cloud-config.tf index 322340d..cd3efbd 100644 --- a/modules/etcd/cloud-config.tf +++ b/modules/etcd/cloud-config.tf @@ -11,6 +11,7 @@ coreos: advertise-client-urls: http://${ fqdn }:2379 # cert-file: /etc/kubernetes/ssl/k8s-etcd.pem # debug: true + data-dir: /media/etcd2 discovery-srv: ${ internal-tld } initial-advertise-peer-urls: https://${ fqdn }:2380 initial-cluster-state: new @@ -25,6 +26,43 @@ coreos: peer-key-file: /etc/kubernetes/ssl/k8s-etcd-key.pem units: + - name: format-ebs-volume.service + command: start + content: | + [Unit] + Description=Formats the ebs volume + After=dev-xvdf.device + Requires=dev-xvdf.device + [Service] + ExecStart=/bin/bash -c "(/usr/sbin/blkid -t TYPE=ext4 | grep /dev/xvdf) || (/usr/sbin/wipefs -fa /dev/xvdf && /usr/sbin/mkfs.ext4 /dev/xvdf)" + RemainAfterExit=yes + Type=oneshot + + - name: media-etcd2.mount + command: start + content: | + [Unit] + Description=Mount ebs to /media/etcd2 + Requires=format-ebs-volume.service + After=format-ebs-volume.service + [Mount] + What=/dev/xvdf + Where=/media/etcd2 + Type=ext4 + + - name: prepare-etcd-data-dir.service + command: start + content: | + [Unit] + Description=Prepares the etcd data directory + Requires=media-etcd2.mount + After=media-etcd2.mount + Before=etcd2.service + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/usr/bin/chown -R etcd:etcd /media/etcd2 + - name: etcd2.service command: start drop-ins: