Skip to content

Commit

Permalink
fix: switch to amazon linux 2 amis & fix final snapshot identifier (#4)
Browse files Browse the repository at this point in the history
* fix: switch to amazon linux 2 which has single volume

* add separate gpu launch template
  • Loading branch information
kldavis4 authored May 12, 2021
1 parent 3d615a0 commit 91a8f26
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 12 deletions.
12 changes: 6 additions & 6 deletions aws/terraform/modules/metaflow/modules/computation/batch.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ resource "aws_batch_compute_environment" "cpu" {
and this compute environment will not have to be destroyed and then created to point to a new Launch Template.
*/
launch_template {
launch_template_id = aws_launch_template.this.id
version = aws_launch_template.this.latest_version
launch_template_id = aws_launch_template.cpu.id
version = aws_launch_template.cpu.latest_version
}

# Security group to apply to the instances launched.
Expand Down Expand Up @@ -100,8 +100,8 @@ resource "aws_batch_compute_environment" "large-cpu" {
and this compute environment will not have to be destroyed and then created to point to a new Launch Template.
*/
launch_template {
launch_template_id = aws_launch_template.this.id
version = aws_launch_template.this.latest_version
launch_template_id = aws_launch_template.cpu.id
version = aws_launch_template.cpu.latest_version
}

# Security group to apply to the instances launched.
Expand Down Expand Up @@ -167,8 +167,8 @@ resource "aws_batch_compute_environment" "gpu" {
and this compute environment will not have to be destroyed and then created to point to a new Launch Template.
*/
launch_template {
launch_template_id = aws_launch_template.this.id
version = aws_launch_template.this.latest_version
launch_template_id = aws_launch_template.gpu.id
version = aws_launch_template.gpu.latest_version
}

# Security group to apply to the instances launched.
Expand Down
8 changes: 8 additions & 0 deletions aws/terraform/modules/metaflow/modules/computation/data.tf
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
# Region data source; no reference to it appears in the visible hunks of this diff.
data "aws_region" "current" {}

# SSM parameter at the ECS-optimized Amazon Linux 2 "recommended" path.
# aws_launch_template.cpu JSON-decodes its value and reads the "image_id" key
# to choose the AMI for CPU Batch instances.
data "aws_ssm_parameter" "ecs_optimized_cpu_ami" {
name = "/aws/service/ecs/optimized-ami/amazon-linux-2/recommended"
}

# SSM parameter at the ECS-optimized Amazon Linux 2 GPU "recommended" path.
# aws_launch_template.gpu JSON-decodes its value and reads the "image_id" key
# to choose the AMI for GPU Batch instances.
data "aws_ssm_parameter" "ecs_optimized_gpu_ami" {
name = "/aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended"
}
38 changes: 35 additions & 3 deletions aws/terraform/modules/metaflow/modules/computation/ec2.tf
Original file line number Diff line number Diff line change
@@ -1,18 +1,50 @@
resource "aws_launch_template" "this" {
resource "aws_launch_template" "cpu" {
/* To provide a larger disk space than the default 8GB for AWS Batch.
AWS Batch points to this using the latest version, so we can update the disk size here
and AWS Batch will use that.
This is used for all Metaflow AWS Batch remote jobs.
This is used for all Metaflow AWS CPU Batch remote jobs.
*/
name = "${var.resource_prefix}batch-launch-template-100gb${var.resource_suffix}"
name = "${var.resource_prefix}batch-launch-tmpl-cpu-100gb${var.resource_suffix}"

# Defines what IAM Role to assume to grant an EC2 instance
# This role must have a policy to access the kms_key_id used to encrypt the EBS volume
iam_instance_profile {
arn = aws_iam_instance_profile.ecs_instance_role.arn
}

image_id = jsondecode(data.aws_ssm_parameter.ecs_optimized_cpu_ami.value)["image_id"]

block_device_mappings {
device_name = "/dev/xvda"

ebs {
volume_size = 100
delete_on_termination = true
encrypted = true
}
}

tags = var.standard_tags
}

resource "aws_launch_template" "gpu" {
/* To provide a larger disk space than the default 8GB for AWS Batch.
AWS Batch points to this using the latest version, so we can update the disk size here
and AWS Batch will use that.
This is used for all Metaflow AWS GPU Batch remote jobs.
*/
name = "${var.resource_prefix}batch-launch-tmpl-gpu-100gb${var.resource_suffix}"

# Defines what IAM Role to assume to grant an EC2 instance
# This role must have a policy to access the kms_key_id used to encrypt the EBS volume
iam_instance_profile {
arn = aws_iam_instance_profile.ecs_instance_role.arn
}

image_id = jsondecode(data.aws_ssm_parameter.ecs_optimized_gpu_ami.value)["image_id"]

block_device_mappings {
device_name = "/dev/xvda"

Expand Down
8 changes: 5 additions & 3 deletions aws/terraform/modules/metaflow/modules/datastore/rds.tf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ resource "random_password" "this" {
override_special = "!#$%&*()-_=+[]{}<>:?"
}

# Random pet name whose id is appended to final_snapshot_identifier on
# aws_db_instance.this, replacing the timestamp()-based suffix used previously.
resource "random_pet" "final_snapshot_id" {}

/*
Define rds db instance.
*/
Expand All @@ -68,9 +70,9 @@ resource "aws_db_instance" "this" {
username = var.db_username
password = random_password.this.result
db_subnet_group_name = aws_db_subnet_group.this.id
max_allocated_storage = 1000 # Upper limit of automatic scaled storage
multi_az = true # Multiple availability zone?
final_snapshot_identifier = "${var.resource_prefix}${var.db_name}-final-snapshot${var.resource_suffix}-${formatdate("YYYYMMMDDhhmm", timestamp())}" # Snapshot upon delete
max_allocated_storage = 1000 # Upper limit of automatic scaled storage
multi_az = true # Multiple availability zone?
final_snapshot_identifier = "${var.resource_prefix}${var.db_name}-final-snapshot${var.resource_suffix}-${random_pet.final_snapshot_id.id}" # Snapshot upon delete
vpc_security_group_ids = [aws_security_group.rds_security_group.id]

tags = merge(
Expand Down

0 comments on commit 91a8f26

Please sign in to comment.