From 1dc0c317762ffd06867dbd0c0464e5256b3c145b Mon Sep 17 00:00:00 2001 From: seph Date: Wed, 24 Feb 2021 09:42:08 -0500 Subject: [PATCH 1/9] Aarch64 CI support --- ci-infra/ami-build/agent-setup.sh | 76 +++++++++++++++++++ ci-infra/ami-build/azp-docker.json | 40 ++++++++++ .../ami-build/scripts/aws-metadata-refresh.sh | 14 ++++ ci-infra/ami-build/scripts/detach-self.sh | 49 ++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 ci-infra/ami-build/agent-setup.sh create mode 100644 ci-infra/ami-build/azp-docker.json create mode 100755 ci-infra/ami-build/scripts/aws-metadata-refresh.sh create mode 100755 ci-infra/ami-build/scripts/detach-self.sh diff --git a/ci-infra/ami-build/agent-setup.sh b/ci-infra/ami-build/agent-setup.sh new file mode 100644 index 0000000..9c4dc26 --- /dev/null +++ b/ci-infra/ami-build/agent-setup.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +set -ex + +cat <<EOF > /tmp/preseed.cfg +debconf debconf/frontend select Noninteractive +tzdata tzdata/Areas select Etc +tzdata tzdata/Zones/Etc select UTC +EOF + +sudo debconf-set-selections /tmp/preseed.cfg + +ARCH=$(dpkg --print-architecture) + +export DEBIAN_FRONTEND=noninteractive +sudo apt-get update +apt-get install -y tzdata +sudo apt-get -y upgrade + +echo "Installing required packages for apt repos" +sudo apt-get install -y apt-transport-https ca-certificates gnupg-agent software-properties-common curl + +curl -L https://download.docker.com/linux/ubuntu/gpg \ + | sudo apt-key add - +sudo apt-key adv --list-public-keys --with-fingerprint --with-colons 0EBFCD88 2>/dev/null \ + | grep 'fpr' | head -n1 | grep '9DC858229FC7DD38854AE2D88D81803C0EBFCD88' +sudo add-apt-repository -y "deb [arch=${ARCH}] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" +sudo apt-add-repository -y ppa:git-core/ppa + +sudo apt-get update + +echo "Installing docker" +sudo apt-get install -y docker-ce docker-ce-cli git awscli jq inotify-tools + +sudo mkdir -p /etc/docker +echo '{ + "ipv6": true, 
"fixed-cidr-v6": "2001:db8:1::/64" +}' | sudo tee /etc/docker/daemon.json +echo "::1 localhost" | sudo tee -a /etc/hosts + +sudo systemctl enable docker +##FIXME##sudo systemctl start docker + +sudo useradd -ms /bin/bash -G docker github-runner +sudo mkdir -p /srv/runner +sudo chown -R github-runner:github-runner /srv/runner/ + +## +## Setup GitHub Runner Agent +## + +# Normalize ARCH variable +[[ "${ARCH}" == "amd64" ]] && ARCH=x64 + +AGENT_VERSION=2.277.1 +AGENT_FILE=actions-runner-linux-${ARCH}-${AGENT_VERSION}.tar.gz + +curl -L https://github.com/actions/runner/releases/download/v${AGENT_VERSION}/${AGENT_FILE} \ + | sudo -u github-runner tar xz -C /srv/runner + +sudo /srv/runner/bin/installdependencies.sh + + +# Setup github ssh key. Not totally sure we need it, but... +sudo -u github-runner mkdir /home/github-runner/.ssh +ssh-keyscan github.com \ + | sudo -u github-runner tee /home/github-runner/.ssh/known_hosts + + + +sudo chown root:root /srv/runner/tmpscripts/*.sh +sudo chmod 0755 /srv/runner/tmpscripts/*.sh +sudo mv /srv/runner/tmpscripts/*.sh /usr/local/bin + +rm -rf /srv/runner/tmpscripts diff --git a/ci-infra/ami-build/azp-docker.json b/ci-infra/ami-build/azp-docker.json new file mode 100644 index 0000000..6a18e0d --- /dev/null +++ b/ci-infra/ami-build/azp-docker.json @@ -0,0 +1,40 @@ +{ + "variables": { + }, + "builders": [ + { + "export_path": "image.tar", + "image": "ubuntu:20.04", + "type": "docker" + } + ], + "provisioners": [ + { + "type": "shell", + "inline": [ + "mkdir -p /srv/runner/tmpscripts" + ] + }, + { + "type": "file", + "source": "scripts", + "destination": "/srv/runner/tmpscripts" + }, + { + "type": "shell", + "inline": [ + "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime", + "echo $TZ > /etc/timezone", + "apt-get update", + "apt-get install -y sudo systemd" + ], + "environment_vars": [ + "TZ=Etc/UTC" + ] + }, + { + "script": "agent-setup.sh", + "type": "shell" + } + ] +} \ No newline at end of file diff --git 
a/ci-infra/ami-build/scripts/aws-metadata-refresh.sh b/ci-infra/ami-build/scripts/aws-metadata-refresh.sh new file mode 100755 index 0000000..e1955dc --- /dev/null +++ b/ci-infra/ami-build/scripts/aws-metadata-refresh.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +mkdir -p /run/aws-metadata/ + +role_name=$(wget -q -O - http://169.254.169.254/latest/meta-data/iam/security-credentials) +wget -q -O - "http://169.254.169.254/latest/meta-data/iam/security-credentials/$role_name" > /run/aws-metadata/creds.json +wget -q -O - http://169.254.169.254/latest/dynamic/instance-identity/document > /run/aws-metadata/iid.json + +chmod 0400 /run/aws-metadata/creds.json +chmod 0400 /run/aws-metadata/iid.json +chmod 0400 /run/aws-metadata/asg-name +chown github-runner:github-runner /run/aws-metadata/creds.json +chown github-runner:github-runner /run/aws-metadata/iid.json +chown github-runner:github-runner /run/aws-metadata/asg-name diff --git a/ci-infra/ami-build/scripts/detach-self.sh b/ci-infra/ami-build/scripts/detach-self.sh new file mode 100755 index 0000000..3a39984 --- /dev/null +++ b/ci-infra/ami-build/scripts/detach-self.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +set -eu -o pipefail + +# Check Pre-Reqs, and that we're running on an AWS Instance Seemingly. +if ! hash aws >/dev/null 2>&1 ; then + echo "Need the AWS Cli in order to set AWS Protection." + exit 1 +fi +if ! hash jq >/dev/null 2>&1 ; then + echo "Need JQ in order to query credentials." + exit 2 +fi +if [[ ! -f "/sys/devices/virtual/dmi/id/board_asset_tag" ]]; then + echo "Doesn't seem to be an AWS Instance: [/sys/devices/virtual/dmi/id/board_asset_tag] does not exist". + exit 3 +fi +instance_id=$(< /sys/devices/virtual/dmi/id/board_asset_tag) +if [[ ! "$instance_id" =~ ^i- ]]; then + echo "Retrieved Instance ID: [$instance_id] does not start with [i-]" + exit 4 +fi + +function ensureCredentials() { + if [[ ! -f "/run/aws-metadata/creds.json" ]] || [[ ! -f "/run/aws-metadata/asg-name" ]] || [[ ! 
-f "/run/aws-metadata/iid.json" ]] || \ + [[ ! -r "/run/aws-metadata/creds.json" ]] || [[ ! -r "/run/aws-metadata/asg-name" ]] || [[ ! -r "/run/aws-metadata/iid.json" ]]; then + echo "Failed to find Credentials for AWS Instance." + exit 5 + fi + + local -r credentials_json=$(< /run/aws-metadata/creds.json) + local -r iid_json=$(< /run/aws-metadata/iid.json) + local -r asg_name=$(< /run/aws-metadata/asg-name) + local -r aws_access_key=$(echo -n "$credentials_json" | jq -r .AccessKeyId) + local -r secret_access_key=$(echo -n "$credentials_json" | jq -r .SecretAccessKey) + local -r session_token=$(echo -n "$credentials_json" | jq -r .Token) + local -r expiration=$(echo -n "$credentials_json" | jq -r .Expiration) + local -r region=$(echo -n "$iid_json" | jq -r .region) + + echo "Fetched Cached Credentials, Expire At: [$expiration]" + export AWS_ACCESS_KEY_ID="$aws_access_key" + export AWS_SECRET_ACCESS_KEY="$secret_access_key" + export AWS_SESSION_TOKEN="$session_token" + export AWS_DEFAULT_REGION="$region" + export CURRENT_ASG_NAME="$asg_name" +} + +ensureCredentials +aws autoscaling detach-instances --instance-ids "$instance_id" --auto-scaling-group-name "$CURRENT_ASG_NAME" --no-should-decrement-desired-capacity From 5422f1dc86532babdbd3bca349f463c62764ac03 Mon Sep 17 00:00:00 2001 From: seph Date: Wed, 17 Mar 2021 19:55:04 -0400 Subject: [PATCH 2/9] WIP --- ci-infra/ami-build/Makefile | 7 +++ ci-infra/ami-build/README.md | 5 ++ ci-infra/ami-build/TODO.md | 12 +++++ ci-infra/ami-build/agent-setup.sh | 15 +++--- .../ami-build/github-runner-ami-arm64.json | 51 +++++++++++++++++++ ...-docker.json => github-runner-docker.json} | 0 6 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 ci-infra/ami-build/Makefile create mode 100644 ci-infra/ami-build/README.md create mode 100644 ci-infra/ami-build/TODO.md create mode 100644 ci-infra/ami-build/github-runner-ami-arm64.json rename 
ci-infra/ami-build/{azp-docker.json => github-runner-docker.json} (100%) diff --git a/ci-infra/ami-build/Makefile b/ci-infra/ami-build/Makefile new file mode 100644 index 0000000..2205f49 --- /dev/null +++ b/ci-infra/ami-build/Makefile @@ -0,0 +1,7 @@ +all: + +docker: + packer build github-runner-docker.json + +ami-arm64: + aws-vault exec osquery-dev -- packer build github-runner-ami-arm64.json diff --git a/ci-infra/ami-build/README.md b/ci-infra/ami-build/README.md new file mode 100644 index 0000000..6f9b3c8 --- /dev/null +++ b/ci-infra/ami-build/README.md @@ -0,0 +1,5 @@ +# AMI Build # + +This is a series of [Packer](https://www.packer.io/) scripts to build AMIs +which will then launch inside of the ASG. In order to build these AMIs you +will need access to the Envoy AWS Account. \ No newline at end of file diff --git a/ci-infra/ami-build/TODO.md b/ci-infra/ami-build/TODO.md new file mode 100644 index 0000000..d4b2d17 --- /dev/null +++ b/ci-infra/ami-build/TODO.md @@ -0,0 +1,12 @@ +Move /srv/runner/tmpscripts (which needs root) to /tmp. It's only part +of the provisioning. + +Figure out the security group thing + +Figure out more consistency about `sudo` + +Consider an instance disk AMI, not a EBS one? + +Upgrade to 20.04? + +AMI builds seem really flakey. This just fails 75% of the time. Issues with preseed, or network connectivity. 
And generally WTF diff --git a/ci-infra/ami-build/agent-setup.sh b/ci-infra/ami-build/agent-setup.sh index 9c4dc26..be94ceb 100644 --- a/ci-infra/ami-build/agent-setup.sh +++ b/ci-infra/ami-build/agent-setup.sh @@ -14,7 +14,7 @@ ARCH=$(dpkg --print-architecture) export DEBIAN_FRONTEND=noninteractive sudo apt-get update -apt-get install -y tzdata +sudo apt-get install -y tzdata sudo apt-get -y upgrade echo "Installing required packages for apt repos" @@ -25,7 +25,7 @@ curl -L https://download.docker.com/linux/ubuntu/gpg \ sudo apt-key adv --list-public-keys --with-fingerprint --with-colons 0EBFCD88 2>/dev/null \ | grep 'fpr' | head -n1 | grep '9DC858229FC7DD38854AE2D88D81803C0EBFCD88' sudo add-apt-repository -y "deb [arch=${ARCH}] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -sudo apt-add-repository -y ppa:git-core/ppa +#sudo apt-add-repository -y ppa:git-core/ppa sudo apt-get update @@ -50,7 +50,7 @@ sudo chown -R github-runner:github-runner /srv/runner/ ## Setup GitHub Runner Agent ## -# Normalize ARCH variable +# Normalize ARCH variable for x64 [[ "${ARCH}" == "amd64" ]] && ARCH=x64 AGENT_VERSION=2.277.1 @@ -69,8 +69,7 @@ ssh-keyscan github.com \ -sudo chown root:root /srv/runner/tmpscripts/*.sh -sudo chmod 0755 /srv/runner/tmpscripts/*.sh -sudo mv /srv/runner/tmpscripts/*.sh /usr/local/bin - -rm -rf /srv/runner/tmpscripts +sudo chown root:root /tmp/transfer/scripts/*.sh +sudo chmod 0755 /tmp/transfer/scripts/*.sh +sudo mv /tmp/transfer/scripts/*.sh /usr/local/bin +rm -rf /tmp/transfer diff --git a/ci-infra/ami-build/github-runner-ami-arm64.json b/ci-infra/ami-build/github-runner-ami-arm64.json new file mode 100644 index 0000000..90da4b3 --- /dev/null +++ b/ci-infra/ami-build/github-runner-ami-arm64.json @@ -0,0 +1,51 @@ +{ + "provisioners": [ + { + "inline": [ + "mkdir -p /tmp/transfer" + ], + "type": "shell" + }, + { + "destination": "/tmp/transfer", + "source": "scripts", + "type": "file" + }, + { + "type": "shell", + "script": 
"agent-setup.sh" + } + ], + "builders": [ + { + "encrypt_boot": true, + "security_group_ids": [], + "tags": { + }, + "run_tags": { + "Project": "Packer" + }, + "run_volume_tags": { + "Project": "Packer" + }, + "ami_name": "github-runner-ami-arm64-{{timestamp}}", + "ssh_username": "ubuntu", + "instance_type": "r6g.large", + "source_ami_filter": { + "most_recent": true, + "owners": [ + "099720109477" + ], + "filters": { + "root-device-type": "ebs", + "name": "ubuntu/images/*ubuntu-focal-20.04-arm64-server-*", + "virtualization-type": "hvm" + } + }, + "region": "us-east-1", + "type": "amazon-ebs" + } + ], + "variables": { + } +} \ No newline at end of file diff --git a/ci-infra/ami-build/azp-docker.json b/ci-infra/ami-build/github-runner-docker.json similarity index 100% rename from ci-infra/ami-build/azp-docker.json rename to ci-infra/ami-build/github-runner-docker.json From 6f1843fcc7f517d222e9a5878ebc88106cb131e8 Mon Sep 17 00:00:00 2001 From: seph Date: Fri, 19 Mar 2021 21:07:09 -0400 Subject: [PATCH 3/9] bucket state --- terraform/.gitignore | 1 + terraform/common.tf | 21 ++++++++++++ terraform/provider.tf | 45 +++++++++++++++++++++++++ terraform/state_bucket/README.md | 34 +++++++++++++++++++ terraform/state_bucket/bucket.tf | 54 ++++++++++++++++++++++++++++++ terraform/state_bucket/common.tf | 1 + terraform/state_bucket/provider.tf | 1 + 7 files changed, 157 insertions(+) create mode 100644 terraform/.gitignore create mode 100644 terraform/common.tf create mode 100644 terraform/provider.tf create mode 100644 terraform/state_bucket/README.md create mode 100644 terraform/state_bucket/bucket.tf create mode 120000 terraform/state_bucket/common.tf create mode 120000 terraform/state_bucket/provider.tf diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 0000000..3fa8c86 --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1 @@ +.terraform diff --git a/terraform/common.tf b/terraform/common.tf new file mode 100644 index 0000000..742cb69 --- 
/dev/null +++ b/terraform/common.tf @@ -0,0 +1,21 @@ +# common.tfvars +# +# Shared variables between the terraform stacks? workspaces? +# Whatevers. Configured by symlinking. +locals { + + remote_state_bucket = "osquery-terraform-state" + + logging_bucket = "osquery-logging" + + main_region = "us-east-1" + + aws_account_ids = { + org: "032511868142", + identity: "834249036484", + logs: "072219116274", + infra: "107349553668", + storage: "680817131363", + dev: "204725418487", + } +} diff --git a/terraform/provider.tf b/terraform/provider.tf new file mode 100644 index 0000000..fffe422 --- /dev/null +++ b/terraform/provider.tf @@ -0,0 +1,45 @@ +terraform { + required_version = "0.14.8" + + # When bootstrapping this, you will have to manually disable this and set the bucket up first. + backend "s3" { + bucket = "osquery-terraform-state" + region = "us-east-1" + key = "tf/osquery/aws/state_bucket.tfstate" + role_arn = "arn:aws:iam::107349553668:role/IdentityAccountAccessRole" + dynamodb_table = "osquery-terraform-state" + } +} + +# This is the default provider. It will use whatever from the environment. +provider "aws" { + region = local.main_region + +} + +## +## Various targeted providers. It would be nice if this could read +## the ARNs from .config, but this works. 
+## + +# NOTE: durations of under 900 will cause errors + +provider "aws" { + alias = "osquery-infra" + assume_role { + duration_seconds = 900 + external_id = "terraform" + role_arn = "arn:aws:iam::107349553668:role/IdentityAccountAccessRole" + } + region = local.main_region +} + +provider "aws" { + alias = "osquery-dev" + assume_role { + duration_seconds = 900 + external_id = "terraform" + role_arn = "arn:aws:iam::204725418487:role/IdentityAccountAccessRole" + } + region = local.main_region +} diff --git a/terraform/state_bucket/README.md b/terraform/state_bucket/README.md new file mode 100644 index 0000000..881e441 --- /dev/null +++ b/terraform/state_bucket/README.md @@ -0,0 +1,34 @@ + + +# STS crap + +`aws sts get-caller-identity` + +This works: + +``` +aws sts assume-role --role-arn arn:aws:iam::107349553668:role/IdentityAccountAccessRole --role-session-name test +``` + +## Bootstrap + +This is a lot nicer now that terraform supports state migration +stuff. Sweet. + +Comment out the bucket, and then invoke: + +``` +rm -rf .terraform +aws-vault exec osquery-identity-initial-tmp -- terraform init +aws-vault exec osquery-identity-initial-tmp -- terraform plan +aws-vault exec osquery-identity-initial-tmp -- terraform apply + +# uncomment the s3 backend +vi provider.tf + +# terraform will now move it for you +aws-vault exec osquery-identity-initial-tmp -- terraform init +aws-vault exec osquery-identity-initial-tmp -- terraform plan + + +``` diff --git a/terraform/state_bucket/bucket.tf b/terraform/state_bucket/bucket.tf new file mode 100644 index 0000000..0f18f59 --- /dev/null +++ b/terraform/state_bucket/bucket.tf @@ -0,0 +1,54 @@ +resource "aws_s3_bucket" "bucket" { + provider = aws.osquery-infra + bucket = local.remote_state_bucket + acl = "private" + #region = local.main_region + + server_side_encryption_configuration { + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } + } + + #logging { + # target_bucket = 
local.logging_bucket" + # target_prefix = "logs/osquery-terraform-state" + #} + + versioning { + enabled = true + } + + lifecycle_rule { + enabled = true + + noncurrent_version_transition { + days = 30 + storage_class = "STANDARD_IA" + } + + noncurrent_version_transition { + days = 60 + storage_class = "GLACIER" + } + + noncurrent_version_expiration { + days = 90 + } + } +} + +resource "aws_dynamodb_table" "lock_table" { + provider = aws.osquery-infra + + name = "osquery-terraform-state" + hash_key = "LockID" + read_capacity = 2 + write_capacity = 2 + attribute { + name = "LockID" + type = "S" + } +} diff --git a/terraform/state_bucket/common.tf b/terraform/state_bucket/common.tf new file mode 120000 index 0000000..a0251ab --- /dev/null +++ b/terraform/state_bucket/common.tf @@ -0,0 +1 @@ +../common.tf \ No newline at end of file diff --git a/terraform/state_bucket/provider.tf b/terraform/state_bucket/provider.tf new file mode 120000 index 0000000..ab16dd3 --- /dev/null +++ b/terraform/state_bucket/provider.tf @@ -0,0 +1 @@ +../provider.tf \ No newline at end of file From b5d6c4a1939fd6cda5ac3117a1fea786bd990222 Mon Sep 17 00:00:00 2001 From: seph Date: Fri, 19 Mar 2021 21:52:59 -0400 Subject: [PATCH 4/9] vpc --- terraform/aws-ci/common.tf | 1 + terraform/aws-ci/main.tf | 12 ++++++++++++ terraform/aws-ci/provider.tf | 1 + terraform/aws-ci/variables.tf | 14 ++++++++++++++ terraform/aws-ci/vpc.tf | 8 ++++++++ terraform/provider.tf | 13 ------------- terraform/state_bucket/main.tf | 12 ++++++++++++ 7 files changed, 48 insertions(+), 13 deletions(-) create mode 120000 terraform/aws-ci/common.tf create mode 100644 terraform/aws-ci/main.tf create mode 120000 terraform/aws-ci/provider.tf create mode 100644 terraform/aws-ci/variables.tf create mode 100644 terraform/aws-ci/vpc.tf create mode 100644 terraform/state_bucket/main.tf diff --git a/terraform/aws-ci/common.tf b/terraform/aws-ci/common.tf new file mode 120000 index 0000000..a0251ab --- /dev/null +++ 
b/terraform/aws-ci/common.tf @@ -0,0 +1 @@ +../common.tf \ No newline at end of file diff --git a/terraform/aws-ci/main.tf b/terraform/aws-ci/main.tf new file mode 100644 index 0000000..baa8e8e --- /dev/null +++ b/terraform/aws-ci/main.tf @@ -0,0 +1,12 @@ +terraform { + required_version = "0.14.8" + + # When bootstrapping this, you will have to manually disable this and set the bucket up first. + backend "s3" { + bucket = "osquery-terraform-state" + region = "us-east-1" + key = "tf/osquery/aws/aws_ci.tfstate" # NOTE THIS PATH IS PROJECT SPECIFIC + role_arn = "arn:aws:iam::107349553668:role/IdentityAccountAccessRole" + dynamodb_table = "osquery-terraform-state" + } +} diff --git a/terraform/aws-ci/provider.tf b/terraform/aws-ci/provider.tf new file mode 120000 index 0000000..ab16dd3 --- /dev/null +++ b/terraform/aws-ci/provider.tf @@ -0,0 +1 @@ +../provider.tf \ No newline at end of file diff --git a/terraform/aws-ci/variables.tf b/terraform/aws-ci/variables.tf new file mode 100644 index 0000000..88a5635 --- /dev/null +++ b/terraform/aws-ci/variables.tf @@ -0,0 +1,14 @@ +variable "vpc_cidr" { + description = "CIDR for the whole VPC" + default = "10.83.0.0/16" +} + +variable "public_subnet_cidr" { + description = "CIDR for the Public Subnet" + default = "10.83.0.0/24" +} + +variable "private_subnet_cidr" { + description = "CIDR for the Private Subnet" + default = "10.84.1.0/24" +} diff --git a/terraform/aws-ci/vpc.tf b/terraform/aws-ci/vpc.tf new file mode 100644 index 0000000..0c155aa --- /dev/null +++ b/terraform/aws-ci/vpc.tf @@ -0,0 +1,8 @@ +resource "aws_vpc" "runners" { + provider = aws.osquery-dev + cidr_block = var.vpc_cidr + tags = { + Name = "runners" + } + +} diff --git a/terraform/provider.tf b/terraform/provider.tf index fffe422..1d3139e 100644 --- a/terraform/provider.tf +++ b/terraform/provider.tf @@ -1,16 +1,3 @@ -terraform { - required_version = "0.14.8" - - # When bootstrapping this, you will have to manually disable this and set the bucket up 
first. - backend "s3" { - bucket = "osquery-terraform-state" - region = "us-east-1" - key = "tf/osquery/aws/state_bucket.tfstate" - role_arn = "arn:aws:iam::107349553668:role/IdentityAccountAccessRole" - dynamodb_table = "osquery-terraform-state" - } -} - # This is the default provider. It will use whatever from the environment. provider "aws" { region = local.main_region diff --git a/terraform/state_bucket/main.tf b/terraform/state_bucket/main.tf new file mode 100644 index 0000000..a6660c0 --- /dev/null +++ b/terraform/state_bucket/main.tf @@ -0,0 +1,12 @@ +terraform { + required_version = "0.14.8" + + # When bootstrapping this, you will have to manually disable this and set the bucket up first. + backend "s3" { + bucket = "osquery-terraform-state" + region = "us-east-1" + key = "tf/osquery/aws/state_bucket.tfstate" # NOTE THIS PATH IS PROJECT SPECIFIC + role_arn = "arn:aws:iam::107349553668:role/IdentityAccountAccessRole" + dynamodb_table = "osquery-terraform-state" + } +} From d79e821d6fdde46affda04bd8648eab9c4e54ef4 Mon Sep 17 00:00:00 2001 From: seph Date: Sat, 20 Mar 2021 00:35:25 -0400 Subject: [PATCH 5/9] iterate --- terraform/.gitignore | 1 + terraform/aws-ci/vpc.tf | 65 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/terraform/.gitignore b/terraform/.gitignore index 3fa8c86..c035e72 100644 --- a/terraform/.gitignore +++ b/terraform/.gitignore @@ -1 +1,2 @@ .terraform +.terraform.lock.hcl diff --git a/terraform/aws-ci/vpc.tf b/terraform/aws-ci/vpc.tf index 0c155aa..ee98191 100644 --- a/terraform/aws-ci/vpc.tf +++ b/terraform/aws-ci/vpc.tf @@ -1,8 +1,63 @@ -resource "aws_vpc" "runners" { - provider = aws.osquery-dev - cidr_block = var.vpc_cidr - tags = { - Name = "runners" +# See https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Security.html + +locals { + network_acls = { + public_inbound = [ + { + rule_number = 120 + rule_action = "allow" + from_port = 22 + to_port = 22 + protocol = "tcp" + 
cidr_block = "24.61.10.2/32" # seph's house + }, + ] + public_outbound = [ + # the port range here appears to be the _destination_ port. Not + # the source port. eg: no real way to lock this down to packet + # replies. + { + rule_number = 120 + rule_action = "allow" + from_port = 0 + to_port = 65535 + protocol = "tcp" + cidr_block = "24.61.10.2/32" # seph's house + }, + ] + } +} + + + +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + name = "GitHubRunners" + + cidr = "10.83.0.0/16" + + azs = ["us-east-1a"] + private_subnets = ["10.83.1.0/24"] + public_subnets = ["10.83.101.0/24"] + + public_dedicated_network_acl = true + public_inbound_acl_rules = local.network_acls["public_inbound"] + public_outbound_acl_rules = local.network_acls["public_outbound"] + + # TODO + private_dedicated_network_acl = false + + + enable_public_s3_endpoint = true + + enable_nat_gateway = false + enable_vpn_gateway = false + create_database_subnet_group = false + create_elasticache_subnet_group = false + create_redshift_subnet_group = false + + providers = { + aws = aws.osquery-dev } } From 8798c8e065b6a148b1992dcfc62938c7919743c5 Mon Sep 17 00:00:00 2001 From: seph Date: Wed, 24 Mar 2021 12:12:57 -0400 Subject: [PATCH 6/9] checkpoint --- terraform/aws-ci/iam.tf | 85 +++++++++++++++++++++++++++++++++++++++++ terraform/aws-ci/vpc.tf | 26 +++++++++++-- 2 files changed, 108 insertions(+), 3 deletions(-) create mode 100644 terraform/aws-ci/iam.tf diff --git a/terraform/aws-ci/iam.tf b/terraform/aws-ci/iam.tf new file mode 100644 index 0000000..9f5b895 --- /dev/null +++ b/terraform/aws-ci/iam.tf @@ -0,0 +1,85 @@ +# the crux of our permissions problem, is that we need to _bootstrap_ +# a machine, which requires elevated permissions. And then we need to +# drop permissions, and start a runner. This is implemented using IAM +# policies. These are gated on a tag, and setup to allow a one way +# transition. +# +# So, a boot process of: +# 1. Machine starts up with an `bootstrap` tag +# 2. 
that tag grants access to a role +# 3. That role allows assumption of the bootstrap permissions +# 4. credentials are fetched +# 5. tag is removed +# 6. Do we need to rotate/drop our assumed credentials? +# +# References: +# https://medium.com/swlh/aws-iam-assuming-an-iam-role-from-an-ec2-instance-882081386c49 +# https://aws.amazon.com/premiumsupport/knowledge-center/iam-ec2-resource-tags/ + + +data "aws_iam_policy_document" "runner_implicit_role" { + statement { + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ec2.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "runner_implicit_role" { + provider = aws.osquery-dev + name = "GitHubRunnerImplicitIamRole" + assume_role_policy = data.aws_iam_policy_document.runner_implicit_role.json +} + +# Create IAM policy to give implicit role permission to assume broad IAM Role +data "aws_iam_policy_document" "runner_role_permit_sts_assume" { + statement { + actions = ["sts:AssumeRole"] + resources = [ aws_iam_role.runner_bootstrap.arn ] + } +} + +resource "aws_iam_policy" "runner_role_permit_sts_assume" { + provider = aws.osquery-dev + name = "GitHubRunnerPolicyPermitStsAssume" + policy = data.aws_iam_policy_document.runner_role_permit_sts_assume.json +} + +resource "aws_iam_role_policy_attachment" "runner_attach_implicit_role_to_sts_assume_policy" { + provider = aws.osquery-dev + role = aws_iam_role.runner_implicit_role.name + policy_arn = aws_iam_policy.runner_role_permit_sts_assume.arn + lifecycle { + prevent_destroy = true + } +} + +resource "aws_iam_instance_profile" "runner_implicit_instance_profile" { + provider = aws.osquery-dev + name = "GitHubRunnerImplicitIamRole" + role = aws_iam_role.runner_implicit_role.name +} + + +## +## Bootstrap / Initialization Role +## + +data "aws_iam_policy_document" "runner_bootstrap" { + statement { + principals { + type = "AWS" + identifiers = [aws_iam_role.runner_implicit_role.arn] + } + actions = [ "sts:AssumeRole" ] + } +} + +resource 
"aws_iam_role" "runner_bootstrap" { + provider = aws.osquery-dev + name = "GitHubRunnerAssumedBootstrapRole" + assume_role_policy = data.aws_iam_policy_document.runner_bootstrap.json +} diff --git a/terraform/aws-ci/vpc.tf b/terraform/aws-ci/vpc.tf index ee98191..7bb23b5 100644 --- a/terraform/aws-ci/vpc.tf +++ b/terraform/aws-ci/vpc.tf @@ -19,7 +19,7 @@ locals { { rule_number = 120 rule_action = "allow" - from_port = 0 + from_port = 1024 to_port = 65535 protocol = "tcp" cidr_block = "24.61.10.2/32" # seph's house @@ -28,8 +28,6 @@ locals { } } - - module "vpc" { source = "terraform-aws-modules/vpc/aws" name = "GitHubRunners" @@ -44,9 +42,16 @@ module "vpc" { public_inbound_acl_rules = local.network_acls["public_inbound"] public_outbound_acl_rules = local.network_acls["public_outbound"] + manage_default_network_acl = true + # TODO private_dedicated_network_acl = false + # FIXME: I can't get this to work + enable_sts_endpoint = true + sts_endpoint_security_group_ids = [data.aws_security_group.default.id] + sts_endpoint_subnet_ids = module.vpc.public_subnets + #sts_endpoint_private_dns_enabled = true enable_public_s3_endpoint = true @@ -61,3 +66,18 @@ module "vpc" { } } + +# I'm not totally sure why we need these. It seems to be a +# self-referencial loop. But, the docs for this module suggest it, and +# it's probably to avoid a weird race in creation ordering. 
+data "aws_security_group" "default" { + provider = aws.osquery-dev + name = "default" + vpc_id = module.vpc.vpc_id +} + +#data "aws_subnet" "vpc-public" { +# provider = aws.osquery-dev +# name = "public_subnets" +# vpc_id = module.vpc.vpc_id +#} From 56df777861994cddfc71218501ac16edf6ac86a2 Mon Sep 17 00:00:00 2001 From: seph Date: Wed, 24 Mar 2021 20:49:28 -0400 Subject: [PATCH 7/9] closer --- terraform/aws-ci/ec2.tf | 56 ++++++++++++++++++++++++++++++++++++ terraform/aws-ci/iam.tf | 33 ++++++++++++++++++++- terraform/aws-ci/sts-test.sh | 25 ++++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 terraform/aws-ci/ec2.tf create mode 100644 terraform/aws-ci/sts-test.sh diff --git a/terraform/aws-ci/ec2.tf b/terraform/aws-ci/ec2.tf new file mode 100644 index 0000000..0c6c90d --- /dev/null +++ b/terraform/aws-ci/ec2.tf @@ -0,0 +1,56 @@ +resource "aws_launch_template" "runner" { + provider = aws.osquery-dev + + name = "GitHubRunner" + + # This will cause terraform to autoupdate the version. Which can + # break a staging/prod separation, but we're small. 
+ update_default_version = true + + iam_instance_profile { + arn = aws_iam_instance_profile.runner_implicit_instance_profile.arn + } + + ebs_optimized = "true" + image_id = "ami-08f2dbe31f794898b" + key_name = "seph-osquery-dev" + + #network_interfaces { + # associate_public_ip_address = true + # delete_on_termination = true + #} + + # subnet_id = module.vpc.module.vpc[0].arn + + vpc_security_group_ids = [ + module.vpc.default_security_group_id + ] + + instance_type = "r6g.large" + instance_market_options { + market_type = "spot" + } +} + + +resource "aws_launch_template" "sephtestrunner" { + provider = aws.osquery-dev + name = "sephTestGitHubRunner" + + iam_instance_profile { + arn = "arn:aws:iam::204725418487:instance-profile/OsqueryGitHubRunners" + } + ebs_optimized = "false" + image_id = "ami-08f2dbe31f794898b" + key_name = "seph-osquery-dev" + + vpc_security_group_ids = [ + "sg-0447741384aa67749", + ] + + instance_type = "r6g.large" + instance_market_options { + market_type = "spot" + } + +} diff --git a/terraform/aws-ci/iam.tf b/terraform/aws-ci/iam.tf index 9f5b895..5beb5ae 100644 --- a/terraform/aws-ci/iam.tf +++ b/terraform/aws-ci/iam.tf @@ -15,7 +15,10 @@ # References: # https://medium.com/swlh/aws-iam-assuming-an-iam-role-from-an-ec2-instance-882081386c49 # https://aws.amazon.com/premiumsupport/knowledge-center/iam-ec2-resource-tags/ - +# +# Some testing snippets. 
These should get documented elsewhere +# +# data "aws_iam_policy_document" "runner_implicit_role" { statement { @@ -63,6 +66,31 @@ resource "aws_iam_instance_profile" "runner_implicit_instance_profile" { role = aws_iam_role.runner_implicit_role.name } +## +## Policies used in bootstrapping +## + +data "aws_iam_policy_document" "runner_secret_reader" { + statement { + actions = [ + "secretsmanager:DescribeSecret", + "secretsmanager:GetResourcePolicy", + "secretsmanager:GetSecretValue", + "secretsmanager:ListSecretVersionIds", + ] + resources = [ + "arn:aws:secretsmanager:*:204725418487:secret:OSQUERY_GITHUB_RUNNER_TOKEN-9N6Lwh", + ] + } +} + +resource "aws_iam_policy" "runner_secret_reader" { + provider = aws.osquery-dev + name = "OsqueryGitHubRunnerSecretReader" + description = "Read access to the github runner secrets" + policy = data.aws_iam_policy_document.runner_secret_reader.json +} + ## ## Bootstrap / Initialization Role @@ -82,4 +110,7 @@ resource "aws_iam_role" "runner_bootstrap" { provider = aws.osquery-dev name = "GitHubRunnerAssumedBootstrapRole" assume_role_policy = data.aws_iam_policy_document.runner_bootstrap.json + managed_policy_arns = [ + "arn:aws:iam::204725418487:policy/OsqueryGitHubRunnerSecretReader", # This was created out-of-band + ] } diff --git a/terraform/aws-ci/sts-test.sh b/terraform/aws-ci/sts-test.sh new file mode 100644 index 0000000..e05603e --- /dev/null +++ b/terraform/aws-ci/sts-test.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e + +echo "Starting with Permissions: " +aws sts get-caller-identity + +echo "Assuming new role" +TMPFILE=$(mktemp) + +aws sts assume-role \ + --role-arn arn:aws:iam::204725418487:role/GitHubRunnerAssumedBootstrapRole \ + --role-session-name seph-test \ + > $TMPFILE + + +export AWS_ACCESS_KEY_ID=$(jq -rc .Credentials.AccessKeyId $TMPFILE) +export AWS_SECRET_ACCESS_KEY=$(jq -rc .Credentials.SecretAccessKey $TMPFILE) +export AWS_SESSION_TOKEN=$(jq -rc .Credentials.SessionToken $TMPFILE) + +echo "New Role:" +aws 
sts get-caller-identity + +echo "Test Secret Reading" +aws --region us-east-1 secretsmanager get-secret-value --secret-id OSQUERY_GITHUB_RUNNER_TOKEN | jq .ARN From 17acd14f96e1ab26f41a7ccca83d0f072ecc50d4 Mon Sep 17 00:00:00 2001 From: seph Date: Wed, 24 Mar 2021 21:46:54 -0400 Subject: [PATCH 8/9] iterate --- terraform/aws-ci/iam.tf | 13 ++++++++++++- terraform/aws-ci/sts-test.sh | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/terraform/aws-ci/iam.tf b/terraform/aws-ci/iam.tf index 5beb5ae..fd02963 100644 --- a/terraform/aws-ci/iam.tf +++ b/terraform/aws-ci/iam.tf @@ -38,6 +38,7 @@ resource "aws_iam_role" "runner_implicit_role" { } # Create IAM policy to give implicit role permission to assume broad IAM Role +# Sadly, we cannot use tags to restrict this. So say the docs (and it doesn't work) data "aws_iam_policy_document" "runner_role_permit_sts_assume" { statement { actions = ["sts:AssumeRole"] @@ -51,6 +52,8 @@ resource "aws_iam_policy" "runner_role_permit_sts_assume" { policy = data.aws_iam_policy_document.runner_role_permit_sts_assume.json } + + resource "aws_iam_role_policy_attachment" "runner_attach_implicit_role_to_sts_assume_policy" { provider = aws.osquery-dev role = aws_iam_role.runner_implicit_role.name @@ -81,6 +84,13 @@ data "aws_iam_policy_document" "runner_secret_reader" { resources = [ "arn:aws:secretsmanager:*:204725418487:secret:OSQUERY_GITHUB_RUNNER_TOKEN-9N6Lwh", ] + + condition { + test = "StringEquals" + variable = "aws:TagKeys" + values = [ "Bootstrapping" ] + } + } } @@ -107,10 +117,11 @@ data "aws_iam_policy_document" "runner_bootstrap" { } resource "aws_iam_role" "runner_bootstrap" { + # FIXME: where to we set the trust policy here? 
provider = aws.osquery-dev name = "GitHubRunnerAssumedBootstrapRole" assume_role_policy = data.aws_iam_policy_document.runner_bootstrap.json managed_policy_arns = [ - "arn:aws:iam::204725418487:policy/OsqueryGitHubRunnerSecretReader", # This was created out-of-band + aws_iam_policy.runner_secret_reader.arn, ] } diff --git a/terraform/aws-ci/sts-test.sh b/terraform/aws-ci/sts-test.sh index e05603e..22050fa 100644 --- a/terraform/aws-ci/sts-test.sh +++ b/terraform/aws-ci/sts-test.sh @@ -17,6 +17,7 @@ aws sts assume-role \ export AWS_ACCESS_KEY_ID=$(jq -rc .Credentials.AccessKeyId $TMPFILE) export AWS_SECRET_ACCESS_KEY=$(jq -rc .Credentials.SecretAccessKey $TMPFILE) export AWS_SESSION_TOKEN=$(jq -rc .Credentials.SessionToken $TMPFILE) +rm -f "$TMPFILE" echo "New Role:" aws sts get-caller-identity From 023cfd3d7945d2ec3d4c4a1a785c8ab230f5692a Mon Sep 17 00:00:00 2001 From: seph Date: Thu, 25 Mar 2021 01:13:55 -0400 Subject: [PATCH 9/9] seems to work --- terraform/aws-ci/iam.tf | 109 +++++++++++++++++++++++++++++++---- terraform/aws-ci/sts-test.sh | 20 +++++++ 2 files changed, 117 insertions(+), 12 deletions(-) diff --git a/terraform/aws-ci/iam.tf b/terraform/aws-ci/iam.tf index fd02963..01c8584 100644 --- a/terraform/aws-ci/iam.tf +++ b/terraform/aws-ci/iam.tf @@ -1,3 +1,13 @@ +# FIXME: I have IAM conditions backwards. They apply to the _target_ +# not the source. As such, they are not suitable for this kind of +# scheme. Study https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_condition-keys.html and be sad +# +# Maybe security groups??? +# +# Maybe assign a specific policy? +# +# attach/replace policy? + # the crux of our permissions problem, is that we need to _bootstrap_ # a machine, which requires elevated permissions. And then we need to # drop permissions, and start a runner. 
This is implemented using IAM @@ -20,6 +30,7 @@ # # +# This policy lets EC2 assume this node's role data "aws_iam_policy_document" "runner_implicit_role" { statement { actions = ["sts:AssumeRole"] @@ -31,19 +42,46 @@ data "aws_iam_policy_document" "runner_implicit_role" { } } + +# bootstrap instance profile resource "aws_iam_role" "runner_implicit_role" { provider = aws.osquery-dev name = "GitHubRunnerImplicitIamRole" assume_role_policy = data.aws_iam_policy_document.runner_implicit_role.json } +resource "aws_iam_instance_profile" "runner_implicit_instance_profile" { + provider = aws.osquery-dev + name = "GitHubRunnerImplicitIamRole" + role = aws_iam_role.runner_implicit_role.name +} + + +# Runtime instance profile +resource "aws_iam_role" "runner_runtime_implicit_role" { + provider = aws.osquery-dev + name = "GitHubRunnerRuntimeImplicitIamRole" + assume_role_policy = data.aws_iam_policy_document.runner_implicit_role.json +} + +resource "aws_iam_instance_profile" "runner_runtime_implicit_role" { + provider = aws.osquery-dev + name = "GitHubRunnerRuntimeImplicitIamRole" + role = aws_iam_role.runner_runtime_implicit_role.name +} + + + # Create IAM policy to give implicit role permission to assume broad IAM Role # Sadly, we cannot use tags to restrict this. So say the docs (and it doesn't work) data "aws_iam_policy_document" "runner_role_permit_sts_assume" { statement { actions = ["sts:AssumeRole"] resources = [ aws_iam_role.runner_bootstrap.arn ] + + # FIXME: tag conditions here?
} + } resource "aws_iam_policy" "runner_role_permit_sts_assume" { @@ -63,11 +101,6 @@ resource "aws_iam_role_policy_attachment" "runner_attach_implicit_role_to_sts_as } } -resource "aws_iam_instance_profile" "runner_implicit_instance_profile" { - provider = aws.osquery-dev - name = "GitHubRunnerImplicitIamRole" - role = aws_iam_role.runner_implicit_role.name -} ## ## Policies used in bootstrapping @@ -85,11 +118,12 @@ data "aws_iam_policy_document" "runner_secret_reader" { "arn:aws:secretsmanager:*:204725418487:secret:OSQUERY_GITHUB_RUNNER_TOKEN-9N6Lwh", ] - condition { - test = "StringEquals" - variable = "aws:TagKeys" - values = [ "Bootstrapping" ] - } + #condition { + # test = "StringEquals" + # variable = "aws:TagKeys" + # values = [ "Bootstrapping" ] + #} + } } @@ -102,6 +136,41 @@ resource "aws_iam_policy" "runner_secret_reader" { } + +data "aws_iam_policy_document" "ec2_instance_downgrader" { + statement { + actions = [ + "ec2:ReplaceIamInstanceProfileAssociation", + "ec2:DescribeIamInstanceProfileAssociations", + "iam:PassRole", # Needed to scope this account to passing this role + ] + resources = [ + "*" + ] + + # TODO: conditions? 
+ # iam:RoleName GitHubRunnerRuntimeImplicitIamRole + # aws:Resource role/GitHubRunnerRuntimeImplicitIamRole + #condition { + # test = "StringEquals" + # variable = "aws:TagKeys" + # values = [ "Bootstrapping" ] + #} + + + } +} + +resource "aws_iam_policy" "ec2_instance_downgrader" { + provider = aws.osquery-dev + name = "GitHubRunnerInstanceDowngrader" + description = "Permission to downgrade an instances IAM role" + policy = data.aws_iam_policy_document.ec2_instance_downgrader.json +} + + + + ## ## Bootstrap / Initialization Role ## @@ -113,15 +182,31 @@ data "aws_iam_policy_document" "runner_bootstrap" { identifiers = [aws_iam_role.runner_implicit_role.arn] } actions = [ "sts:AssumeRole" ] + + # https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_condition-keys.html#condition-keys-principaltag + #condition { + # test = "StringEquals" + # variable = "aws:PrincipalTag/Bootstrapping" + # values = [ "true" ] + #} } } resource "aws_iam_role" "runner_bootstrap" { - # FIXME: where to we set the trust policy here? 
provider = aws.osquery-dev name = "GitHubRunnerAssumedBootstrapRole" assume_role_policy = data.aws_iam_policy_document.runner_bootstrap.json managed_policy_arns = [ - aws_iam_policy.runner_secret_reader.arn, + aws_iam_policy.runner_secret_reader.arn, + aws_iam_policy.ec2_instance_downgrader.arn, ] } + + +## +## Runtime Permissions +## + +data "aws_iam_policy_document" "runner_runtime_implicit_role" { + +} diff --git a/terraform/aws-ci/sts-test.sh b/terraform/aws-ci/sts-test.sh index 22050fa..4dbd014 100644 --- a/terraform/aws-ci/sts-test.sh +++ b/terraform/aws-ci/sts-test.sh @@ -2,6 +2,8 @@ set -e +AWS_INSTANCE_ID=$(curl http://169.254.169.254/latest/meta-data/instance-id) + echo "Starting with Permissions: " aws sts get-caller-identity @@ -24,3 +26,21 @@ aws sts get-caller-identity echo "Test Secret Reading" aws --region us-east-1 secretsmanager get-secret-value --secret-id OSQUERY_GITHUB_RUNNER_TOKEN | jq .ARN + +echo "Drop Permissions" + +# This is stupid +TMP_ASSOC=$(mktemp) + +aws --region us-east-1 ec2 describe-iam-instance-profile-associations \ + --filters Name=instance-id,Values="$AWS_INSTANCE_ID" \ + > $TMP_ASSOC + +ASSOC_ID=$(jq -rc .IamInstanceProfileAssociations[0].AssociationId $TMP_ASSOC) + +aws --region us-east-1 ec2 replace-iam-instance-profile-association \ + --association-id "$ASSOC_ID" \ + --iam-instance-profile Arn=arn:aws:iam::204725418487:instance-profile/GitHubRunnerRuntimeImplicitIamRole + +# Is there a better logout? +unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN