Skip to content
This repository has been archived by the owner on Oct 24, 2020. It is now read-only.

Commit

Permalink
feat: use cloudwatch logs for container output
Browse files Browse the repository at this point in the history
Uses a CloudWatch LogGroup for ardere test plan runs. Also includes
a refactor of the handler into a step_function file and a separate
handler.py for Lambda as it requires some quirky path manipulation
for AWS to see ardere as a top-level Python package.

Closes #27
  • Loading branch information
bbangert committed Mar 17, 2017
1 parent 4dd67db commit 8bafa09
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 206 deletions.
45 changes: 32 additions & 13 deletions ardere/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,31 @@ class ECSManager(object):
}

def __init__(self, plan):
# type: (str) -> None
# type: (Dict[str, Any]) -> None
"""Create and return a ECSManager for a cluster of the given name."""
self._ecs_client = self.boto.client('ecs')
self._ec2_client = self.boto.client('ec2')
self._ecs_name = plan["ecs_name"]
self._plan = plan

# Pull out the env vars
self.s3_ready_bucket = os.environ["s3_ready_bucket"]
self.container_log_group = os.environ["container_log_group"]
self.ecs_profile = os.environ["ecs_profile"]

if "plan_run_uuid" not in plan:
plan["plan_run_uuid"] = str(uuid.uuid4())

self._plan_uuid = plan["plan_run_uuid"]

    @property
    def plan_uuid(self):
        # type: () -> str
        """Unique identifier shared by every step of this test plan run."""
        return self._plan_uuid

@property
def s3_ready_file(self):
return "https://s3.amazonaws.com/{bucket}/{key}".format(
bucket=os.environ["s3_ready_bucket"],
bucket=self.s3_ready_bucket,
key="{}.ready".format(self._plan_uuid)
)

Expand Down Expand Up @@ -112,7 +121,7 @@ def request_instances(self, instances):
InstanceType=instance_type,
UserData="#!/bin/bash \necho ECS_CLUSTER='" + self._ecs_name +
"' >> /etc/ecs/ecs.config",
IamInstanceProfile={"Arn": os.environ["ecs_profile"]}
IamInstanceProfile={"Arn": self.ecs_profile}
)

# Track returned instances for tagging step
Expand All @@ -133,17 +142,17 @@ def create_service(self, step):
logger.info("CreateService called with: {}".format(step))

# Prep the shell command
shell_command = [
'sh', '-c', '"$WAITFORCLUSTER"',
'waitforcluster.sh', self.s3_ready_file,
str(step.get("run_delay", 0))
]
shell_command2 = ' '.join(shell_command) + ' && ' + step[
"additional_command_args"]
shell_command3 = ['sh', '-c', '{}'.format(shell_command2)]
wfc_var = '__ARDERE_WAITFORCLUSTER_SH__'
wfc_cmd = 'sh -c "${}" waitforcluster.sh {} {}'.format(
wfc_var,
self.s3_ready_file,
step.get("run_delay", 0)
)
service_cmd = step["additional_command_args"]
cmd = ['sh', '-c', '{} && {}'.format(wfc_cmd, service_cmd)]

# Prep the env vars
env_vars = [{"name": "WAITFORCLUSTER", "value": shell_script}]
env_vars = [{"name": wfc_var, "value": shell_script}]
for env_var in step.get("environment_data", []):
name, value = env_var.split("=", 1)
env_vars.append({"name": name, "value": value})
Expand All @@ -160,7 +169,17 @@ def create_service(self, step):
# using only memoryReservation sets no hard limit
"memoryReservation": 256,
"environment": env_vars,
"entryPoint": shell_command3
"entryPoint": cmd,
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": self.container_log_group,
"awslogs-region": "us-east-1",
"awslogs-stream-prefix": "ardere-{}".format(
self.plan_uuid
)
}
}
}
],
placementConstraints=[
Expand Down
129 changes: 0 additions & 129 deletions ardere/handler.py

This file was deleted.

159 changes: 159 additions & 0 deletions ardere/step_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import os
import logging
import time
from collections import defaultdict

import boto3
import botocore
from typing import Any, Dict, List # noqa

from ardere.aws import ECSManager
from ardere.exceptions import (
ServicesStartingException,
ShutdownPlanException
)

logger = logging.getLogger()
logger.setLevel(logging.INFO)


class AsynchronousPlanRunner(object):
    """Asynchronous Test Plan Runner

    This step function based runner handles running a test plan in an
    asynchronous manner, where each step will wait for its run_delay if
    present before running.

    Each public ``Step N`` method is a Lambda entry point wired to one state
    of the step function; each receives the plan ``event`` dict and returns
    it (possibly enriched) so it flows to the next state.
    """
    # For testing purposes: tests may swap in a stubbed boto3 module.
    boto = boto3

    def __init__(self, event, context):
        # type: (Dict[str, Any], Any) -> None
        """Capture the Lambda event/context and build the ECS manager.

        ECSManager mutates ``event`` in place, adding ``plan_run_uuid`` on
        the first step so later steps reuse the same run id.
        """
        logger.info("Called with {}".format(event))
        logger.info("Environ: {}".format(os.environ))
        self.ecs = ECSManager(plan=event)
        self.event = event
        self.context = context

    def _build_instance_map(self):
        # type: () -> Dict[str, int]
        """Given a JSON test-plan, build and return a dict of instance types
        and how many should exist for each type."""
        instances = defaultdict(int)
        for step in self.event["steps"]:
            instances[step["instance_type"]] += step["instance_count"]
        return instances

    def _find_test_plan_duration(self):
        # type: () -> int
        """Locates and calculates the longest test plan duration from its
        delay through its duration of the plan."""
        return max(
            [x.get("run_delay", 0) + x["run_max_time"] for x in
             self.event["steps"]]
        )

    def populate_missing_instances(self):
        # type: () -> Dict[str, Any]
        """Populate any missing EC2 instances needed for the test plan in the
        cluster

        Step 1
        """
        needed = self._build_instance_map()
        logger.info("Plan instances needed: {}".format(needed))
        current_instances = self.ecs.query_active_instances()
        missing_instances = self.ecs.calculate_missing_instances(
            desired=needed, current=current_instances
        )
        if missing_instances:
            logger.info("Requesting instances: {}".format(missing_instances))
            self.ecs.request_instances(missing_instances)
        return self.event

    def create_ecs_services(self):
        # type: () -> Dict[str, Any]
        """Create all the ECS services needed

        Step 2
        """
        self.ecs.create_services(self.event["steps"])
        return self.event

    def wait_for_cluster_ready(self):
        # type: () -> Dict[str, Any]
        """Check all the ECS services to see if they're ready

        Step 3

        Raises ServicesStartingException to make the step function retry
        until every service reports ready.
        """
        if not self.ecs.all_services_ready(self.event["steps"]):
            raise ServicesStartingException()
        return self.event

    def signal_cluster_start(self):
        # type: () -> Dict[str, Any]
        """Drop a ready file in S3 to trigger the test plan to begin

        Step 4

        The file body is the epoch start time; later steps read it back to
        compute how long the plan has been running.
        """
        s3_client = self.boto.client('s3')
        s3_client.put_object(
            ACL="public-read",
            # NOTE: b'{}'.format(...) is a Python 3 AttributeError (bytes
            # has no .format); build the str first, then encode.
            Body=str(int(time.time())).encode('utf-8'),
            Bucket=os.environ["s3_ready_bucket"],
            Key="{}.ready".format(self.ecs.plan_uuid),
            Metadata={
                "ECSCluster": self.event["ecs_name"]
            }
        )
        return self.event

    def check_for_cluster_done(self):
        # type: () -> Dict[str, Any]
        """Check all the ECS services to see if they've run for their
        specified duration

        Step 5

        Raises ShutdownPlanException when the plan has finished or the
        ready file is gone, which routes the step function to cleanup.
        """
        # Check to see if the S3 file is still around. s3.Object() is lazy
        # and never hits the network, so the fetch itself must be inside
        # the try for a missing/inaccessible file to abort the run.
        s3 = self.boto.resource('s3')
        try:
            ready_file = s3.Object(
                os.environ["s3_ready_bucket"],
                "{}.ready".format(self.ecs.plan_uuid)
            )
            file_contents = ready_file.get()['Body'].read().decode('utf-8')
        except botocore.exceptions.ClientError:
            # Error getting to the bucket/key, abort test run
            raise ShutdownPlanException("Error accessing ready file")

        start_time = int(file_contents)

        # Update to running count 0 any services that should halt by now
        self.ecs.stop_finished_services(start_time, self.event["steps"])

        # If we're totally done, exit.
        now = time.time()
        plan_duration = self._find_test_plan_duration()
        if now > (start_time + plan_duration):
            raise ShutdownPlanException("Test Plan has completed")
        return self.event

    def cleanup_cluster(self):
        # type: () -> Dict[str, Any]
        """Shutdown all ECS services and deregister all task definitions

        Step 6
        """
        self.ecs.shutdown_plan(self.event["steps"])

        # Attempt to remove the S3 object; best-effort, a missing file
        # just means the run already cleaned up.
        s3 = self.boto.resource('s3')
        try:
            ready_file = s3.Object(
                os.environ["s3_ready_bucket"],
                "{}.ready".format(self.ecs.plan_uuid)
            )
            ready_file.delete()
        except botocore.exceptions.ClientError:
            pass
        return self.event
Loading

0 comments on commit 8bafa09

Please sign in to comment.