Skip to content
This repository has been archived by the owner on Jan 19, 2022. It is now read-only.

Check asg health check is done #188

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.rst
Expand Up @@ -115,6 +115,10 @@ The ``ec2`` key configures the EC2 instances created by auto-scaling groups (ASG
Maximum number of instances to scale up to
``min``
Minimum number of instances to maintain.
``health_check_grace_period``
Seconds to wait before running the healthcheck on an instance. Default 300 for the EC2 healthcheck type, 600 for ELB
``health_check_type``
Use EC2 or ELB healthcheck types. Default EC2

Example::

Expand All @@ -125,6 +129,8 @@ The ``ec2`` key configures the EC2 instances created by auto-scaling groups (ASG
desired: 1
max: 3
min: 0
health_check_grace_period: 360
health_check_type: ELB

:``tags``:
A dictionary of tag name to value to apply to all instances of the ASG. Note that the environment you select via ``fab aws`` will be applied as a tag with a name of ``Env``.
Expand Down
30 changes: 29 additions & 1 deletion bootstrap_cfn/autoscale.py
Expand Up @@ -8,7 +8,7 @@

from bootstrap_cfn import utils

from bootstrap_cfn.errors import AutoscalingInstanceCountError, AutoscalingGroupNotFound
from bootstrap_cfn.errors import AutoscalingGroupNotFound, AutoscalingInstanceCountError


class Autoscale:
Expand Down Expand Up @@ -81,6 +81,13 @@ def cycle_instances(self,
logging.getLogger("bootstrap-cfn").info("cycle_instances: Found {} instance ids, {}"
.format(len(current_instance_ids), current_instance_ids))

# save the number of instances before starting the upgrade
num_instances = len(current_instance_ids)

# get the ASG HealthCheckGracePeriod
health_check_grace_period = self.group.health_check_period
logging.getLogger("bootstrap-cfn").info("ASG HealthCheckGracePeriod: %s" % health_check_grace_period)

# Iterate through the current instances, replacing current instances with new ones
for current_instance_id in current_instance_ids:
# Set the desired instances +1 and wait for it to be created
Expand All @@ -89,9 +96,30 @@ def cycle_instances(self,
self.wait_for_instances(len(current_instance_ids) + 1)
logging.getLogger("bootstrap-cfn").info("cycle_instances: Terminating recycled instance {} after {} seconds..."
.format(current_instance_id, termination_delay))
# wait for the same time as the "HealthCheckGracePeriod" in the ASG
logging.getLogger("bootstrap-cfn").info("Waiting %ss - HealthCheckGracePeriod" % health_check_grace_period)
time.sleep(health_check_grace_period)
logging.getLogger("bootstrap-cfn").info("End of waiting period")

# check that the number of healthy instances equals the number of expected instances,
# i.e. num_instances + 1 (the original instances plus the newly created one)
new_curr_inst_ids = [instance.get('InstanceId') for instance in self.get_healthy_instances()]
logging.getLogger("bootstrap-cfn").info("new instance list %r" % new_curr_inst_ids)
if len(new_curr_inst_ids) != num_instances + 1:
logging.getLogger("bootstrap-cfn").error("Expected %s instances, found %s." % (
num_instances + 1, len(new_curr_inst_ids))
)
raise AutoscalingInstanceCountError(self.group.name, num_instances + 1, new_curr_inst_ids)
else:
logging.getLogger("bootstrap-cfn").info("Expected %s instances, found %s." % (
num_instances + 1, len(new_curr_inst_ids))
)

# If we have a delay before termination defined, delay before terminating the current instance
if termination_delay:
logging.getLogger("bootstrap-cfn").info("Waiting %ss - termination_delay" % termination_delay)
time.sleep(termination_delay)
logging.getLogger("bootstrap-cfn").info("End of waiting period")
client.terminate_instance_in_auto_scaling_group(
InstanceId=current_instance_id,
ShouldDecrementDesiredCapacity=True
Expand Down
100 changes: 88 additions & 12 deletions bootstrap_cfn/config.py
Expand Up @@ -91,14 +91,12 @@ def process(self):
def base_template(self):
from bootstrap_cfn import vpc
t = Template()
if 'os' in self.data['ec2'] and self.data['ec2']['os'] == 'windows2012':
t.add_mapping("AWSRegion2AMI", {
"eu-west-1": {"AMI": "ami-7943ec0a"},
})
else:
t.add_mapping("AWSRegion2AMI", {
"eu-west-1": {"AMI": "ami-00d88f77"},
})

# Get the OS specific data
os_data = self._get_os_data()
t.add_mapping("AWSRegion2AMI", {
os_data.get('region'): {"AMI": os_data.get('ami')},
})

if 'vpc' in self.data:
logging.info('bootstrap-cfn::base_template: Using configuration VPC address settings')
Expand Down Expand Up @@ -837,10 +835,30 @@ def ref_fixup(x):
return dict([(k, ref_fixup(v)) for k, v in o.items()])

def get_ec2_userdata(self):
"""
Build and return the user_data that'll be used for ec2 instances.
This contains a series of required entries, default config, and
data specified in the template.
"""
os_data = self._get_os_data()
data = self.data['ec2']

parts = []

ami_type = os_data.get('type')

# Below is the ami flavour specific defaults
if ami_type == 'linux':
parts.append({
'content': yaml.dump(
{
'package_update': True,
'package_upgrade': True,
'package_reboot_if_required': True
}
),
'mime_type': 'text/cloud-config'
})

boothook = self.get_hostname_boothook(data)

if boothook:
Expand Down Expand Up @@ -1010,15 +1028,30 @@ def ec2(self):
logging.warning("config: Tag '%s' is deprecated.."
% (k))

# Setup ASG defaults
auto_scaling_config = data.get('auto_scaling', {})
asg_min_size = auto_scaling_config.get('min', 1)
asg_max_size = auto_scaling_config.get('max', 5)
asg_desired_size = auto_scaling_config.get('desired', 2)
health_check_type = auto_scaling_config.get('health_check_type', 'EC2').upper()
# The basic EC2 healthcheck needs only a short grace period; if we switch to ELB then
# there's a lot more setup to be done before we should attempt a healthcheck
if health_check_type == 'ELB':
default_health_check_grace_period = 600
else:
default_health_check_grace_period = 300
health_check_grace_period = auto_scaling_config.get('health_check_grace_period', default_health_check_grace_period)
scaling_group = AutoScalingGroup(
"ScalingGroup",
VPCZoneIdentifier=[Ref("SubnetA"), Ref("SubnetB"), Ref("SubnetC")],
MinSize=data['auto_scaling']['min'],
MaxSize=data['auto_scaling']['max'],
DesiredCapacity=data['auto_scaling']['desired'],
MinSize=asg_min_size,
MaxSize=asg_max_size,
DesiredCapacity=asg_desired_size,
AvailabilityZones=GetAZs(),
Tags=ec2_tags,
LaunchConfigurationName=Ref(launch_config),
HealthCheckGracePeriod=health_check_grace_period,
HealthCheckType=health_check_type,
)
resources.append(scaling_group)

Expand All @@ -1045,3 +1078,46 @@ def _attach_elbs(self, template):
template.resources[asgs[0].title] = asgs[0]

return template

def _get_os_data(self):
    """
    Get details about the OS selected in the config data.

    The OS name is read from the 'os' key of ``self.data['ec2']``; when
    that key is absent, 'ubuntu-1404' is used.

    Return:
        os_data(dict): Dictionary of OS data in the form
            {
                'name': 'ubuntu-1404',
                'ami': 'ami-464af835',
                'region': 'eu-west-1',
                'distribution': 'ubuntu',
                'type': 'linux',
                'release': '20160217.1'
            }

    Exceptions:
        OSTypeNotFoundError: Raised when the OS in the config file is not
            recognised
    """
    os_default = 'ubuntu-1404'
    available_types = {
        'ubuntu-1404': {
            'name': 'ubuntu-1404',
            'ami': 'ami-464af835',
            'region': 'eu-west-1',
            'distribution': 'ubuntu',
            'type': 'linux',
            'release': '20160217.1'
        },
        'windows2012': {
            'name': 'windows2012',
            'ami': 'ami-7943ec0a',
            'region': 'eu-west-1',
            'distribution': 'windows',
            'type': 'windows',
            'release': '2015.12.31'
        }
    }
    os_choice = self.data['ec2'].get('os', os_default)
    # Single lookup: every table entry is a non-empty dict, so a falsy
    # result means the requested OS is unknown.
    os_data = available_types.get(os_choice)
    if not os_data:
        # Report the value that was actually looked up, with a sorted list
        # of names so the message is stable and readable on Python 2 and 3.
        raise errors.OSTypeNotFoundError(os_choice, sorted(available_types))
    return os_data
10 changes: 8 additions & 2 deletions bootstrap_cfn/errors.py
Expand Up @@ -60,14 +60,20 @@ class CloudResourceNotFoundError(BootstrapCfnError):
pass


class OSTypeNotFoundError(BootstrapCfnError):
    """
    Error for an OS type that is not one of the recognised types.

    Args:
        type: The OS type that was requested.
        available_types: The recognised OS types, shown in the message.
    """
    def __init__(self, type, available_types):
        template = "The os type '{}' is not recognised, should be one of {}. "
        super(OSTypeNotFoundError, self).__init__(
            template.format(type, available_types)
        )


class AutoscalingGroupNotFound(BootstrapCfnError):
    # NOTE(review): presumably raised when an autoscaling group lookup finds
    # no matching group -- confirm against the raise sites.
    pass


class AutoscalingInstanceCountError(BootstrapCfnError):
    """
    Error for an autoscaling group that does not hold the expected number
    of instances.

    Args:
        autoscaling_group: Identifier of the autoscaling group, interpolated
            into the message.
        expected_instance_count: Number of instances that was expected.
        instances: Sized collection of the instances actually found; both
            its length and its contents are included in the message.
    """
    def __init__(self, autoscaling_group, expected_instance_count, instances):
        # super() must name this class: naming an unrelated exception class
        # makes construction fail with a TypeError instead of building the
        # intended error message.
        super(AutoscalingInstanceCountError, self).__init__(
            "Could not find {} instances in autoscaling group {}. Actual state is {} instances, {}"
            .format(expected_instance_count, autoscaling_group, len(instances), instances)
        )
1 change: 1 addition & 0 deletions bootstrap_cfn/fab_tasks.py
Expand Up @@ -20,6 +20,7 @@
from bootstrap_cfn.iam import IAM
from bootstrap_cfn.r53 import R53
from bootstrap_cfn.utils import tail
from bootstrap_cfn.vpc import VPC


# Default fab config. Set via the tasks below or --set
Expand Down
20 changes: 12 additions & 8 deletions tests/tests.py
Expand Up @@ -757,7 +757,7 @@ def test_process(self):

mappings = cfn_template['Mappings']
expected = {
'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-00d88f77'}},
'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-464af835'}},
'SubnetConfig': {
'VPC': {
'CIDR': '10.0.0.0/16',
Expand Down Expand Up @@ -820,7 +820,7 @@ def test_process_with_vpc_config(self):

mappings = cfn_template['Mappings']
expected = {
'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-00d88f77'}},
'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-464af835'}},
'SubnetConfig': {
'VPC': {
'CIDR': '172.22.0.0/16',
Expand Down Expand Up @@ -1265,6 +1265,8 @@ def test_ec2(self):
VPCZoneIdentifier=[Ref("SubnetA"), Ref("SubnetB"), Ref("SubnetC")],
LaunchConfigurationName=Ref("BaseHostLaunchConfig"),
AvailabilityZones=GetAZs(""),
HealthCheckGracePeriod=300,
HealthCheckType='EC2'
)

BaseHostSG = SecurityGroup(
Expand Down Expand Up @@ -1384,9 +1386,10 @@ def test_get_ec2_userdata(self):
with patch.object(config, 'get_hostname_boothook', return_value={"content": "sentinel"}) as mock_boothook:
user_data_parts = config.get_ec2_userdata()
mock_boothook.assert_called_once_with(data['ec2'])

compare(yaml.load(user_data_parts[1]['content']), data['ec2']['cloud_config'])
compare(user_data_parts[0]['content'], 'sentinel')
# We have linux, so we put package update data in first
compare(user_data_parts[0]['content'], '{package_reboot_if_required: true, package_update: true, package_upgrade: true}\n')
compare(user_data_parts[1]['content'], 'sentinel')
compare(yaml.load(user_data_parts[2]['content']), data['ec2']['cloud_config'])

def test_get_ec2_userdata_no_cloud_config(self):
# If there is no cloud config we should get a default
Expand All @@ -1399,9 +1402,10 @@ def test_get_ec2_userdata_no_cloud_config(self):
with patch.object(config, 'get_hostname_boothook', return_value={"content": "sentinel"}) as mock_boothook:
user_data_parts = config.get_ec2_userdata()
mock_boothook.assert_called_once_with(data['ec2'])

compare(yaml.load(user_data_parts[1]['content']), {'manage_etc_hosts': True})
compare(user_data_parts[0]['content'], 'sentinel')
# We have linux, so we put package update data in first
compare(user_data_parts[0]['content'], '{package_reboot_if_required: true, package_update: true, package_upgrade: true}\n')
compare(user_data_parts[1]['content'], 'sentinel')
compare(yaml.load(user_data_parts[2]['content']), {'manage_etc_hosts': True})

def test_get_hostname_boothook(self):
config = ConfigParser({}, environment="env", application="test", stack_name="my-stack")
Expand Down