ministryofjustice · sevenmachines · Feb 26, 2016 · Feb 23, 2016 · Feb 23, 2016 · Feb 23, 2016
diff --git a/README.rst b/README.rst
@@ -115,6 +115,10 @@ The ``ec2`` key configures the EC2 instances created by auto-scaling groups (ASG
     Maximum number of instances to scale up to
   ``min``
     Minimum number of instances to maintain.
+  ``health_check_grace_period``
+    Seconds to wait before running the healthcheck on a new instance. Default 300, or 600 when ``health_check_type`` is ELB.
+  ``health_check_type``
+    Healthcheck type to use, either EC2 or ELB. Default EC2.
 
   Example::
 
@@ -125,6 +129,8 @@ The ``ec2`` key configures the EC2 instances created by auto-scaling groups (ASG
           desired: 1
           max: 3
           min: 0
+          health_check_grace_period: 360
+          health_check_type: ELB
 
 :``tags``:
   A dictionary of tag name to value to apply to all instances of the ASG. Note that the environment you select via ``fab aws`` will be applied as a tag with a name of ``Env``.

diff --git a/bootstrap_cfn/autoscale.py b/bootstrap_cfn/autoscale.py
@@ -8,7 +8,7 @@
 
 from bootstrap_cfn import utils
 
-from bootstrap_cfn.errors import AutoscalingInstanceCountError, AutoscalingGroupNotFound
+from bootstrap_cfn.errors import AutoscalingGroupNotFound, AutoscalingInstanceCountError
 
 
 class Autoscale:
@@ -81,6 +81,13 @@ def cycle_instances(self,
         logging.getLogger("bootstrap-cfn").info("cycle_instances: Found {} instance ids, {}"
                                                 .format(len(current_instance_ids), current_instance_ids))
 
+        # save the number of instances before starting the upgrade
+        num_instances = len(current_instance_ids)
+
+        # get the ASG HealthCheckGracePeriod
+        health_check_grace_period = self.group.health_check_period
+        logging.getLogger("bootstrap-cfn").info("ASG HealthCheckGracePeriod: %s" % health_check_grace_period)
+
         # Iterate through the current instances, replacing current instances with new ones
         for current_instance_id in current_instance_ids:
             # Set the desired instances +1 and wait for it to be created
@@ -89,9 +96,30 @@ def cycle_instances(self,
             self.wait_for_instances(len(current_instance_ids) + 1)
             logging.getLogger("bootstrap-cfn").info("cycle_instances: Terminating recycled instance {} after {} seconds..."
                                                     .format(current_instance_id, termination_delay))
+            # wait for the same time as the "HealthCheckGracePeriod" in the ASG
+            logging.getLogger("bootstrap-cfn").info("Waiting %ss - HealthCheckGracePeriod" % health_check_grace_period)
+            time.sleep(health_check_grace_period)
+            logging.getLogger("bootstrap-cfn").info("End of waiting period")
+
+            # check that the number of healthy instances equals the number of expected instances,
+            # where the expected number of instances is num_instances + 1
+            new_curr_inst_ids = [instance.get('InstanceId') for instance in self.get_healthy_instances()]
+            logging.getLogger("bootstrap-cfn").info("new instance list %r" % new_curr_inst_ids)
+            if len(new_curr_inst_ids) != num_instances + 1:
+                logging.getLogger("bootstrap-cfn").error("Expected %s instances, found %s." % (
+                    num_instances + 1, len(new_curr_inst_ids))
+                )
+                raise AutoscalingInstanceCountError(self.group.name, num_instances + 1, new_curr_inst_ids)
+            else:
+                logging.getLogger("bootstrap-cfn").info("Expected %s instances, found %s." % (
+                    num_instances + 1, len(new_curr_inst_ids))
+                )
+
             # If we have a delay before termination defined, delay before terminating the current instance
             if termination_delay:
+                logging.getLogger("bootstrap-cfn").info("Waiting %ss - termination_delay" % termination_delay)
                 time.sleep(termination_delay)
+                logging.getLogger("bootstrap-cfn").info("End of waiting period")
             client.terminate_instance_in_auto_scaling_group(
                 InstanceId=current_instance_id,
                 ShouldDecrementDesiredCapacity=True

diff --git a/bootstrap_cfn/config.py b/bootstrap_cfn/config.py
@@ -91,14 +91,12 @@ def process(self):
     def base_template(self):
         from bootstrap_cfn import vpc
         t = Template()
-        if 'os' in self.data['ec2'] and self.data['ec2']['os'] == 'windows2012':
-            t.add_mapping("AWSRegion2AMI", {
-                "eu-west-1": {"AMI": "ami-7943ec0a"},
-            })
-        else:
-            t.add_mapping("AWSRegion2AMI", {
-                "eu-west-1": {"AMI": "ami-00d88f77"},
-            })
+
+        # Get the OS specific data
+        os_data = self._get_os_data()
+        t.add_mapping("AWSRegion2AMI", {
+            os_data.get('region'): {"AMI": os_data.get('ami')},
+        })
 
         if 'vpc' in self.data:
             logging.info('bootstrap-cfn::base_template: Using configuration VPC address settings')
@@ -837,10 +835,30 @@ def ref_fixup(x):
         return dict([(k, ref_fixup(v)) for k, v in o.items()])
 
     def get_ec2_userdata(self):
+        """
+        Build and return the user_data that'll be used for ec2 instances.
+        This contains a series of required entries, default config,
+        and data specified in the template.
+        """
+        os_data = self._get_os_data()
         data = self.data['ec2']
-
         parts = []
 
+        ami_type = os_data.get('type')
+
+        # Below are the AMI flavour specific defaults
+        if ami_type == 'linux':
+            parts.append({
+                'content': yaml.dump(
+                    {
+                        'package_update': True,
+                        'package_upgrade': True,
+                        'package_reboot_if_required': True
+                    }
+                ),
+                'mime_type': 'text/cloud-config'
+            })
+
         boothook = self.get_hostname_boothook(data)
 
         if boothook:
@@ -1010,15 +1028,30 @@ def ec2(self):
                 logging.warning("config: Tag '%s' is deprecated.."
                                 % (k))
 
+        # Setup ASG defaults
+        auto_scaling_config = data.get('auto_scaling', {})
+        asg_min_size = auto_scaling_config.get('min', 1)
+        asg_max_size = auto_scaling_config.get('max', 5)
+        asg_desired_size = auto_scaling_config.get('desired', 2)
+        health_check_type = auto_scaling_config.get('health_check_type', 'EC2').upper()
+        # The basic EC2 healthcheck only needs a short grace period; if we switch to ELB then
+        # there's a lot more setup to be done before we should attempt a healthcheck
+        if health_check_type == 'ELB':
+            default_health_check_grace_period = 600
+        else:
+            default_health_check_grace_period = 300
+        health_check_grace_period = auto_scaling_config.get('health_check_grace_period', default_health_check_grace_period)
         scaling_group = AutoScalingGroup(
             "ScalingGroup",
             VPCZoneIdentifier=[Ref("SubnetA"), Ref("SubnetB"), Ref("SubnetC")],
-            MinSize=data['auto_scaling']['min'],
-            MaxSize=data['auto_scaling']['max'],
-            DesiredCapacity=data['auto_scaling']['desired'],
+            MinSize=asg_min_size,
+            MaxSize=asg_max_size,
+            DesiredCapacity=asg_desired_size,
             AvailabilityZones=GetAZs(),
             Tags=ec2_tags,
             LaunchConfigurationName=Ref(launch_config),
+            HealthCheckGracePeriod=health_check_grace_period,
+            HealthCheckType=health_check_type,
         )
         resources.append(scaling_group)
 
@@ -1045,3 +1078,46 @@ def _attach_elbs(self, template):
             template.resources[asgs[0].title] = asgs[0]
 
         return template
+
+    def _get_os_data(self):
+        """
+        Get details about the OS from the config data
+
+        Return:
+            os_data(dict): Dictionary of OS data in the form
+                {
+                    'name': 'ubuntu-1404',
+                    'ami': 'ami-464af835',
+                    'region': 'eu-west-1',
+                    'distribution': 'ubuntu',
+                    'type': 'linux',
+                    'release': '20160217.1'
+                }
+
+        Exceptions:
+            OSTypeNotFoundError: Raised when the OS in the config file is not
+                recognised
+        """
+        os_default = 'ubuntu-1404'
+        available_types = {
+            'ubuntu-1404': {
+                'name': 'ubuntu-1404',
+                'ami': 'ami-464af835',
+                'region': 'eu-west-1',
+                'distribution': 'ubuntu',
+                'type': 'linux',
+                'release': '20160217.1'
+            },
+            'windows2012': {
+                'name': 'windows2012',
+                'ami': 'ami-7943ec0a',
+                'region': 'eu-west-1',
+                'distribution': 'windows',
+                'type': 'windows',
+                'release': '2015.12.31'
+            }
+        }
+        os_choice = self.data['ec2'].get('os', os_default)
+        if not available_types.get(os_choice, False):
+            raise errors.OSTypeNotFoundError(self.data['ec2']['os'], available_types.keys())
+        return available_types.get(os_choice)
diff --git a/bootstrap_cfn/errors.py b/bootstrap_cfn/errors.py
@@ -60,14 +60,20 @@ class CloudResourceNotFoundError(BootstrapCfnError):
     pass
 
 
+class OSTypeNotFoundError(BootstrapCfnError):
+    def __init__(self, type, available_types):
+        msg = ("The os type '{}' is not recognised, should be one of {}. "
+               .format(type, available_types))
+        super(OSTypeNotFoundError, self).__init__(msg)
+
+
 class AutoscalingGroupNotFound(BootstrapCfnError):
     pass
 
 
 class AutoscalingInstanceCountError(BootstrapCfnError):
     def __init__(self, autoscaling_group, expected_instance_count, instances):
-        super(ProfileNotFoundError, self).__init__(
+        super(AutoscalingInstanceCountError, self).__init__(
             "Could not find {} instances in autoscaling group {}. Actual state is {} instances, {}"
             .format(expected_instance_count, autoscaling_group, len(instances), instances)
         )
-    pass
diff --git a/bootstrap_cfn/fab_tasks.py b/bootstrap_cfn/fab_tasks.py
@@ -20,6 +20,7 @@
 from bootstrap_cfn.iam import IAM
 from bootstrap_cfn.r53 import R53
 from bootstrap_cfn.utils import tail
+from bootstrap_cfn.vpc import VPC
 
 
 # Default fab config. Set via the tasks below or --set

diff --git a/tests/tests.py b/tests/tests.py
@@ -757,7 +757,7 @@ def test_process(self):
 
         mappings = cfn_template['Mappings']
         expected = {
-            'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-00d88f77'}},
+            'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-464af835'}},
             'SubnetConfig': {
                 'VPC': {
                     'CIDR': '10.0.0.0/16',
@@ -820,7 +820,7 @@ def test_process_with_vpc_config(self):
 
         mappings = cfn_template['Mappings']
         expected = {
-            'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-00d88f77'}},
+            'AWSRegion2AMI': {'eu-west-1': {'AMI': 'ami-464af835'}},
             'SubnetConfig': {
                 'VPC': {
                     'CIDR': '172.22.0.0/16',
@@ -1265,6 +1265,8 @@ def test_ec2(self):
             VPCZoneIdentifier=[Ref("SubnetA"), Ref("SubnetB"), Ref("SubnetC")],
             LaunchConfigurationName=Ref("BaseHostLaunchConfig"),
             AvailabilityZones=GetAZs(""),
+            HealthCheckGracePeriod=300,
+            HealthCheckType='EC2'
         )
 
         BaseHostSG = SecurityGroup(
@@ -1384,9 +1386,10 @@ def test_get_ec2_userdata(self):
         with patch.object(config, 'get_hostname_boothook', return_value={"content": "sentinel"}) as mock_boothook:
             user_data_parts = config.get_ec2_userdata()
             mock_boothook.assert_called_once_with(data['ec2'])
-
-            compare(yaml.load(user_data_parts[1]['content']), data['ec2']['cloud_config'])
-            compare(user_data_parts[0]['content'], 'sentinel')
+            # We have linux, so we put package update data in first
+            compare(user_data_parts[0]['content'], '{package_reboot_if_required: true, package_update: true, package_upgrade: true}\n')
+            compare(user_data_parts[1]['content'], 'sentinel')
+            compare(yaml.load(user_data_parts[2]['content']), data['ec2']['cloud_config'])
 
     def test_get_ec2_userdata_no_cloud_config(self):
         # If there is no cloud config we should get a default
@@ -1399,9 +1402,10 @@ def test_get_ec2_userdata_no_cloud_config(self):
         with patch.object(config, 'get_hostname_boothook', return_value={"content": "sentinel"}) as mock_boothook:
             user_data_parts = config.get_ec2_userdata()
             mock_boothook.assert_called_once_with(data['ec2'])
-
-            compare(yaml.load(user_data_parts[1]['content']), {'manage_etc_hosts': True})
-            compare(user_data_parts[0]['content'], 'sentinel')
+            # We have linux, so we put package update data in first
+            compare(user_data_parts[0]['content'], '{package_reboot_if_required: true, package_update: true, package_upgrade: true}\n')
+            compare(user_data_parts[1]['content'], 'sentinel')
+            compare(yaml.load(user_data_parts[2]['content']), {'manage_etc_hosts': True})
 
     def test_get_hostname_boothook(self):
         config = ConfigParser({}, environment="env", application="test", stack_name="my-stack")