diff --git a/ardere/aws.py b/ardere/aws.py index 1ac7974..a807af5 100644 --- a/ardere/aws.py +++ b/ardere/aws.py @@ -28,6 +28,44 @@ with open(telegraf_path, 'r') as f: telegraf_script = f.read() +# EC2 userdata to setup values on load +# Settings for net.ipv4 settings based on: +# http://stackoverflow.com/questions/410616/increasing-the-maximum-number-of-tcp-ip-connections-in-linux +# Other settings are from operations on kernel tweaks they've done to handle +# large socket conditions. +EC2_USER_DATA = """#!/bin/bash +echo ECS_CLUSTER='{ecs_name}' >> /etc/ecs/ecs.config +sysctl net.core.rmem_default=8388608 +sysctl net.core.rmem_max=16777216 +sysctl net.core.wmem_max=16777216 +sysctl net.core.netdev_max_backlog=2500 +sysctl net.core.somaxconn=3240000 +sysctl net.netfilter.nf_conntrack_tcp_timeout_established=600 +sysctl net.nf_conntrack_max=1000000 +sysctl net.ipv4.ip_local_port_range="1024 65535" +sysctl net.ipv4.netfilter.ip_conntrack_max=4999999 +sysctl net.ipv4.netfilter.ip_conntrack_tcp_timeout_time_wait=1 +sysctl net.ipv4.netfilter.ip_conntrack_tcp_timeout_established=54000 +sysctl net.ipv4.tcp_fin_timeout=5 +sysctl net.ipv4.tcp_keepalive_time=30 +sysctl net.ipv4.tcp_keepalive_intvl=15 +sysctl net.ipv4.tcp_keepalive_probes=6 +sysctl net.ipv4.tcp_window_scaling=1 +sysctl net.ipv4.tcp_rmem="4096 87380 16777216" +sysctl net.ipv4.tcp_wmem="4096 65536 16777216" +sysctl net.ipv4.tcp_mem="786432 1048576 26777216" +sysctl net.ipv4.tcp_max_tw_buckets=360000 +sysctl net.ipv4.tcp_max_syn_backlog=3240000 +sysctl net.ipv4.tcp_max_tw_buckets=1440000 +sysctl net.ipv4.tcp_slow_start_after_idle=0 +sysctl net.ipv4.tcp_retries2=5 +sysctl net.ipv4.tcp_tw_recycle=1 +sysctl net.ipv4.tcp_tw_reuse=1 +sysctl vm.min_free_kbytes=65536 +sysctl -w fs.file-max=1000000 +ulimit -n 1000000 +""" + # List tracking vcpu's of all instance types for cpu unit reservations # We are intentionally leaving out the following instance types as they're @@ -166,31 +204,26 @@ def request_instances(self, instances): # type: (Dict[str, int]) -> None """Create requested types/quantities of instances for this cluster""" ami_id = self.ecs_ami_ids["us-east-1"] - request_instances = [] for instance_type, instance_count in instances.items(): - result = self._ec2_client.run_instances( + self._ec2_client.run_instances( ImageId=ami_id, - KeyName="loads", MinCount=instance_count, MaxCount=instance_count, InstanceType=instance_type, - UserData="#!/bin/bash \necho ECS_CLUSTER='" + self._ecs_name + - "' >> /etc/ecs/ecs.config", - IamInstanceProfile={"Arn": self.ecs_profile} + UserData=EC2_USER_DATA.format(ecs_name=self._ecs_name), + IamInstanceProfile={"Arn": self.ecs_profile}, + TagSpecifications=[ + { + "ResourceType": "instance", + "Tags": [ + dict(Key="Name", Value=self._ecs_name), + dict(Key="Owner", Value="ardere"), + dict(Key="ECSCluster", Value=self._ecs_name), + ] + } + ] ) - # Track returned instances for tagging step - request_instances.extend([x["InstanceId"] for x in - result["Instances"]]) - - self._ec2_client.create_tags( - Resources=request_instances, - Tags=[ - dict(Key="Owner", Value="ardere"), - dict(Key="ECSCluster", Value=self._ecs_name) - ] - ) - def locate_metrics_container_ip(self): """Locates the metrics container IP""" response = self._ecs_client.list_container_instances( @@ -319,8 +352,12 @@ def create_service(self, step): # using only memoryReservation sets no hard limit "memoryReservation": 256, + "privileged": True, "environment": env_vars, "entryPoint": cmd, + "ulimits": [ + dict(name="nofile", softLimit=1000000, hardLimit=1000000) + ], "logConfiguration": { "logDriver": "awslogs", "options": { @@ -352,6 +389,7 @@ def create_service(self, step): "portMappings": [ {"containerPort": 8125} ], + "privileged": True, "environment": [ {"name": "__ARDERE_TELEGRAF_CONF__", "value": telegraf_script}, diff --git a/requirements.txt b/requirements.txt index e40927a..4d31a66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ futures==3.0.5 typing==3.5.3.0 toml==0.9.2 marshmallow==2.13.4 -influxdb==4.0.0 \ No newline at end of file +influxdb==4.0.0 +boto3==1.4.4 \ No newline at end of file diff --git a/serverless.yml b/serverless.yml index 246c4ab..9143c54 100644 --- a/serverless.yml +++ b/serverless.yml @@ -160,7 +160,7 @@ stepFunctions: ErrorEquals: - NoSuchKey IntervalSeconds: 10 - MaxAttempts: 4 + MaxAttempts: 2 BackoffRate: 1 Catch: - diff --git a/tests/test_aws.py b/tests/test_aws.py index 7de2af4..cd72b58 100644 --- a/tests/test_aws.py +++ b/tests/test_aws.py @@ -73,12 +73,7 @@ def test_request_instances(self): "Instances": [{"InstanceId": 12345}] } ecs.request_instances(instances) - ecs._ec2_client.create_tags.assert_called_with( - Resources=[12345], Tags=[ - {'Value': 'ardere', 'Key': 'Owner'}, - {'Value': u'ardere-test', 'Key': 'ECSCluster'} - ] - ) + ecs._ec2_client.run_instances.assert_called() def test_locate_metrics_container_ip(self): ecs = self._make_FUT()