In [1]:
import boto3
import sys
from botocore.exceptions import ClientError
import logging
from time import sleep
import math


In [2]:
# Low-level EC2 client. No region or credentials are passed here, so boto3's
# default configuration is what gets used.
ec2_client = boto3.client('ec2')
# Single describe_instances() call; the raw response dict is kept in `response`.
response = ec2_client.describe_instances()
# Bare expression so the notebook renders the response as the cell output.
response

{'Reservations': [{'Groups': [],
   'Instances': [{'AmiLaunchIndex': 2,
     'ImageId': 'ami-026dea5602e368e96',
     'InstanceId': 'i-01bc49ae38883ba25',
     'InstanceType': 't2.micro',
     'KeyName': 'key',
     'LaunchTime': datetime.datetime(2020, 6, 6, 20, 1, 26, tzinfo=tzutc()),
     'Monitoring': {'State': 'disabled'},
     'Placement': {'AvailabilityZone': 'us-east-2b',
      'GroupName': '',
      'Tenancy': 'default'},
     'PrivateDnsName': 'ip-172-31-23-159.us-east-2.compute.internal',
     'PrivateIpAddress': '172.31.23.159',
     'ProductCodes': [],
     'PublicDnsName': 'ec2-18-218-145-173.us-east-2.compute.amazonaws.com',
     'PublicIpAddress': '18.218.145.173',
     'State': {'Code': 16, 'Name': 'running'},
     'StateTransitionReason': '',
     'SubnetId': 'subnet-4c4a2836',
     'VpcId': 'vpc-2b6c6043',
     'Architecture': 'x86_64',
     'BlockDeviceMappings': [{'DeviceName': '/dev/xvda',
       'Ebs': {'AttachTime': datetime.datetime(2020, 6, 6, 20, 1, 27, tzinf

In [None]:
#I used https://github.com/awsdocs/aws-doc-sdk-examples/blob/master/python/example_code/ec2/create_instance.py for help here

def create_ec2_instance(image_id, instance_type, keypair_name, security_group, security_group_id, iam_role_name,
                        min_count=1, max_count=5):
    """Provision and launch one or more EC2 instances.

    The method returns without waiting for the instances to reach
    a running state.
    :param image_id: ID of AMI to launch, such as 'ami-XXXX'
    :param instance_type: string, such as 't2.micro'
    :param keypair_name: string, name of the key pair
    :param security_group: string, name of the security group
    :param security_group_id: string, ID of the security group, such as 'sg-XXXX'
    :param iam_role_name: string, sent as the 'Name' of the IAM instance profile
    :param min_count: minimum number of instances to launch (default 1)
    :param max_count: maximum number of instances to launch (default 5,
    the value that used to be hard-coded)
    :return Dictionary containing information about the launched instances
    (the run_instances response). If error, the ClientError is logged and
    None is returned.
    """

    # Provision and launch the EC2 instance(s)
    ec2_client = boto3.client('ec2')
    try:
        # NOTE(review): the security group is passed both by ID and by name,
        # exactly as before -- confirm EC2 accepts this combination for the
        # VPC setup in use.
        response = ec2_client.run_instances(ImageId=image_id,
                                            InstanceType=instance_type,
                                            KeyName=keypair_name,
                                            SecurityGroupIds=[security_group_id],
                                            SecurityGroups=[security_group],
                                            # The profile is referenced by name only; keep
                                            # account-specific ARNs out of the notebook.
                                            IamInstanceProfile={'Name': iam_role_name},
                                            MinCount=min_count,
                                            MaxCount=max_count)
    except ClientError as e:
        # Best-effort contract: log and signal failure with None instead of raising.
        logging.error(e)
        return None
    return response


# --- Launch configuration ---------------------------------------------------
AMI_IMAGE_ID = 'ami-02f53f5f90a9cc773' #This is the Amazon machine image ID
INSTANCE_TYPE = 't2.micro' #this is the type of vm ec2 instance
KEYPAIR_NAME = 'key' #this is the name of the key that allows you to ssh into the instance
SECURITY_GROUP = 'kademlia-all-access' #this is the name of the security group. I custom defined a security group that allows any protocol to connect on any port.
SECURITY_GROUP_ID = 'sg-06474ad72b0f3fd58' #this is the id associated with the security group on our account
IAM_ROLE_NAME = 'EnablesEC2ToAccessSystemsManagerRole' #this is the iam role to allow ssm to send commands to each instance.

# Set up logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(levelname)s: %(asctime)s: %(message)s')

response = create_ec2_instance(AMI_IMAGE_ID, INSTANCE_TYPE, KEYPAIR_NAME, SECURITY_GROUP, SECURITY_GROUP_ID, IAM_ROLE_NAME)

# create_ec2_instance() logs the ClientError and returns None when the launch
# request fails; stop here with a clear message instead of failing below with
# "'NoneType' object is not subscriptable".
if response is None:
    raise RuntimeError('EC2 launch request failed; see the ClientError logged above.')

ec2 = boto3.resource('ec2')

# AWS Instance Type Docs: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#instance
instances = [] #this is a python list of type ec2 instance objects

for instance in response['Instances']:
    logging.info(f'Launched EC2 Instance {instance["InstanceId"]}')
    logging.info(f'    VPC ID: {instance["VpcId"]}')
    logging.info(f'    Private IP Address: {instance["PrivateIpAddress"]}')
    logging.info(f'    Current State: {instance["State"]["Name"]}')
    print(instance)
    instances.append(ec2.Instance(instance["InstanceId"])) #creates the ec2 instance object

# run_instances returns before the instances are up, so block on each one
# until it reports "running" before reading its public IP address.
for instance in instances:
    instance.wait_until_running()
    print(instance)
    print(instance.public_ip_address)
                  



In [None]:
instances  # display the instance objects built by the launch cell (use this when spawning nodes from scratch)
# instances[0].iam_instance_profile


In [3]:
def get_running_instances():
    """ Returns a list of instance objects for instances that are already running
        (or still booting, i.e. "pending").
        Does not spawn or start any instances.

        Each returned object is an ec2.Instance resource. Before returning,
        the function blocks until every listed instance reports the "running"
        state and prints the instance together with its public IP address.
    """

    ec2_client = boto3.client('ec2')
    ec2 = boto3.resource('ec2')

    # Only "pending" and "running" instances can reach the running state
    # without someone starting them. Stopped or shutting-down instances are
    # left out, otherwise the wait below would block until the waiter gives up.
    # TODO maybe start stopped instances instead of skipping them?
    live_states = [{'Name': 'instance-state-name', 'Values': ['pending', 'running']}]

    instances = []
    # describe_instances is a paged API; walk every page so no instance is missed.
    paginator = ec2_client.get_paginator('describe_instances')
    for page in paginator.paginate(Filters=live_states):
        for reservation in page['Reservations']:
            for instance in reservation['Instances']:
                instances.append(ec2.Instance(instance['InstanceId'])) #creates the ec2 instance object

    for instance in instances:
        # Actually call the waiter (the bare attribute access used before was a no-op).
        instance.wait_until_running()
        print(instance)
        print(instance.public_ip_address)

    return instances

# Rebind the module-level `instances` list to whatever get_running_instances()
# returns, then display it as the cell output.
instances = get_running_instances()
instances

ec2.Instance(id='i-01bc49ae38883ba25')
18.218.145.173
ec2.Instance(id='i-053cb74b90426da70')
13.58.116.51
ec2.Instance(id='i-0967bbf8f4155db90')
18.191.111.142
ec2.Instance(id='i-05502eb50803bc085')
18.224.44.242
ec2.Instance(id='i-0528cafad0f7a83d1')
13.59.53.56
ec2.Instance(id='i-054b1ce206e7145dd')
18.217.22.130


[ec2.Instance(id='i-01bc49ae38883ba25'),
 ec2.Instance(id='i-053cb74b90426da70'),
 ec2.Instance(id='i-0967bbf8f4155db90'),
 ec2.Instance(id='i-05502eb50803bc085'),
 ec2.Instance(id='i-0528cafad0f7a83d1'),
 ec2.Instance(id='i-054b1ce206e7145dd')]

In [5]:
# I used https://stackoverflow.com/questions/42645196/how-to-ssh-and-run-commands-in-ec2-using-boto3 for help
# AWS SSM Docs: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm.html
# AWS SSM Info: https://docs.aws.amazon.com/systems-manager/latest/userguide/what-is-systems-manager.html
# AWS SSM Getting Started Guide: https://aws.amazon.com/getting-started/hands-on/remotely-run-commands-ec2-instance-systems-manager/

# Module-level SSM client; the functions defined in later cells read this
# global directly instead of creating their own client.
ssm_client = boto3.client('ssm')
# NOTE(review): the return value is discarded and this is not the last
# expression of the cell, so nothing is displayed -- presumably a connectivity
# check; confirm it is still wanted.
ssm_client.describe_instance_information()

def cancel_all_commands():
    """
        This function cancels any pending (still running) SSM commands.
        It enables us to start fresh.

        Uses the module-level `ssm_client`. Every page of list_commands is
        inspected, because a single call only returns the first page and
        active commands further down would never be seen. The function keeps
        re-checking, with a short pause between rounds, until no command is
        reported as 'Pending' or 'InProgress' any more. Each command that is
        about to be cancelled is printed.
    """

    active_statuses = ('Pending', 'InProgress')
    paginator = ssm_client.get_paginator('list_commands')

    while True:
        still_active = [
            command
            for page in paginator.paginate()
            for command in page['Commands']
            if command['Status'] in active_statuses
        ]
        if not still_active:
            return

        for command in still_active:
            print(command)
            #this does not guarantee a command will be cancelled so we must double check or send multiple requests
            ssm_client.cancel_command(CommandId=command['CommandId'])

        # Give SSM a moment to process the cancellations before polling again,
        # instead of hammering the API in a tight loop.
        sleep(1)
                
# Start from a clean slate: cancel every SSM command that is still pending or in progress.
cancel_all_commands()

In [6]:
instances  # display the current instance list (the same list a later cell passes to run_nodes)
#instances[0].id

[ec2.Instance(id='i-01bc49ae38883ba25'),
 ec2.Instance(id='i-053cb74b90426da70'),
 ec2.Instance(id='i-0967bbf8f4155db90'),
 ec2.Instance(id='i-05502eb50803bc085'),
 ec2.Instance(id='i-0528cafad0f7a83d1'),
 ec2.Instance(id='i-054b1ce206e7145dd')]

In [7]:
# def execute_commands_on_linux_instances(client, commands, instance_ids):
#     """Runs commands on remote linux instances
#     :param client: a boto/boto3 ssm client
#     :param commands: a list of strings, each one a command to execute on the instances
#     :param instance_ids: a list of instance_id strings, of the instances on which to execute the command
#     :return: the response from the send_command function (check the boto3 docs for ssm client.send_command() )
#     """

#     resp = client.send_command(
#         DocumentName="AWS-RunShellScript", # One of AWS' preconfigured documents
#         Parameters={'commands': commands},
#         InstanceIds=instance_ids,
#     )
#     return resp

# # Example use:
# # commands = ['python3 /home/ec2-user/CS244B/aws/kademlia_scripts/first_node.py 20 3']
# # instance_ids = ['i-01bc49ae38883ba25']
# # response1 = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)
# # response1

# # commands = ['python3 /home/ec2-user/CS244B/aws/kademlia_scripts/test.py 18.218.145.173 8468 a 5']
# # instance_ids = ['i-029096fe3848618fe']
# # response2 = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)
# # response2

# # ssm_client.get_command_invocation(CommandId=response1['Command']['CommandId'],
# #     InstanceId= response1['Command']['InstanceIds'][0])

# # ssm_client.get_command_invocation(CommandId=response2['Command']['CommandId'],
# #     InstanceId= response2['Command']['InstanceIds'][0])

In [13]:
def run_nodes(ksize, alpha, instances, mean_time=100):
    ''' This function will run the actual testing for success rate once
        we have all the instances spawned and set up. It assumes instances[0]
        is the first (bootstrapping) node that never churns. The last two
        instances are reserved for issuing set and get requests; every
        instance in between runs a Kademlia server. This is designed to
        be defined within another function or in a larger script.
        All commands are sent through the module-level `ssm_client`.

        @ksize and @alpha are passed verbatim to the node scripts.
        @instances is a list of objects exposing `.id` and
        `.public_ip_address` (ec2.Instance resources in this notebook).
        @mean_time is how long each server instance should be alive on
        average. The same value is used for how long a server is down when
        it fails. The mean uptime and downtime are therefore the same.
        NOTE: mean_time is not consumed yet -- presumably it belongs to the
        churn timing TODOs below.

        Returns the fraction of truth-table keys whose get request returned
        the value that was set. Raises RuntimeError when the bootstrapping
        node has no public IPv4 address.
    '''

    def execute_commands_on_linux_instances(client, commands, instance_ids):
        """Runs commands on remote linux instances
        :param client: a boto/boto3 ssm client
        :param commands: a list of strings, each one a command to execute on the instances
        :param instance_ids: a list of instance_id strings, of the instances on which to execute the command
        :return: the response from the send_command function (check the boto3 docs for ssm client.send_command() )
        """
        return client.send_command(
            DocumentName="AWS-RunShellScript", # One of AWS' preconfigured documents
            Parameters={'commands': commands},
            InstanceIds=instance_ids,
        )

    def execute_commands_with_timeout(client, commands, instance_ids, timeout):
        """Runs commands on remote linux instances, passing a timeout to SSM
        :param client: a boto/boto3 ssm client
        :param commands: a list of strings, each one a command to execute on the instances
        :param instance_ids: a list of instance_id strings, of the instances on which to execute the command
        :param timeout: value forwarded as send_command's TimeoutSeconds
        :return: the response from the send_command function (check the boto3 docs for ssm client.send_command() )
        """
        # Not called yet; kept for the churn timeout TODO below.
        return client.send_command(
            DocumentName="AWS-RunShellScript", # One of AWS' preconfigured documents
            TimeoutSeconds=timeout,
            Parameters={'commands': commands},
            InstanceIds=instance_ids,
        )

    first_node_ip = instances[0].public_ip_address
    if first_node_ip is None:
        # Every command below embeds this address, so there is no point in continuing.
        raise RuntimeError("ERROR: Failed to get public IPv4 address for the first (bootstrapping) node")

    first_node_command = 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/first_node.py {} {}'.format(ksize, alpha)
    bootstrap_server_command = 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/run_node.py {} 8468 {} {}'.format(first_node_ip, ksize, alpha)

    def get_command(key):
        return 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/get.py {} 8468 {}'.format(first_node_ip, key)

    def set_command(key, value):
        return 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/set.py {} 8468 {} {}'.format(first_node_ip, key, value)

    def sleep_command(time):
        # Not called yet; meant for simulating node downtime (see churn()).
        return 'sleep {}'.format(time)

    set_instance_index = len(instances) - 2
    get_instance_index = len(instances) - 1

    # this will be used to set values and determine whether we got the right value back or not
    # ('a' -> 1, 'b' -> 2, ..., 'z' -> 26)
    truth_table = {
        letter: position
        for position, letter in enumerate('abcdefghijklmnopqrstuvwxyz', start=1)
    }

    # One slot per instance, holding the latest send_command response for it.
    command_responses = ["" for _ in range(len(instances))]

    #start first node. The first node is the bootstrapping node that does not churn
    commands = [first_node_command]
    instance_ids = [instances[0].id]
    command_responses[0] = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)

    sleep(3)

    # start the kademlia servers, these nodes will churn
    for index in range(1, len(instances) - 2): # we leave the final two instances for set and get requests

        #TODO insert timeout that is an exponentially distribtued variable
        commands = [bootstrap_server_command]
        instance_ids = [instances[index].id]
        command_responses[index] = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)

    sleep(10) #allow the instances a chance to connect and populate routing tables

    def churn():
        # NOTE: nothing in run_nodes calls churn() yet.
        #
        # iterate through the commands
        # if any commands have timed out, they simulate node failures
        # send a sleep command to the same machine to simulate the node failing for some amount of time
        # if the failed command was a sleep command, then restart the server
        for command_idx in range(1, len(command_responses) - 2):
            command = command_responses[command_idx]['Command']
            instance_id = command['InstanceIds'][0]

            # The stored send_command response is only a snapshot from
            # submission time, so ask SSM for the current status.
            status = ssm_client.get_command_invocation(CommandId=command['CommandId'],
                                                       InstanceId=instance_id)['Status']

            # Still active: neither timed out nor finished, nothing to do yet.
            if status in ('Pending', 'InProgress', 'Delayed'):
                continue

            # the server has timed out, or the sleep has completed
            sent_command = command['Parameters']['commands'][0]
            print(sent_command)
            if 'sleep' in sent_command:

                #TODO assert this command succeeded
                #we must restart the kademlia server
                commands = [bootstrap_server_command]
                command_responses[command_idx] = execute_commands_on_linux_instances(ssm_client, commands, [instance_id])

            else: #the server timed out
                pass
                #TODO assert this command timed out
                #TODO insert exponential variable for sleep time

    def evaluate_get_response(get_response, key, value):
        """Return True only when the get script's stdout reports exactly `value`."""
        stdout_result = get_response['StandardOutputContent']
        print(stdout_result)

        # Split on any whitespace so a trailing newline does not stick to the value.
        tokens = stdout_result.split()
        if 'result:' not in tokens or tokens.index('result:') + 1 >= len(tokens):
            # No parsable output (e.g. the command failed): count it as a miss
            # instead of raising ValueError/IndexError.
            print("Inconclusive Result: {} for value {}".format(stdout_result, value))
            return False

        result = tokens[tokens.index('result:') + 1]
        if "None" in result:
            return False
        # Exact comparison: a substring test would accept '10' when 1 was expected.
        if result == str(value):
            return True
        print("Inconclusive Result: {} for value {}".format(stdout_result, value))
        return False

    def wait_until_complete(command):
        """Poll SSM once per second until the command finishes; True on success."""
        command_id = command['Command']['CommandId']
        instance_id = command['Command']['InstanceIds'][0]

        sleep(1)

        while True:
            command_info = ssm_client.get_command_invocation(CommandId=command_id,
                                                             InstanceId=instance_id)
            status = command_info['Status']

            if status == "Success":
                return True
            # 'Cancelled' is terminal too; without it this loop would never end.
            if status in ("Failed", "TimedOut", "Cancelled"):
                print(command_info)
                return False
            sleep(1)

    success_list = []

    for key, value in truth_table.items():
        #begin to make get and set requests to judge the success rate
        commands = [set_command(key, value)]
        instance_ids = [instances[set_instance_index].id] #this will be the set instance index
        command_responses[set_instance_index] = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)
        if not wait_until_complete(command_responses[set_instance_index]):
            #TODO relaunch
            pass

        commands = [get_command(key)]
        instance_ids = [instances[get_instance_index].id] #this will be the get instance index
        command_responses[get_instance_index] = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)
        if not wait_until_complete(command_responses[get_instance_index]):
            #TODO relaunch
            pass

        get_response = ssm_client.get_command_invocation(CommandId=command_responses[get_instance_index]['Command']['CommandId'],
                                          InstanceId=command_responses[get_instance_index]['Command']['InstanceIds'][0])
        print(get_response)

        if evaluate_get_response(get_response, key, value):
            success_list.append(1)
        else:
            success_list.append(0)

    # Built-in sum(): the math module has no sum() function.
    success_rate = sum(success_list) / len(truth_table)
    return success_rate

            
# Positional arguments follow run_nodes' signature: ksize=20, alpha=3; mean_time keeps its default (100).
run_nodes(20, 3, instances)


python3 /home/ec2-user/CS244B/aws/kademlia_scripts/run_node.py 18.218.145.173 8468 20 3
python3 /home/ec2-user/CS244B/aws/kademlia_scripts/run_node.py 18.218.145.173 8468 20 3
python3 /home/ec2-user/CS244B/aws/kademlia_scripts/run_node.py 18.218.145.173 8468 20 3
Get result: 1

Get result: 2



Get result: 3

Get result: 4



Get result: 5

Get result: 6



Get result: 7



Get result: 8



Get result: 9



Get result: 10



Get result: 11



Get result: 12



Get result: 13



Get result: 14



Get result: 15



Get result: 16



Get result: 17



Get result: 18



Get result: 19



Get result: 20



Get result: 21



Get result: 22



Get result: 23



Get result: 24



Get result: 25



Get result: 26



In [12]:
# Inspect a single command invocation, identified by a hard-coded
# CommandId / InstanceId pair (presumably copied from an earlier run).
ssm_client.get_command_invocation(CommandId='eda94f5b-fa1a-4ac7-a4d1-412e592fda3e',
    InstanceId='i-0528cafad0f7a83d1' )

{'CommandId': 'eda94f5b-fa1a-4ac7-a4d1-412e592fda3e',
 'InstanceId': 'i-0528cafad0f7a83d1',
 'Comment': '',
 'DocumentName': 'AWS-RunShellScript',
 'DocumentVersion': '',
 'PluginName': 'aws:runShellScript',
 'ResponseCode': 0,
 'ExecutionStartDateTime': '2020-06-08T23:21:05.177Z',
 'ExecutionElapsedTime': 'PT1.975S',
 'ExecutionEndDateTime': '2020-06-08T23:21:06.177Z',
 'Status': 'Success',
 'StatusDetails': 'Success',
 'StandardOutputContent': '',
 'StandardOutputUrl': '',
 'StandardErrorContent': "2020-06-08 23:21:07,117 - kademlia.network - INFO - Node 307766693402545648519261536028081225373449331391 listening on 0.0.0.0:8469\n2020-06-08 23:21:07,118 - kademlia.network - DEBUG - Refreshing routing table\n2020-06-08 23:21:07,119 - kademlia.network - DEBUG - Attempting to bootstrap node with 1 initial contacts\n2020-06-08 23:21:07,122 - kademlia.crawling - INFO - creating spider with peers: [[1200080849793280139940667294907761120360613605543, '18.218.145.173', 8468]]\n2020-06-08 23:2

In [None]:
# # #begin to make get and set requests to judge the success rate
# # commands = [set_command('a', '1')]
# # instance_ids = [instances[set_instance_index].id] #this will be the set instance index
# # command_responses[set_instance_index] = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)

# # commands = [get_command('a')]
# # instance_ids = [instances[get_instance_index].id] #this will be the get instance index
# # command_responses[get_instance_index] = execute_commands_on_linux_instances(ssm_client, commands, instance_ids)

# ssm_client.get_command_invocation(CommandId=responses[10]['Command']['CommandId'],
#     InstanceId= responses[10]['Command']['InstanceIds'][0])

In [None]:
# Number of commands in one list_commands() response (a single call, no pagination).
len(ssm_client.list_commands()['Commands'])

In [9]:
# Display the raw list_commands() response (a single call, no pagination).
ssm_client.list_commands()

{'Commands': [{'CommandId': 'b73d2240-ac73-4a94-b0cb-4b8cc74349a3',
   'DocumentName': 'AWS-RunShellScript',
   'DocumentVersion': '',
   'Comment': '',
   'ExpiresAfter': datetime.datetime(2020, 6, 8, 21, 20, 52, 761000, tzinfo=tzlocal()),
   'Parameters': {'commands': ['python3 /home/ec2-user/CS244B/aws/kademlia_scripts/run_node.py 18.218.145.173 8468 20 3']},
   'InstanceIds': ['i-05502eb50803bc085'],
   'Targets': [],
   'RequestedDateTime': datetime.datetime(2020, 6, 8, 19, 20, 52, 761000, tzinfo=tzlocal()),
   'Status': 'InProgress',
   'StatusDetails': 'InProgress',
   'OutputS3BucketName': '',
   'OutputS3KeyPrefix': '',
   'MaxConcurrency': '50',
   'MaxErrors': '0',
   'TargetCount': 1,
   'CompletedCount': 0,
   'ErrorCount': 0,
   'DeliveryTimedOutCount': 0,
   'ServiceRole': '',
   'NotificationConfig': {'NotificationArn': '',
    'NotificationEvents': [],
    'NotificationType': ''},
   'CloudWatchOutputConfig': {'CloudWatchLogGroupName': '',
    'CloudWatchOutputEnabled'

In [None]:
# Inspect a single command invocation, identified by a hard-coded
# CommandId / InstanceId pair (presumably copied from an earlier run).
ssm_client.get_command_invocation(CommandId='c423966d-bf71-4eae-b3bc-1e61f7f6cf20',
    InstanceId='i-022b9834666ed3ef6' )

In [None]:
# NOTE(review): in the visible cells `command_responses` only exists as a local
# variable inside run_nodes; unless it is also defined at module level elsewhere,
# this cell raises NameError on a fresh kernel.
command_responses