In [None]:
import boto3
import sys
from botocore.exceptions import ClientError
import logging
from time import sleep
import math
import numpy as np 
import datetime
from dateutil.tz import tzlocal
import sys
import os

import matplotlib.pyplot as plt
from matplotlib import style

In [None]:
# Sanity check: ask EC2 (through boto3's default session) to describe all
# instances; no filters are passed. The bare `response` on the last line makes
# the notebook display the raw API response.
ec2_client = boto3.client('ec2')
response = ec2_client.describe_instances()
response

In [None]:
#I used https://github.com/awsdocs/aws-doc-sdk-examples/blob/master/python/example_code/ec2/create_instance.py for help here

def create_ec2_instance(image_id, instance_type, keypair_name, security_group,
                        security_group_id, iam_role_name, min_count=1, max_count=14):
    """Provision and launch a batch of EC2 instances.

    The method returns without waiting for the instances to reach
    a running state.

    :param image_id: ID of AMI to launch, such as 'ami-XXXX'
    :param instance_type: string, such as 't2.micro'
    :param keypair_name: string, name of the key pair
    :param security_group: string, name of the security group to attach
    :param security_group_id: string, ID of the security group, such as 'sg-XXXX'
    :param iam_role_name: string, name of the IAM instance profile given to
        every launched instance
    :param min_count: minimum number of instances to launch (passed as MinCount)
    :param max_count: maximum number of instances to launch (passed as MaxCount);
        defaults to 14, the value that used to be hard-coded here
    :return Dictionary containing information about the launched instances
        (the raw run_instances response). If error, returns None.
    """

    # Provision and launch the EC2 instances
    ec2_client = boto3.client('ec2')
    try:
        response = ec2_client.run_instances(ImageId=image_id,
                                            InstanceType=instance_type,
                                            KeyName=keypair_name,
                                            SecurityGroupIds=[security_group_id],
                                            SecurityGroups=[security_group],
                                            # The instance profile is referenced by name rather than by ARN.
                                            IamInstanceProfile={'Name': iam_role_name},
                                            MinCount=min_count,
                                            MaxCount=max_count)
    except ClientError as e:
        # Best effort: log the AWS error and signal failure to the caller with None.
        logging.error(e)
        return None
    return response


AMI_IMAGE_ID = 'ami-02f53f5f90a9cc773' #This is the Amazon machine image ID
INSTANCE_TYPE = 't2.micro' #this is the type of vm ec2 instance
KEYPAIR_NAME = 'key' #this is the name of the key that allows you to ssh into the instance
SECURITY_GROUP = 'kademlia-all-access' #this is the name of the security group. I custom defined a security group that allows any protocol to connect on any port.
SECURITY_GROUP_ID = 'sg-06474ad72b0f3fd58' #this is the id associated with the security group on our account
IAM_ROLE_NAME = 'EnablesEC2ToAccessSystemsManagerRole' #this is the iam role to allow ssm to send commands to each instance.

# Set up logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(levelname)s: %(asctime)s: %(message)s')

response = create_ec2_instance(AMI_IMAGE_ID, INSTANCE_TYPE, KEYPAIR_NAME, SECURITY_GROUP, SECURITY_GROUP_ID, IAM_ROLE_NAME)

# create_ec2_instance logs the ClientError and returns None on failure; stop
# here with a clear message instead of failing on response['Instances'] below.
if response is None:
    raise RuntimeError('Failed to launch EC2 instances; see the logged ClientError for details')

ec2 = boto3.resource('ec2')

# AWS Instance Type Docs: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#instance
instances = [] #this is a python list of type ec2 instance objects

for instance in response['Instances']:
    logging.info(f'Launched EC2 Instance {instance["InstanceId"]}')
    logging.info(f'    VPC ID: {instance["VpcId"]}')
    logging.info(f'    Private IP Address: {instance["PrivateIpAddress"]}')
    logging.info(f'    Current State: {instance["State"]["Name"]}')
    print(instance)
    instances.append(ec2.Instance(instance["InstanceId"])) #creates the ec2 instance object

# Block until every launched instance is running, then show its public address.
for instance in instances:
    instance.wait_until_running()
    print(instance)
    print(instance.public_ip_address)
                  



In [None]:
def get_running_instances():
    """ Returns a list of instance objects for instances that are already running
        (or still starting up, i.e. in the 'pending' state).
        Does not spawn any instances.

        Stopped, stopping, shutting-down and terminated instances are skipped:
        they would never reach the running state on their own, so waiting on
        them would only time out.

        :return: list of boto3 ec2.Instance resources, each of which has reached
                 the running state by the time this function returns
    """

    ec2_client = boto3.client('ec2')
    ec2 = boto3.resource('ec2')

    # describe_instances is a paginated API; walk every page so that no
    # instance is silently dropped when the account has many of them.
    paginator = ec2_client.get_paginator('describe_instances')
    pages = paginator.paginate(
        Filters=[{'Name': 'instance-state-name', 'Values': ['pending', 'running']}]
    )

    instances = [
        ec2.Instance(instance["InstanceId"]) #creates the ec2 instance object
        for page in pages
        for reservation in page['Reservations']
        for instance in reservation["Instances"]
    ]

    for instance in instances:
        # Actually call the waiter (the parentheses were missing before, which
        # made this line a no-op). Running instances return immediately.
        instance.wait_until_running()
        print(instance)
        print(instance.public_ip_address)

    return instances

# Rebind the notebook-level `instances` list to whatever get_running_instances()
# returns; the bare name on the last line makes the notebook display it.
instances = get_running_instances()
instances

In [None]:
# I used https://stackoverflow.com/questions/42645196/how-to-ssh-and-run-commands-in-ec2-using-boto3 for help
# AWS SSM Docs: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm.html
# AWS SSM Info: https://docs.aws.amazon.com/systems-manager/latest/userguide/what-is-systems-manager.html
# AWS SSM Getting Started Guide: https://aws.amazon.com/getting-started/hands-on/remotely-run-commands-ec2-instance-systems-manager/

# Notebook-level SSM client. The command helpers defined in later cells read
# this name as a global rather than taking a client argument.
ssm_client = boto3.client('ssm')
# Display what SSM reports about the instances (shown as the cell output).
ssm_client.describe_instance_information()

In [None]:
def cancel_command(command_id, poll_interval=1):
    """Block until the given SSM command is no longer pending or in progress.

    A cancel request does not guarantee that the command is cancelled, so the
    command's status is re-checked and the request re-sent until the command is
    neither 'Pending' nor 'InProgress'.

    Uses the notebook-level `ssm_client`.

    :param command_id: ID of the SSM command to cancel
    :param poll_interval: seconds to wait between attempts, so that the SSM API
        is not hammered in a tight loop
    """
    active_statuses = ('Pending', 'InProgress')

    while True:
        response = ssm_client.list_commands(CommandId=command_id)
        still_active = [command for command in response['Commands']
                        if command['Status'] in active_statuses]

        if not still_active:
            return

        for command in still_active:
            #this does not guarantee a command will be cancelled so we must double check or send multiple requests
            ssm_client.cancel_command(CommandId=command['CommandId'])

        sleep(poll_interval)

In [None]:

def cancel_all_commands(poll_interval=1):
    """
        This function cancels any pending (still running) SSM commands.
        It enables us to start fresh.

        Every page of the command listing is inspected, not just the first
        one, so long-running commands that have been pushed down the list by
        newer commands are cancelled as well. A cancel request is not
        guaranteed to succeed, so the listing is re-checked until no command
        is 'Pending' or 'InProgress' any more.

        Uses the notebook-level `ssm_client`.

        :param poll_interval: seconds to wait between passes over the listing
    """
    active_statuses = ('Pending', 'InProgress')
    paginator = ssm_client.get_paginator('list_commands')

    while True:
        remaining_uncanceled = False

        for page in paginator.paginate():
            for command in page['Commands']:
                if command['Status'] in active_statuses:
                    remaining_uncanceled = True

                    print(command)
                    #this does not guarantee a command will be cancelled so we must double check or send multiple requests
                    ssm_client.cancel_command(CommandId=command['CommandId'])

        if not remaining_uncanceled:
            return

        # Give SSM a moment to process the cancellations before checking again.
        sleep(poll_interval)
                
# Cancel whatever SSM commands are still pending or in progress before going on.
cancel_all_commands()

In [None]:
# Display the notebook-level instance list as the cell output.
instances
#instances[0].id

In [None]:
def run_nodes(ksize, alpha, instances, mean_time=100):
    ''' Run the success-rate experiment once all instances are spawned and set up.

        Layout of `instances`:
          * instances[0] is the first (bootstrapping) node, which never churns;
          * instances[1:-2] run Kademlia servers that churn;
          * instances[-2] issues the set requests and instances[-1] the get
            requests; neither runs a long-lived server.

        For every key in an internal truth table (a..z -> 1..26) the value is
        set, then read back, with a churn step before each request. The
        function returns the fraction of keys whose value was read back.

        Uses the notebook-level `ssm_client`, `cancel_command` and
        `cancel_all_commands`. All SSM commands are cancelled when the function
        exits (also when it exits with an error) so that it can be run again.

        @ksize and @alpha are passed through to the Kademlia scripts.
        @instances is a list of ec2 instance objects (at least 3).
        @mean_time is how long each server instance should be alive on
        average. The same value is used for how long a server is down when
        it fails. The mean uptime and downtime are therefore the same.

        Returns the success rate as a float between 0 and 1.
        Raises ValueError if fewer than 3 instances are given and RuntimeError
        if the bootstrapping node has no public IPv4 address.
    '''

    if len(instances) < 3:
        raise ValueError("run_nodes needs at least 3 instances: a bootstrapping node, a set node and a get node")

    first_node_ip = instances[0].public_ip_address
    if first_node_ip is None:
        # Every other command embeds this address, so there is no point in going on.
        raise RuntimeError("ERROR: Failed to get public IPv4 address for the first (bootstrapping) node")

    def execute_commands_on_linux_instances(client, commands, instance_ids):
        """Runs commands on remote linux instances
        :param client: a boto/boto3 ssm client
        :param commands: a list of strings, each one a command to execute on the instances
        :param instance_ids: a list of instance_id strings, of the instances on which to execute the command
        :return: the response from the send_command function (check the boto3 docs for ssm client.send_command() )
        """
        return client.send_command(
            DocumentName="AWS-RunShellScript", # One of AWS' preconfigured documents
            Parameters={'commands': commands},
            InstanceIds=instance_ids,
        )

    first_node_command = 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/first_node.py {} {}'.format(ksize, alpha)
    bootstrap_server_command = 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/run_node.py {} 8468 {} {}'.format(first_node_ip, ksize, alpha)

    def get_command(key):
        return 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/get.py {} 8468 {}'.format(first_node_ip, key)

    def set_command(key, value):
        return 'python3 /home/ec2-user/CS244B/aws/kademlia_scripts/set.py {} 8468 {} {}'.format(first_node_ip, key, value)

    def sleep_command(seconds):
        return 'sleep {}'.format(seconds)

    set_instance_index = len(instances) - 2
    get_instance_index = len(instances) - 1

    # this will be used to set values and determine whether we got the right value back or not
    # ('a' -> 1, 'b' -> 2, ..., 'z' -> 26)
    truth_table = {chr(ord('a') + offset): offset + 1 for offset in range(26)}

    # note the assigned timeout is not used for the first instance, which always remains live
    # or for the last two instances which are used to get and set nodes and do not run long lived servers
    timeouts = [np.random.exponential(mean_time) for _ in instances]

    # Latest send_command response per instance.
    command_responses = ["" for _ in instances]

    # True while an instance is in its simulated downtime, i.e. its latest
    # command is the `sleep` and not the Kademlia server.
    node_is_down = [False for _ in instances]

    still_running = ('InProgress', 'Pending')

    def get_elapsed_time(command):
        """Seconds elapsed since the given command was requested."""
        time_response = ssm_client.list_commands(CommandId=command['CommandId'])
        requested_at = time_response['Commands'][0]['RequestedDateTime']
        now = datetime.datetime.now(requested_at.tzinfo)
        # total_seconds(), not .seconds: the latter drops whole days.
        return (now - requested_at).total_seconds()

    def report_unexpected(status):
        print("\n\n\n\n\n UNEXPECTED BEHAVIOR: The command didn't succeed or just keep running")
        print(status)

    def churn():
        """Take servers down whose uptime has expired; restart those whose downtime has ended."""
        for command_idx in range(1, len(command_responses) - 2):
            command = command_responses[command_idx]['Command']
            instance_ids = [command['InstanceIds'][0]]

            #get updated info on the status of the command
            command_info = ssm_client.get_command_invocation(CommandId=command['CommandId'],
                                                             InstanceId=instance_ids[0])
            status = command_info['Status']

            if node_is_down[command_idx]:
                # The node is sleeping. Only a finished sleep ends the downtime;
                # elapsed time is deliberately not consulted here, otherwise a
                # finished sleep would immediately be replaced by another sleep
                # and the node would never come back.
                if status == 'Success': # restart kademlia server, simulated downtime has ended
                    # recalculate the exponentially distributed timeout here
                    timeouts[command_idx] = np.random.exponential(mean_time)

                    #we must restart the kademlia server
                    command_responses[command_idx] = execute_commands_on_linux_instances(
                        ssm_client, [bootstrap_server_command], instance_ids)
                    node_is_down[command_idx] = False
                elif status not in still_running:
                    report_unexpected(status)

            elif get_elapsed_time(command) > timeouts[command_idx]: #simulate node churn
                cancel_command(command['CommandId']) #this is a blocking call and makes sure the command is cancelled

                command_responses[command_idx] = execute_commands_on_linux_instances(
                    ssm_client, [sleep_command(timeouts[command_idx])], instance_ids)
                node_is_down[command_idx] = True

            elif status not in still_running:
                # A server command is expected to keep running until it is cancelled.
                report_unexpected(status)

    def evaluate_get_response(get_response, key, value):
        """Return True if the get script's stdout reports `value` after the 'result:' token."""
        stdout_result = get_response['StandardOutputContent']
        print(stdout_result)
        result_list = stdout_result.split()

        # No 'result:' token followed by a value (e.g. the script crashed):
        # count it as a failed lookup instead of raising.
        if 'result:' not in result_list[:-1]:
            print("Inconclusive Result: {} for value {}".format(stdout_result, value))
            return False

        result = result_list[result_list.index('result:') + 1]
        if "None" in result:
            return False
        if str(value) in result:
            return True
        print("Inconclusive Result: {} for value {}".format(stdout_result, value))
        return False

    def wait_until_complete(command):
        """Poll once a second until the command finishes; True on success, False otherwise."""
        command_id = command['Command']['CommandId']
        instance_id = command['Command']['InstanceIds'][0]

        sleep(1) # give SSM a moment to register the invocation

        while True:
            command_info = ssm_client.get_command_invocation(CommandId=command_id,
                                                             InstanceId=instance_id)
            status = command_info['Status']

            if status == "Success":
                return True
            # 'Cancelled' is terminal as well; without it this loop would never end.
            if status in ("Failed", "TimedOut", "Cancelled"):
                print("Print command we were waiting for failed: {}".format(command_info))
                return False
            sleep(1)

    success_list = []

    try:
        #start first node. The first node is the bootstrapping node that does not churn
        command_responses[0] = execute_commands_on_linux_instances(
            ssm_client, [first_node_command], [instances[0].id])
        sleep(3)

        # start the kademlia servers, these nodes will churn
        for index in range(1, len(instances) - 2): # we leave the final two instances for set and get requests
            command_responses[index] = execute_commands_on_linux_instances(
                ssm_client, [bootstrap_server_command], [instances[index].id])

        sleep(10) #allow the instances a chance to connect and populate routing tables

        for key, value in truth_table.items():

            churn()

            #begin to make get and set requests to judge the success rate
            command_responses[set_instance_index] = execute_commands_on_linux_instances(
                ssm_client, [set_command(key, value)], [instances[set_instance_index].id])
            # A failed set is reported by wait_until_complete and shows up as a
            # failed get below, so the return value is not needed here.
            wait_until_complete(command_responses[set_instance_index])

            churn()

            command_responses[get_instance_index] = execute_commands_on_linux_instances(
                ssm_client, [get_command(key)], [instances[get_instance_index].id])
            wait_until_complete(command_responses[get_instance_index])

            get_command_info = command_responses[get_instance_index]['Command']
            get_response = ssm_client.get_command_invocation(CommandId=get_command_info['CommandId'],
                                                             InstanceId=get_command_info['InstanceIds'][0])

            print(get_response['StandardErrorContent'])

            success_list.append(1 if evaluate_get_response(get_response, key, value) else 0)
    finally:
        #so we can run again if needed by the script
        cancel_all_commands()

    return sum(success_list) / len(truth_table)

            
# Single trial run with ksize=20, alpha=3 and the default mean_time; the
# returned success rate is shown as the cell output.
run_nodes(20, 3, instances)


In [None]:
def get_results(ksize, alpha):
    """Run the experiment for every mean time and store each success rate on disk.

    For each mean time, run_nodes is called on the notebook-level `instances`
    and the resulting success rate is written, as text, to
    success_rates/ksize_<ksize>_alpha_<alpha>/meantime_<mean_time>.
    The directory is created if it does not exist yet.

    :param ksize: Kademlia k value passed to run_nodes
    :param alpha: Kademlia alpha value passed to run_nodes
    """
    mean_times = [200, 400, 600, 800, 1000, 2000, 3000, 4000]
    results_dir = "success_rates/ksize_{}_alpha_{}".format(ksize, alpha)
    os.makedirs(results_dir, exist_ok=True)

    for mean_time in mean_times:
        # Pass mean_time on; otherwise every run would use run_nodes' default.
        success_rate = run_nodes(ksize, alpha, instances, mean_time)

        # Open the file only once the result exists, so a failed run does not
        # leave an empty file behind. write() needs a string, not a float.
        with open(results_dir + '/meantime_' + str(mean_time), 'w') as f:
            f.write(str(success_rate))


In [None]:
def figure_1a():
    """Plot success rate against mean time online for (k=20, alpha=3) and (k=3, alpha=3).

    Reads one success rate per mean time from
    success_rates/ksize_<k>_alpha_<alpha>/meantime_<mean_time>, saves the plot
    to figures/figure_1a (creating the directory if needed) and shows it.
    """

    mean_times = [200, 400, 600, 800, 1000, 2000, 3000, 4000]

    def load_success_rates(ksize, alpha):
        # One float per mean time, in the order of mean_times.
        results_dir = "success_rates/ksize_{}_alpha_{}".format(ksize, alpha)
        rates = []
        for mean_time in mean_times:
            with open(results_dir + '/meantime_' + str(mean_time), 'r') as f:
                rates.append(float(f.read()))
        return rates

    success_rates_a = load_success_rates(20, 3)
    success_rates_b = load_success_rates(3, 3)

    # Newer matplotlib releases renamed the 'seaborn' style to 'seaborn-v0_8'
    # and later removed the old name; use whichever this installation provides.
    seaborn_style = next((name for name in ('seaborn-v0_8', 'seaborn') if name in style.available), 'default')
    style.use(seaborn_style)

    x_positions = np.arange(len(mean_times)) # evenly spaced; labelled with the mean times below

    fig, ax = plt.subplots()
    ax.plot(x_positions, success_rates_a, label = "K Value={}, Alpha={}".format(20, 3), color='blue', marker='^')
    ax.plot(x_positions, success_rates_b, label = "K Value={}, Alpha={}".format(3, 3), color='red', marker='s')

    ax.set_xlabel("Mean Time Online (s)")
    ax.set_ylabel("Success Ratio (%)")
    ax.xaxis.set_ticks(x_positions)
    ax.xaxis.set_ticklabels(mean_times)
    ax.set_yticks(np.arange(start=0.5, stop=1.1, step=0.1))
    ax.set_xlim([0,8])
    ax.set_ylim([0.0,1.01])
    ax.legend(labelspacing = 1.25, frameon=True)

    os.makedirs("figures", exist_ok=True)
    plt.savefig("figures/figure_1a")
    plt.show()

# Render and save figure 1a from the success rates stored on disk.
figure_1a()

In [None]:
def figure_2():
    """Plot success rate against mean time online for k = 1..5 with alpha = 1.

    Reads one success rate per mean time from
    success_rates/ksize_<k>_alpha_1/meantime_<mean_time>, saves the plot to
    figures/figure_2 (creating the directory if needed) and shows it.
    """

    mean_times = [200, 400, 600, 800, 1000, 2000, 3000, 4000]

    # (k value, line appearance) for each plotted series, in legend order.
    series = [
        (1, dict(color='black', marker='s')),
        (2, dict(color='blue', marker='^')),
        (3, dict(color='green', marker='o')),
        (4, dict(color='red', marker='s')),
        (5, dict(color='black', ls=":", marker='^')),
    ]

    def load_success_rates(ksize, alpha):
        # One float per mean time, in the order of mean_times.
        results_dir = "success_rates/ksize_{}_alpha_{}".format(ksize, alpha)
        rates = []
        for mean_time in mean_times:
            with open(results_dir + '/meantime_' + str(mean_time), 'r') as f:
                rates.append(float(f.read()))
        return rates

    # Load everything before touching matplotlib, so a missing file fails early.
    success_rates = [load_success_rates(ksize, 1) for ksize, _ in series]

    # Newer matplotlib releases renamed the 'seaborn' style to 'seaborn-v0_8'
    # and later removed the old name; use whichever this installation provides.
    seaborn_style = next((name for name in ('seaborn-v0_8', 'seaborn') if name in style.available), 'default')
    style.use(seaborn_style)

    x_positions = np.arange(len(mean_times)) # evenly spaced; labelled with the mean times below

    fig, ax = plt.subplots()
    for (ksize, line_style), rates in zip(series, success_rates):
        ax.plot(x_positions, rates, label = "K Value={}, Alpha={}".format(ksize, 1), **line_style)

    ax.set_xlabel("Mean Time Online (s)")
    ax.set_ylabel("Success Ratio (%)")
    ax.xaxis.set_ticks(x_positions)
    ax.xaxis.set_ticklabels(mean_times)
    ax.set_yticks(np.arange(start=0.5, stop=1.1, step=0.1))
    ax.set_xlim([0,8])
    ax.set_ylim([0.0,1.01])
    ax.legend(labelspacing = 1.25, frameon=True)

    os.makedirs("figures", exist_ok=True)
    plt.savefig("figures/figure_2")
    plt.show()

# Render and save figure 2 from the success rates stored on disk.
figure_2()