In [2]:
import os
import logging
from io import BytesIO
import time
import zipfile
import numpy as np
import boto3
from datetime import datetime, timezone
from time import gmtime, strftime
import json
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [48]:
import math

In [3]:
# Offset, in hours, between UTC and the local timezone. It is multiplied by
# 3600 and added to the epoch seconds parsed from the queried log timestamps.
timezone_offset = 0

### Function Name List

In [17]:
# Names of the deployed functions: "<prefix>_f1" ... "<prefix>_f<count>".
function_prefix = "AppMdl"
function_count = 16
function_name_list = [
    f"{function_prefix}_f{index}" for index in range(1, function_count + 1)
]
print(function_name_list)

['AppMdl_f1', 'AppMdl_f2', 'AppMdl_f3', 'AppMdl_f4', 'AppMdl_f5', 'AppMdl_f6', 'AppMdl_f7', 'AppMdl_f8', 'AppMdl_f9', 'AppMdl_f10', 'AppMdl_f11', 'AppMdl_f12', 'AppMdl_f13', 'AppMdl_f14', 'AppMdl_f15', 'AppMdl_f16']


In [19]:
# Memory setting of each function, keyed by its short name ('f1'..'f16').
# These values are passed as the `mem` argument when the model's Function
# objects are built.
mem_config_list = dict(
    f1=1536, f2=1792, f3=576, f4=2240,
    f5=896, f6=1728, f7=128, f8=128,
    f9=256, f10=320, f11=1920, f12=1984,
    f13=1088, f14=640, f15=896, f16=1088,
)
function_count = 16

# Execute the Application

In [3]:
# Step Functions client used below to start executions of the state machine.
sfn_client = boto3.client('stepfunctions')


In [4]:
# ARN of the state machine that is executed and measured in this notebook.
# NOTE(review): region and account ID are hard-coded; consider reading the ARN
# from configuration so the notebook can run against another deployment.
stateMachineArn='arn:aws:states:us-east-2:499537426559:stateMachine:AppMdl'

## Test Run

In [5]:
# Smoke test: start a single execution with a fixed JSON payload and display
# the service response.
sfn_client.start_execution(
    stateMachineArn=stateMachineArn,
    input="{\"para1\":96, \"para2\":[1,2,3,4,100], \"para4\":64, \"para6\":[96,97,1,1,2,3,4,5,6]}"
)

{'executionArn': 'arn:aws:states:us-east-2:499537426559:express:AppMdl:a2394588-0f07-49d6-96ab-46e8eeaf1873:13e747b3-83ed-44d4-ad0b-5d6de53b7144',
 'startDate': datetime.datetime(2021, 12, 26, 21, 45, 53, 78000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': '79103f11-2f24-4b98-91e6-59d4f500af84',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '79103f11-2f24-4b98-91e6-59d4f500af84',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '174'},
  'RetryAttempts': 0}}

## Configure Logging

In [6]:
# Write one line per log record to AppExecution.log, prefixed with a
# "YYYY-MM-DD HH:MM:SS.mmm" timestamp; later cells parse these timestamps back
# out of the file to delimit the log queries.
# NOTE(review): the `encoding` argument of basicConfig needs Python 3.9+.
logging.basicConfig(filename='AppExecution.log', encoding='utf-8', format='%(asctime)s.%(msecs)03d %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)

## Execute AppMdl

In [7]:
# Seed NumPy's global RNG so the random payloads generated by the execution
# loop are reproducible.
np.random.seed(256)

In [None]:
# Start 5000 executions, one every 10 seconds, each with a random payload, and
# log "<sequence number> <HTTP status> <request id>" for every start.
for i in range(5000):
    # The values are drawn in the same order as before (para1, para2, para4,
    # para6) so the seeded random sequence is unchanged. They are converted to
    # built-in int/list and serialised with json.dumps because, with
    # NumPy >= 2, formatting list(np.random.randint(...)) into a string embeds
    # reprs such as "np.int64(5)", which is not valid JSON.
    payload = {
        "para1": int(np.random.randint(1, 101)),
        "para2": np.random.randint(1, 101, 20).tolist(),
        "para4": int(np.random.randint(1, 101)),
        "para6": np.random.randint(1, 101, 20).tolist(),
    }
    payload_str = json.dumps(payload)
    response = sfn_client.start_execution(stateMachineArn=stateMachineArn, input=payload_str)
    RequestId = response.get('ResponseMetadata', {}).get('RequestId')
    StatusCode = response.get('ResponseMetadata', {}).get('HTTPStatusCode', 'ERR')
    logging.info(f'{i+1} {StatusCode} {RequestId}')
    # Space the executions out so that consecutive runs do not overlap.
    time.sleep(10)

## Get the start time and the end time

In [84]:
# Read the "YYYY-MM-DD HH:MM:SS.mmm" timestamps of the first and the last
# entry of the execution log. The file is read in pure Python instead of
# shelling out to `head`/`tail`, so the cell works without a POSIX shell and
# leaves no unclosed pipe handles behind.
# NOTE: the misspelled name `app_exeuction_start_time` is kept on purpose
# because later cells refer to it.
with open('AppExecution.log', encoding='utf-8') as _log_file:
    _log_lines = _log_file.readlines()
if not _log_lines:
    raise ValueError('AppExecution.log is empty; run the application before this cell')
app_exeuction_start_time = ' '.join(_log_lines[0].split(' ')[:2])
app_execution_end_time = ' '.join(_log_lines[-1].split(' ')[:2])
app_exeuction_start_time = datetime.strptime(app_exeuction_start_time, '%Y-%m-%d %H:%M:%S.%f')
app_execution_end_time = datetime.strptime(app_execution_end_time, '%Y-%m-%d %H:%M:%S.%f')

In [85]:
app_exeuction_start_time

datetime.datetime(2021, 12, 26, 21, 46, 17, 476000)

In [86]:
app_execution_end_time

datetime.datetime(2021, 12, 27, 11, 52, 6, 760000)

In [87]:
# Convert the parsed start time to whole epoch seconds, the form passed as
# `startTime` to the log query below.
# NOTE(review): this rebinds the name from a datetime to an int, so this cell
# cannot be re-run on its own; re-run the cell that parses the log first.
app_exeuction_start_time = int(datetime.timestamp(app_exeuction_start_time))

In [88]:
# Convert the parsed end time to whole epoch seconds and extend it by 20 s
# (presumably so that the log events of the last execution fall inside the
# query window - confirm).
# NOTE(review): rebinds the name from a datetime to an int; not re-runnable on
# its own.
app_execution_end_time = int(datetime.timestamp(app_execution_end_time)) + 20

# Retrieve Logs

In [22]:
# CloudWatch Logs client used for all log queries below.
logclient = boto3.client('logs')

## Query Step Functions Logs

In [90]:
# Start a log query over the state machine's log group that returns the `type`
# and `@timestamp` of every ExecutionStarted / ExecutionSucceeded event inside
# the run's time range. Only the query id is returned here; the results are
# fetched in the next cell.
# NOTE(review): 5000 executions yield 10000 matching events if all of them
# succeed, which is exactly the `limit` requested here - there is no headroom.
query_sfn_AppMdl = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('AppMdl'),
    queryString="fields type, @timestamp| filter type = 'ExecutionStarted' or type = 'ExecutionSucceeded' | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)

In [91]:
# Fetch the results of the Step Functions log query. Logs Insights queries run
# asynchronously, and a call made while the query is still 'Scheduled' or
# 'Running' can return an incomplete result set, so poll until the query
# reaches a terminal status and fail loudly if it did not complete.
query_results_sfn_AppMdl = logclient.get_query_results(
    queryId=query_sfn_AppMdl['queryId']
)
while query_results_sfn_AppMdl['status'] in ('Scheduled', 'Running'):
    time.sleep(2)
    query_results_sfn_AppMdl = logclient.get_query_results(
        queryId=query_sfn_AppMdl['queryId']
    )
if query_results_sfn_AppMdl['status'] != 'Complete':
    raise RuntimeError(
        f"Log query {query_sfn_AppMdl['queryId']} ended with status "
        f"{query_results_sfn_AppMdl['status']}"
    )

In [101]:
def _sfn_event_epochs(event_type):
    """Return the sorted epoch seconds of all queried events of `event_type`.

    Each result row holds the event type in its first field and the timestamp
    string in its second; `timezone_offset` hours are added to every value.
    """
    epochs = []
    for row in query_results_sfn_AppMdl['results']:
        if row[0]['value'] != event_type:
            continue
        parsed = datetime.strptime(row[1]['value'], '%Y-%m-%d %H:%M:%S.%f')
        epochs.append(datetime.timestamp(parsed) + timezone_offset*3600)
    return np.sort(epochs)


AppMdl_starttimestamp = _sfn_event_epochs('ExecutionStarted')
AppMdl_endtimestamp = _sfn_event_epochs('ExecutionSucceeded')

In [102]:
# Persist the start/end times as two columns. The two arrays were sorted
# independently, so row i pairs the i-th earliest start with the i-th earliest
# end, and both arrays must have the same length for the frame to be built.
# NOTE(review): this assumes every started execution also logged
# ExecutionSucceeded and that executions do not overlap - confirm.
pd.DataFrame({'Start': AppMdl_starttimestamp, 'End':AppMdl_endtimestamp}).to_csv('AppMdl_SFN_Logs.csv', index=False)


In [4]:
# Reload the saved start/end times, so the analysis below can run from the CSV
# without querying the logs again.
AppMdl_sfn_logs = pd.read_csv('AppMdl_SFN_Logs.csv', low_memory=False)

In [5]:
AppMdl_sfn_logs.shape

(5000, 2)

## Query Lambda Function Logs

In [12]:
# Timestamps ("YYYY-MM-DD HH:MM:SS.mmm") of log entries 1, 1000, 2000, ...,
# 5000. They delimit the five time windows used for the Lambda log queries.
# The file is read in pure Python instead of via `head`/`tail`, which needed a
# POSIX shell and left the os.popen pipe handles unclosed.
with open('AppExecution.log', encoding='utf-8') as _log_file:
    _log_lines = _log_file.readlines()
if not _log_lines:
    raise ValueError('AppExecution.log is empty; run the application before this cell')
timestamp_list = [
    # Like `head -n | tail -1`, fall back to the last line when the log has
    # fewer than `line_no` entries.
    ' '.join(_log_lines[min(line_no, len(_log_lines)) - 1].split(' ')[:2])
    for line_no in [1] + list(range(1000, 6000, 1000))
]

### Functions for parsing Logs

In [13]:
def lambda_report_log_to_dict(log, tz_offset_hours=None):
    """Parse one log-query result row that holds a Lambda REPORT line.

    Parameters
    ----------
    log : sequence of dict
        One result row: ``log[0]['value']`` is the timestamp string in
        ``%Y-%m-%d %H:%M:%S.%f`` format and ``log[1]['value']`` is the
        tab-separated REPORT message made of ``"<name>: <value> <unit>"``
        fields.
    tz_offset_hours : float, optional
        Hours added to the parsed timestamp. Defaults to the notebook-level
        ``timezone_offset``.

    Returns
    -------
    dict
        Keys ``RequestId``, ``Duration``, ``Billed_Duration``, ``Memory_Size``,
        ``Max_Memory_Used`` and ``UTC_Timestamp`` (epoch seconds as a float).

    Raises
    ------
    ValueError
        If the message lacks one of the expected REPORT fields.
    """
    if tz_offset_hours is None:
        tz_offset_hours = timezone_offset

    # Look the fields up by name instead of by position, so that additional
    # fields in the REPORT line cannot shift the values.
    fields = {}
    for part in log[1]['value'].split('\t'):
        name, separator, value = part.partition(': ')
        if separator:
            fields[name.strip()] = value.strip()

    res = {}
    try:
        res['RequestId'] = fields['REPORT RequestId']
        res['Duration'] = float(fields['Duration'].split(' ')[0])
        res['Billed_Duration'] = int(fields['Billed Duration'].split(' ')[0])
        res['Memory_Size'] = int(fields['Memory Size'].split(' ')[0])
        res['Max_Memory_Used'] = int(fields['Max Memory Used'].split(' ')[0])
    except KeyError as exc:
        raise ValueError(f"not a Lambda REPORT line: {log[1]['value']!r}") from exc

    # datetime.timestamp() keeps the fractional seconds that the previous
    # time.mktime(...timetuple()) conversion discarded, so these timestamps
    # have the same resolution as the Step Functions start/end times.
    parsed = datetime.strptime(log[0]['value'], "%Y-%m-%d %H:%M:%S.%f")
    res['UTC_Timestamp'] = parsed.timestamp() + tz_offset_hours*3600
    return res

In [14]:
timestamp_list

['2021-12-26 21:46:17.476',
 '2021-12-27 00:35:18.365',
 '2021-12-27 03:24:29.190',
 '2021-12-27 06:13:42.231',
 '2021-12-27 09:02:53.925',
 '2021-12-27 11:52:06.760']

In [15]:
def process_time_string(s: str) -> int:
    """Convert a 'YYYY-MM-DD HH:MM:SS.fff' string to whole epoch seconds.

    The string is parsed into a naive datetime, converted with
    ``datetime.timestamp`` and truncated to an int.
    """
    return int(datetime.strptime(s, '%Y-%m-%d %H:%M:%S.%f').timestamp())

### Prepare and Retrieve Logs

In [20]:
# One initially empty list of parsed REPORT records per function, keyed
# 'f1' .. 'f<function_count>'.
AppMdl_lambda_logs_dict = {f'f{index}': [] for index in range(1, function_count + 1)}

In [None]:
# Window 1 of 5 (log entries 1-1000): fetch the REPORT lines of every function
# and (re)initialise the per-function record lists with them.
# NOTE(review): `limit=10000` caps each query; f8 and f14 total 49990 records
# over the five windows, i.e. roughly 10000 per window, so confirm that no
# window is truncated by this limit.
window_start = process_time_string('2021-12-26 21:46:17.476') - 5
window_end = process_time_string('2021-12-27 00:35:18.365') - 5
query_lambda = []
for function in function_name_list:
    query_lambda.append(logclient.start_query(
        logGroupName='/aws/lambda/{}'.format(function),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=window_start,
        endTime=window_end,
        limit=10000
        ))
    time.sleep(4)
time.sleep(10)
query_lambda_results = []
for q in query_lambda:
    # Queries run asynchronously: a result fetched while the query is still
    # 'Scheduled' or 'Running' can be incomplete, so poll until it is done.
    result = logclient.get_query_results(queryId=q['queryId'])
    while result['status'] in ('Scheduled', 'Running'):
        time.sleep(4)
        result = logclient.get_query_results(queryId=q['queryId'])
    if result['status'] != 'Complete':
        raise RuntimeError(f"Log query {q['queryId']} ended with status {result['status']}")
    query_lambda_results.append(result)
    time.sleep(4)
# Keep the raw responses so the logs can be re-parsed without querying again.
with open('query_lambda_results_part1.pickle', 'wb') as f:
    pickle.dump(query_lambda_results, f)
for i in range(1, function_count+1):
    AppMdl_lambda_logs_dict['f'+str(i)] = [lambda_report_log_to_dict(item) for item in query_lambda_results[i-1]['results']]
    for item in AppMdl_lambda_logs_dict['f'+str(i)]:
        item['Function']='f'+str(i)

In [36]:
len(AppMdl_lambda_logs_dict['f1'])

5000

In [31]:
# Window 2 of 5 (log entries 1000-2000): fetch the REPORT lines of every
# function and append them to the per-function record lists.
# NOTE(review): the records are appended with `+=`, so re-running this cell
# duplicates them; re-run from the first window to start over.
window_start = process_time_string('2021-12-27 00:35:18.365') - 5
window_end = process_time_string('2021-12-27 03:24:29.190') - 5
query_lambda = []
for function in function_name_list:
    query_lambda.append(logclient.start_query(
        logGroupName='/aws/lambda/{}'.format(function),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=window_start,
        endTime=window_end,
        limit=10000
        ))
    time.sleep(4)
query_lambda_results = []
for q in query_lambda:
    # Queries run asynchronously: a result fetched while the query is still
    # 'Scheduled' or 'Running' can be incomplete, so poll until it is done.
    result = logclient.get_query_results(queryId=q['queryId'])
    while result['status'] in ('Scheduled', 'Running'):
        time.sleep(4)
        result = logclient.get_query_results(queryId=q['queryId'])
    if result['status'] != 'Complete':
        raise RuntimeError(f"Log query {q['queryId']} ended with status {result['status']}")
    query_lambda_results.append(result)
    time.sleep(4)
# Keep the raw responses so the logs can be re-parsed without querying again.
with open('query_lambda_results_part2.pickle', 'wb') as f:
    pickle.dump(query_lambda_results, f)
for i in range(1, function_count+1):
    # Tag only the records of this window; earlier ones are already tagged.
    new_records = [lambda_report_log_to_dict(item) for item in query_lambda_results[i-1]['results']]
    for item in new_records:
        item['Function']='f'+str(i)
    AppMdl_lambda_logs_dict['f'+str(i)] += new_records

In [32]:
# Window 3 of 5 (log entries 2000-3000): fetch the REPORT lines of every
# function and append them to the per-function record lists.
# NOTE(review): the records are appended with `+=`, so re-running this cell
# duplicates them; re-run from the first window to start over.
window_start = process_time_string('2021-12-27 03:24:29.190') - 5
window_end = process_time_string('2021-12-27 06:13:42.231') - 5
query_lambda = []
for function in function_name_list:
    query_lambda.append(logclient.start_query(
        logGroupName='/aws/lambda/{}'.format(function),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=window_start,
        endTime=window_end,
        limit=10000
        ))
    time.sleep(4)
query_lambda_results = []
for q in query_lambda:
    # Queries run asynchronously: a result fetched while the query is still
    # 'Scheduled' or 'Running' can be incomplete, so poll until it is done.
    result = logclient.get_query_results(queryId=q['queryId'])
    while result['status'] in ('Scheduled', 'Running'):
        time.sleep(4)
        result = logclient.get_query_results(queryId=q['queryId'])
    if result['status'] != 'Complete':
        raise RuntimeError(f"Log query {q['queryId']} ended with status {result['status']}")
    query_lambda_results.append(result)
    time.sleep(4)
# Keep the raw responses so the logs can be re-parsed without querying again.
with open('query_lambda_results_part3.pickle', 'wb') as f:
    pickle.dump(query_lambda_results, f)
for i in range(1, function_count+1):
    # Tag only the records of this window; earlier ones are already tagged.
    new_records = [lambda_report_log_to_dict(item) for item in query_lambda_results[i-1]['results']]
    for item in new_records:
        item['Function']='f'+str(i)
    AppMdl_lambda_logs_dict['f'+str(i)] += new_records

In [33]:
# Window 4 of 5 (log entries 3000-4000): fetch the REPORT lines of every
# function and append them to the per-function record lists.
# NOTE(review): the records are appended with `+=`, so re-running this cell
# duplicates them; re-run from the first window to start over.
window_start = process_time_string('2021-12-27 06:13:42.231') - 5
window_end = process_time_string('2021-12-27 09:02:53.925') - 5
query_lambda = []
for function in function_name_list:
    query_lambda.append(logclient.start_query(
        logGroupName='/aws/lambda/{}'.format(function),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=window_start,
        endTime=window_end,
        limit=10000
        ))
    time.sleep(4)
query_lambda_results = []
for q in query_lambda:
    # Queries run asynchronously: a result fetched while the query is still
    # 'Scheduled' or 'Running' can be incomplete, so poll until it is done.
    result = logclient.get_query_results(queryId=q['queryId'])
    while result['status'] in ('Scheduled', 'Running'):
        time.sleep(4)
        result = logclient.get_query_results(queryId=q['queryId'])
    if result['status'] != 'Complete':
        raise RuntimeError(f"Log query {q['queryId']} ended with status {result['status']}")
    query_lambda_results.append(result)
    time.sleep(4)
# Keep the raw responses so the logs can be re-parsed without querying again.
with open('query_lambda_results_part4.pickle', 'wb') as f:
    pickle.dump(query_lambda_results, f)
for i in range(1, function_count+1):
    # Tag only the records of this window; earlier ones are already tagged.
    new_records = [lambda_report_log_to_dict(item) for item in query_lambda_results[i-1]['results']]
    for item in new_records:
        item['Function']='f'+str(i)
    AppMdl_lambda_logs_dict['f'+str(i)] += new_records

In [34]:
# Window 5 of 5 (log entries 4000-5000): fetch the REPORT lines of every
# function and append them to the per-function record lists. Unlike the other
# windows, the end bound is extended by 5 s past the last log entry.
# NOTE(review): the records are appended with `+=`, so re-running this cell
# duplicates them; re-run from the first window to start over.
window_start = process_time_string('2021-12-27 09:02:53.925') - 5
window_end = process_time_string('2021-12-27 11:52:06.760') + 5
query_lambda = []
for function in function_name_list:
    query_lambda.append(logclient.start_query(
        logGroupName='/aws/lambda/{}'.format(function),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=window_start,
        endTime=window_end,
        limit=10000
        ))
    time.sleep(4)
query_lambda_results = []
for q in query_lambda:
    # Queries run asynchronously: a result fetched while the query is still
    # 'Scheduled' or 'Running' can be incomplete, so poll until it is done.
    result = logclient.get_query_results(queryId=q['queryId'])
    while result['status'] in ('Scheduled', 'Running'):
        time.sleep(4)
        result = logclient.get_query_results(queryId=q['queryId'])
    if result['status'] != 'Complete':
        raise RuntimeError(f"Log query {q['queryId']} ended with status {result['status']}")
    query_lambda_results.append(result)
    time.sleep(4)
# Keep the raw responses so the logs can be re-parsed without querying again.
with open('query_lambda_results_part5.pickle', 'wb') as f:
    pickle.dump(query_lambda_results, f)
for i in range(1, function_count+1):
    # Tag only the records of this window; earlier ones are already tagged.
    new_records = [lambda_report_log_to_dict(item) for item in query_lambda_results[i-1]['results']]
    for item in new_records:
        item['Function']='f'+str(i)
    AppMdl_lambda_logs_dict['f'+str(i)] += new_records

In [35]:
# Save the parsed per-function records of all windows in one pickle file.
with open('AppMdl_lambda_logs_dict.pickle', 'wb') as pickle_file:
    pickle.dump(AppMdl_lambda_logs_dict, pickle_file)

#### Convert Logs into DataFrame and Save as CSV

In [37]:
# Stack the per-function record lists (f1 first, f16 last) into one DataFrame
# with a fresh 0..n-1 index, order the columns and export the result.
# DataFrame.append was removed in pandas 2.0; a single pd.concat replaces it
# and avoids re-copying the accumulated frame on every loop iteration.
AppMdl_lambda_logs = pd.concat(
    [pd.DataFrame(AppMdl_lambda_logs_dict['f'+str(i)]) for i in range(1, function_count+1)],
    ignore_index=True,
)
AppMdl_lambda_logs=AppMdl_lambda_logs[['Function', 'Memory_Size', 'Max_Memory_Used', 'Duration', 'Billed_Duration', 'UTC_Timestamp', 'RequestId']]
AppMdl_lambda_logs.to_csv('AppMdl_lambda_logs.csv',index=False)

In [38]:
# Reload the exported Lambda logs from CSV. Assigning `.columns` renames the
# columns positionally: compared with the names used at export time, only
# 'UTC_Timestamp' changes (to 'UTCTimestamp', the name used by later cells),
# so this relies on the column order of the CSV.
AppMdl_lambda_logs = pd.read_csv('AppMdl_lambda_logs.csv', low_memory=False)
AppMdl_lambda_logs.columns = ['Function', 'Memory_Size', 'Max_Memory_Used', 'Duration', 'Billed_Duration', 'UTCTimestamp', 'RequestId']

In [40]:
AppMdl_lambda_logs.head()

Unnamed: 0,Function,Memory_Size,Max_Memory_Used,Duration,Billed_Duration,UTCTimestamp,RequestId
0,f1,1536,38,617.06,618,1640555000.0,6d6a9102-db93-4699-a7af-514cfbef0265
1,f1,1536,38,712.13,713,1640555000.0,03d04889-047d-4167-aa17-305875b4abc4
2,f1,1536,38,600.1,601,1640555000.0,a2fa4433-9689-403e-984f-73701bedab12
3,f1,1536,38,737.77,738,1640555000.0,a58f8cb7-7f6e-4238-a4de-8913e8be059d
4,f1,1536,39,603.87,604,1640555000.0,8dc0af8c-9944-49cd-ae3d-8b3d28cfb42d


In [41]:
# Print, for every function, the number of logged invocations and the mean
# Duration.
for i in range(1, function_count+1):
    function_logs = AppMdl_lambda_logs.query(f"Function == 'f{i}'")
    invocation_count = function_logs.shape[0]
    mean_duration = function_logs['Duration'].mean()
    print(f"f{i}", invocation_count, mean_duration)

f1 5000 691.541994
f2 5000 205.230394
f3 5588 526.5487276306371
f4 3017 842.1539211136891
f5 1983 208.11401916288452
f6 5588 518.4042358625626
f7 5000 322.995504
f8 49990 212.44284736947384
f9 4466 147.43433049708912
f10 1122 363.4150445632799
f11 1983 900.0714876449823
f12 1983 784.3699646999496
f13 1983 381.93210287443264
f14 49990 232.4744122824565
f15 4674 273.6729888746257
f16 4466 184.72936632333187


In [42]:
def calculate_cost(rt: float, mem: float, pmms: float = 0.0000166667/1024/1000, ppi: float = 0.0000002) -> float:
    """Return the cost of one invocation.

    The duration `rt` is rounded up to a whole number with ``math.ceil``,
    multiplied by `mem` and by the unit price `pmms`, and the fixed amount
    `ppi` is added.
    NOTE(review): presumably `rt` is in milliseconds, `mem` in MB, `pmms` a
    price per MB-millisecond and `ppi` a price per invocation - confirm.
    """
    billed_units = math.ceil(rt)
    compute_cost = billed_units * mem * pmms
    return compute_cost + ppi

def adjacent_values(vals, q1, q3):
    """Return the (lower, upper) adjacent values for the quartiles q1 and q3.

    The upper value is q3 plus 1.5 times (q3 - q1), clipped to the range
    [q3, vals[-1]]; the lower value is q1 minus the same amount, clipped to
    [vals[0], q1]. `vals` is indexed at its first and last element only.
    """
    reach = (q3 - q1) * 1.5
    upper = np.clip(q3 + reach, q3, vals[-1])
    lower = np.clip(q1 - reach, vals[0], q1)
    return lower, upper

# End-to-End Response Time (RT) Reported by AWS

In [100]:
# End-to-end duration of every execution in milliseconds: the start/end times
# are epoch seconds, hence the factor of 1000. The frame is written to CSV and
# read back from it.
_duration_ms = (AppMdl_sfn_logs['End'] - AppMdl_sfn_logs['Start']) * 1000
AppMdl_duration = _duration_ms.to_frame(name='Duration')
AppMdl_duration.to_csv('AppMdl_duration_aws.csv', index=False)
AppMdl_duration = pd.read_csv('AppMdl_duration_aws.csv', low_memory=False)

In [101]:
# Summary statistics of the measured end-to-end durations (milliseconds).
_durations_aws = AppMdl_duration['Duration']
print('Number of Executions: ', len(_durations_aws))
AppMdl_avg_duration_aws = np.mean(_durations_aws)
AppMdl_mid_duration_aws = np.median(_durations_aws)
AppMdl_percentile10_aws = np.percentile(_durations_aws, 10)
AppMdl_percentile90_aws = np.percentile(_durations_aws, 90)
for _label, _value in (
    ('Average Duration Reported by AWS: ', AppMdl_avg_duration_aws),
    ('Median Duration Reported by AWS: ', AppMdl_mid_duration_aws),
    ('10-th percentile of Duration Reported by AWS: ', AppMdl_percentile10_aws),
    ('90-th percentile Duration Reported by AWS: ', AppMdl_percentile90_aws),
    ('Standard Deviation of Duration Reported by AWS: ', np.std(_durations_aws)),
):
    print(_label, _value, 'ms')

Number of Executions:  5000
Average Duration Reported by AWS:  3482.393802547455 ms
Median Duration Reported by AWS:  3309.000015258789 ms
10-th percentile of Duration Reported by AWS:  3099.9999046325684 ms
90-th percentile Duration Reported by AWS:  4466.100072860718 ms
Standard Deviation of Duration Reported by AWS:  634.7561576851803 ms


# Cost Reported by AWS

In [67]:
AppMdl_sfn_logs.head()

Unnamed: 0,Start,End
0,1640555000.0,1640555000.0
1,1640555000.0,1640555000.0
2,1640555000.0,1640555000.0
3,1640555000.0,1640555000.0
4,1640555000.0,1640555000.0


In [46]:
AppMdl_lambda_logs.head()

Unnamed: 0,Function,Memory_Size,Max_Memory_Used,Duration,Billed_Duration,UTCTimestamp,RequestId
0,f1,1536,38,617.06,618,1640555000.0,6d6a9102-db93-4699-a7af-514cfbef0265
1,f1,1536,38,712.13,713,1640555000.0,03d04889-047d-4167-aa17-305875b4abc4
2,f1,1536,38,600.1,601,1640555000.0,a2fa4433-9689-403e-984f-73701bedab12
3,f1,1536,38,737.77,738,1640555000.0,a58f8cb7-7f6e-4238-a4de-8913e8be059d
4,f1,1536,39,603.87,604,1640555000.0,8dc0af8c-9944-49cd-ae3d-8b3d28cfb42d


In [49]:
cost_list = []
for index, row in AppMdl_sfn_logs.iterrows():
    cost = 0
    app_start = row['Start'] - 2
    app_end = row['End'] + 2
    lambda_logs = AppMdl_lambda_logs.query(f"""UTCTimestamp>{app_start} and UTCTimestamp<{app_end}""")
    for i, r in lambda_logs.iterrows():
        memory_size = r['Memory_Size']
        duration = r['Duration']
        cost += calculate_cost(rt=duration, mem=memory_size) * 1000000
    cost_list.append(cost)

In [50]:
# Summary statistics of the per-execution costs in `cost_list`.
AppMdl_avg_cost_aws = np.mean(cost_list)
AppMdl_mid_cost_aws = np.median(cost_list)
AppMdl_percentile10_cost_aws = np.percentile(cost_list, 10)
AppMdl_percentile90_cost_aws = np.percentile(cost_list, 90)
AppMdl_std_cost_aws = np.std(cost_list)
for _label, _value in (
    ('Average Cost Reported by AWS: ', AppMdl_avg_cost_aws),
    ('Median Cost Reported by AWS: ', AppMdl_mid_cost_aws),
    ('10-th percentile of Cost Reported by AWS: ', AppMdl_percentile10_cost_aws),
    ('90-th percentile Cost Reported by AWS: ', AppMdl_percentile90_cost_aws),
    ('Standard Deviation of Cost Reported by AWS: ', AppMdl_std_cost_aws),
):
    print(_label, _value, 'USD')

Average Cost Reported by AWS:  131.88937225460123 USD
Median Cost Reported by AWS:  120.56533369687502 USD
10-th percentile of Cost Reported by AWS:  111.80614959187497 USD
90-th percentile Cost Reported by AWS:  152.83300119874997 USD
Standard Deviation of Cost Reported by AWS:  20.16068515599438 USD


# End-to-End Response Time (RT) and Cost Derived from the Modeling Algorithm

In [51]:
import sys
sys.path.append('../../')
from slappsim.Structures import *
from slappsim.Function import *
from slappsim.PetriApp import *
from slappsim.States import *

In [88]:
# Inputs of the model: the per-function performance profile and two arrays of
# measured delays (scheduling overhead and function execution delay).
_PROFILE_COLUMNS = ['Function', 'Memory_Size', 'Max_Memory_Used', 'Duration', 'Billed_Duration',
                    'UTCTimestamp', 'RequestId']
performance_profile = pd.read_csv('AppMdl_lambda_logs_performance_profile.csv', low_memory=False)
performance_profile.columns = _PROFILE_COLUMNS
scheduling_overhead = np.array(
    pd.read_csv('../sfn-delay/Scheduling_Overhead.csv')['scheduling_overhead'].to_list()
)
function_execution_delay = np.array(
    pd.read_csv('../sfn-delay/Function_Execution_Delay.csv')['Duration'].to_list()
)

In [92]:
# Seed the random sources used by the model. `rs` is the RandomState whose
# `choice` method samples the durations and delays in the cells below.
# NOTE(review): `random` is never imported by name in this notebook;
# presumably it is provided by one of the star imports from slappsim -
# confirm, or add an explicit `import random`.
rs = np.random.RandomState(64)
random.seed(64)

## Define the application

In [93]:
def _make_function(name):
    """Build the model Function for the Lambda function `name` ('f1'..'f16').

    Its performance sampler draws, through `rs.choice`, from the measured
    durations of that function in `performance_profile` (rows 500 up to and
    including 9500 of the function's records); its memory size comes from
    `mem_config_list`.
    NOTE(review): `partial` is not imported by name in this notebook;
    presumably it comes from one of the slappsim star imports - confirm.
    """
    durations = np.array(
        performance_profile.query(f"Function=='{name}'")['Duration'].to_list()[500:9501]
    )
    return Function(pf_fun=partial(rs.choice, a=durations), mem=mem_config_list[name], name=name)


# One Function per Lambda function. This replaces sixteen copies of the same
# three statements; the names f1..f16 are kept because the application
# structure below refers to them. The intermediate fN_rt / fN_pp_fun names are
# no longer defined.
(f1, f2, f3, f4, f5, f6, f7, f8,
 f9, f10, f11, f12, f13, f14, f15, f16) = (_make_function(f'f{n}') for n in range(1, 17))
# Delay samplers: each call draws one value from the measured arrays via rs.choice.
sfn_scheduling_overhead_fun = partial(rs.choice, a=scheduling_overhead)
function_execution_delay_fun = partial(rs.choice, a=function_execution_delay)
delays = {'FunctionExecution': function_execution_delay_fun, 'SchedulingOverhead': sfn_scheduling_overhead_fun}
# Start/end places of the whole application and two Pass states.
start = Start()
end = End()
pass1 = Pass()
pass2 = Pass()
# parallel2: branch 1 runs f11; branch 2 runs f12 then f13.
sequence1_parallel2 = Sequence(actions=[f11])
sequence2_parallel2 = Sequence(actions=[f12, f13])
parallel2 = Parallel(branches=[sequence1_parallel2, sequence2_parallel2])
# choice1: f4 with probability 0.6, or f5 followed by parallel2 with probability 0.4.
sequence1_choice1 = Sequence(actions=[f4])
sequence2_choice1 = Sequence(actions=[f5, parallel2])
choice1 = Choice(choices=[sequence1_choice1, sequence2_choice1], probabilities=[0.6, 0.4])
# First branch of parallel1: f2, then choice1, then pass1.
sequence1_parallel1 = Sequence(actions=[f2, choice1, pass1])
# choice6 selects f16 (0.95) or f15 (0.05).
# NOTE(review): the meaning of `end=False` is defined in slappsim and is not visible here.
choice6 = Choice(choices=[f16, f15], probabilities=[0.95, 0.05], end=False)
# choice4: f9, f15, choice6, f16 with probability 0.8, or f10 with probability 0.2.
sequence1_choice4 = Sequence(actions=[f9, f15, choice6, f16])
sequence2_choice4 = Sequence(actions=[f10])
choice4 = Choice(choices=[sequence1_choice4, sequence2_choice4], probabilities=[0.8, 0.2])
# choice2 selects pass2 (0.9) or f3 (0.1); f3 and pass2 also appear directly in the sequence below.
choice2 = Choice(choices=[pass2, f3], probabilities=[0.9, 0.1], end=False)
# Second branch of parallel1: f3, choice4, f6, choice2, pass2.
sequence2_parallel1 = Sequence(actions=[f3, choice4, f6, choice2, pass2])
parallel1 = Parallel(branches=[sequence1_parallel1, sequence2_parallel1])
# map1 runs the sequence f8 -> f14 with iterations=10.
sequence_map1 = Sequence(actions=[f8, f14])
map1 = Map(sequence=sequence_map1, iterations=10)
# Top-level workflow: f1, parallel1, f7, map1.
sequence1 = Sequence(actions=[f1, parallel1, f7, map1])
# Every structure defined above, handed to PetriApp.
structures = [pass1, pass2, sequence1_parallel2, sequence2_parallel2, parallel2, sequence1_choice1, sequence2_choice1,
              choice1, sequence1_parallel1, choice6, sequence1_choice4, sequence2_choice4, choice4, choice2,
              sequence2_parallel1, parallel1, sequence_map1, map1, sequence1]
# t1 connects the application's start place to the start of sequence1.
i1 = InArc(place=start)
o1 = OutArc(place=sequence1.structure_start)
t1 = Transition(in_arcs=[i1], out_arcs=[o1])
# t2 connects the end of sequence1 to the application's end place.
i2 = InArc(place=sequence1.structure_end)
o2 = OutArc(place=end)
t2 = Transition(in_arcs=[i2], out_arcs=[o2])
# Full transition list: the two connecting transitions plus those of sequence1.
transitions = [t1, t2]
transitions += sequence1.transitions
# Assemble the application model from transitions, functions, structures and delay samplers.
appmdl = PetriApp(transitions=transitions,
                  functions=[f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, f16],
                  structures=structures,
                  delays=delays)

## Run the modeling algorithm

In [94]:
# Run the model 100000 times. Each run yields an end-to-end duration and a
# cost (stored multiplied by 1000000); the model is reset after every run.
ert = []
ec = []
for i in range(100000):
    rt, c, s, logs = appmdl.execute()
    ert.append(rt)
    ec.append(c * 1000000)
    appmdl.reset()

# Summary statistics of the simulated durations.
appmdl_avg_ert_mdl = np.mean(ert)
appmdl_mid_ert_mdl = np.median(ert)
appmdl_percentile10_ert_mdl = np.percentile(ert, 10)
appmdl_percentile90_ert_mdl = np.percentile(ert, 90)
appmdl_std_ert_mdl = np.std(ert)

# Summary statistics of the simulated costs.
appmdl_avg_cost_mdl = np.mean(ec)
appmdl_mid_cost_mdl = np.median(ec)
appmdl_percentile10_cost_mdl = np.percentile(ec, 10)
appmdl_percentile90_cost_mdl = np.percentile(ec, 90)
appmdl_std_cost_mdl = np.std(ec)

for _label, _value in (
    ('Average Duration Reported by Algorithm: ', appmdl_avg_ert_mdl),
    ('Median Duration Reported by Algorithm: ', appmdl_mid_ert_mdl),
    ('10-th percentile of Duration Reported by Algorithm: ', appmdl_percentile10_ert_mdl),
    ('90-th percentile Duration Reported by Algorithm: ', appmdl_percentile90_ert_mdl),
    ('Standard Deviation of Duration Reported by Algorithm: ', appmdl_std_ert_mdl),
):
    print(_label, _value, 'ms')
for _label, _value in (
    ('Average Cost Reported by Algorithm: ', appmdl_avg_cost_mdl),
    ('Median Cost Reported by Algorithm: ', appmdl_mid_cost_mdl),
    ('10-th percentile of Cost Reported by Algorithm: ', appmdl_percentile10_cost_mdl),
    ('90-th percentile Cost Reported by Algorithm: ', appmdl_percentile90_cost_mdl),
    ('Standard Deviation of Cost Reported by Algorithm: ', appmdl_std_cost_mdl),
):
    print(_label, _value, 'USD')

Average Duration Reported by Algorithm:  3481.3801966017204 ms
Median Duration Reported by Algorithm:  3313.209970436096 ms
10-th percentile of Duration Reported by Algorithm:  3089.8916997833244 ms
90-th percentile Duration Reported by Algorithm:  4466.090224876404 ms
Standard Deviation of Duration Reported by Algorithm:  623.250203312969 ms
Average Cost Reported by Algorithm:  133.26451374483406 USD
Median Cost Reported by Algorithm:  122.66846330312501 USD
10-th percentile of Cost Reported by Algorithm:  112.2168795 USD
90-th percentile Cost Reported by Algorithm:  155.7523820375 USD
Standard Deviation of Cost Reported by Algorithm:  20.597538614983776 USD


In [104]:
# Export the measured costs and the simulated durations and costs, each as a
# single-column CSV file without an index column.
AppMdl_cost_aws = pd.Series(cost_list).to_frame(name='Cost')
AppMdl_duration_model = pd.Series(ert).to_frame(name='Duration')
AppMdl_cost_model = pd.Series(ec).to_frame(name='Cost')
AppMdl_cost_aws.to_csv('AppMdl_cost_aws.csv', index=False)
AppMdl_duration_model.to_csv('AppMdl_duration_model.csv', index=False)
AppMdl_cost_model.to_csv('AppMdl_cost_model.csv', index=False)