In [1]:
import os
import logging
import time
import numpy as np
import boto3
from datetime import datetime
import pandas as pd
import pickle

In [2]:
# The difference between UTC and local timezone
timezone_offset = 0

This notebook is for measuring the function execution delay and scheduling overhead of AWS Lambda and AWS Step Functions.

# Execute the Application

In [3]:
sfn_client = boto3.client('stepfunctions')

In [4]:
stateMachineArn='arn:aws:states:us-east-2:499537426559:stateMachine:TextProcessingApp'

## Get the start time and the end time

In [5]:
app_exeuction_start_time = ' '.join(os.popen('head -1 AppExecution.log').read().split(' ')[:2])
app_execution_end_time = ' '.join(os.popen('tail -1 AppExecution.log').read().split(' ')[:2])
app_exeuction_start_time = datetime.strptime(app_exeuction_start_time, '%Y-%m-%d %H:%M:%S.%f')
app_execution_end_time = datetime.strptime(app_execution_end_time, '%Y-%m-%d %H:%M:%S.%f')

In [6]:
app_exeuction_start_time

datetime.datetime(2022, 9, 29, 7, 30, 38, 162000)

In [7]:
app_execution_end_time

datetime.datetime(2022, 9, 29, 13, 16, 48, 741000)

In [8]:
app_exeuction_start_time = int(datetime.timestamp(app_exeuction_start_time)) -10

In [9]:
app_execution_end_time = int(datetime.timestamp(app_execution_end_time)) + 20

# Retrieve Logs

In [10]:
logclient = boto3.client('logs')

## Query Step Functions Logs for App Execution Time

In [44]:
query_sfn_TextProcessingAppDelay = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 1 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)

In [45]:
query_results_sfn_TextProcessingAppDelay_execution_start = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay['queryId']
)

In [13]:
query_sfn_TextProcessingAppDelay = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 22 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)

In [24]:
query_results_sfn_TextProcessingAppDelay_execution_end = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay['queryId']
)

In [47]:
TextProcessingAppDelay_starttimestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_execution_start['results'] if item[1]['value']=='ExecutionStarted'])
TextProcessingAppDelay_endtimestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_execution_end['results'] if item[1]['value']=='ExecutionSucceeded'])

In [52]:
pd.DataFrame({'Start': TextProcessingAppDelay_starttimestamp, 'End':TextProcessingAppDelay_endtimestamp}).to_csv('TextProcessingAppDelay_SFN_Logs.csv', index=False)

In [53]:
TextProcessingAppDelay_sfn_logs = pd.read_csv('TextProcessingAppDelay_SFN_Logs.csv', low_memory=False)

In [54]:
TextProcessingAppDelay_sfn_logs.shape

(2000, 2)

## Query Step Functions Logs for Function Execution Time

### f1

In [143]:
query_sfn_TextProcessingAppDelay_f1_entered = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 2 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f1_scheduled = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 3 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f1_start = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 4 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f1_end = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 6 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(5)
query_results_sfn_TextProcessingAppDelay_f1_entered = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f1_entered['queryId']
)
query_results_sfn_TextProcessingAppDelay_f1_scheduled = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f1_scheduled['queryId']
)
query_results_sfn_TextProcessingAppDelay_f1_start = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f1_start['queryId']
)
query_results_sfn_TextProcessingAppDelay_f1_end = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f1_end['queryId']
)
TextProcessingAppDelay_f1_entered_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f1_entered['results'] if item[1]['value']=='TaskStateEntered'])
TextProcessingAppDelay_f1_scheduled_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f1_scheduled['results'] if item[1]['value']=='TaskScheduled'])
TextProcessingAppDelay_f1_start_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f1_start['results'] if item[1]['value']=='TaskStarted'])
TextProcessingAppDelay_f1_end_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f1_end['results'] if item[1]['value']=='TaskStateExited'])
pd.DataFrame({'Entered':TextProcessingAppDelay_f1_entered_timestamp, 'Scheduled': TextProcessingAppDelay_f1_scheduled_timestamp, 'Start': TextProcessingAppDelay_f1_start_timestamp, 'End':TextProcessingAppDelay_f1_end_timestamp}).to_csv('TextProcessingAppDelay_SFN_f1_Logs.csv', index=False)
TextProcessingAppDelay_sfn_f1_logs = pd.read_csv('TextProcessingAppDelay_SFN_f1_Logs.csv', low_memory=False)
TextProcessingAppDelay_sfn_f1_logs.shape

(2000, 4)

### f2

In [150]:
query_sfn_TextProcessingAppDelay_f2_entered = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 7 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f2_scheduled = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 8 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f2_start = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 9 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f2_end = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 11 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(5)
query_results_sfn_TextProcessingAppDelay_f2_entered = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f2_entered['queryId']
)
query_results_sfn_TextProcessingAppDelay_f2_scheduled = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f2_scheduled['queryId']
)
query_results_sfn_TextProcessingAppDelay_f2_start = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f2_start['queryId']
)
query_results_sfn_TextProcessingAppDelay_f2_end = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f2_end['queryId']
)
TextProcessingAppDelay_f2_entered_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f2_entered['results'] if item[1]['value']=='TaskStateEntered'])
TextProcessingAppDelay_f2_scheduled_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f2_scheduled['results'] if item[1]['value']=='TaskScheduled'])
TextProcessingAppDelay_f2_start_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f2_start['results'] if item[1]['value']=='TaskStarted'])
TextProcessingAppDelay_f2_end_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f2_end['results'] if item[1]['value']=='TaskStateExited'])
pd.DataFrame({'Entered':TextProcessingAppDelay_f2_entered_timestamp, 'Scheduled': TextProcessingAppDelay_f2_scheduled_timestamp, 'Start': TextProcessingAppDelay_f2_start_timestamp, 'End':TextProcessingAppDelay_f2_end_timestamp}).to_csv('TextProcessingAppDelay_SFN_f2_Logs.csv', index=False)
TextProcessingAppDelay_sfn_f2_logs = pd.read_csv('TextProcessingAppDelay_SFN_f2_Logs.csv', low_memory=False)
TextProcessingAppDelay_sfn_f2_logs.shape

(2000, 4)

### f3

In [151]:
query_sfn_TextProcessingAppDelay_f3_entered = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 12 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f3_scheduled = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 13 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f3_start = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 14 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f3_end = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 16 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(5)
query_results_sfn_TextProcessingAppDelay_f3_entered = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f3_entered['queryId']
)
query_results_sfn_TextProcessingAppDelay_f3_scheduled = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f3_scheduled['queryId']
)
query_results_sfn_TextProcessingAppDelay_f3_start = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f3_start['queryId']
)
query_results_sfn_TextProcessingAppDelay_f3_end = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f3_end['queryId']
)
TextProcessingAppDelay_f3_entered_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f3_entered['results'] if item[1]['value']=='TaskStateEntered'])
TextProcessingAppDelay_f3_scheduled_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f3_scheduled['results'] if item[1]['value']=='TaskScheduled'])
TextProcessingAppDelay_f3_start_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f3_start['results'] if item[1]['value']=='TaskStarted'])
TextProcessingAppDelay_f3_end_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f3_end['results'] if item[1]['value']=='TaskStateExited'])
pd.DataFrame({'Entered':TextProcessingAppDelay_f3_entered_timestamp, 'Scheduled': TextProcessingAppDelay_f3_scheduled_timestamp, 'Start': TextProcessingAppDelay_f3_start_timestamp, 'End':TextProcessingAppDelay_f3_end_timestamp}).to_csv('TextProcessingAppDelay_SFN_f3_Logs.csv', index=False)
TextProcessingAppDelay_sfn_f3_logs = pd.read_csv('TextProcessingAppDelay_SFN_f3_Logs.csv', low_memory=False)
TextProcessingAppDelay_sfn_f3_logs.shape

(2000, 4)

### f4

In [152]:
query_sfn_TextProcessingAppDelay_f4_entered = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 17 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f4_scheduled = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 18 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f4_start = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 19 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(1)
query_sfn_TextProcessingAppDelay_f4_end = logclient.start_query(
    logGroupName='/aws/vendedlogs/states/{}-Logs'.format('TextProcessingApp'),
    queryString="fields id, type, @timestamp| filter id = 21 | sort id desc",
    startTime=app_exeuction_start_time,
    endTime=app_execution_end_time,
    limit = 10000
)
time.sleep(5)
query_results_sfn_TextProcessingAppDelay_f4_entered = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f4_entered['queryId']
)
query_results_sfn_TextProcessingAppDelay_f4_scheduled = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f4_scheduled['queryId']
)
query_results_sfn_TextProcessingAppDelay_f4_start = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f4_start['queryId']
)
query_results_sfn_TextProcessingAppDelay_f4_end = logclient.get_query_results(
    queryId=query_sfn_TextProcessingAppDelay_f4_end['queryId']
)
TextProcessingAppDelay_f4_entered_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f4_entered['results'] if item[1]['value']=='TaskStateEntered'])
TextProcessingAppDelay_f4_scheduled_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f4_scheduled['results'] if item[1]['value']=='TaskScheduled'])
TextProcessingAppDelay_f4_start_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f4_start['results'] if item[1]['value']=='TaskStarted'])
TextProcessingAppDelay_f4_end_timestamp = np.sort([datetime.timestamp(datetime.strptime(item[2]['value'], '%Y-%m-%d %H:%M:%S.%f'))+timezone_offset*3600 for item in query_results_sfn_TextProcessingAppDelay_f4_end['results'] if item[1]['value']=='TaskStateExited'])
pd.DataFrame({'Entered':TextProcessingAppDelay_f4_entered_timestamp, 'Scheduled': TextProcessingAppDelay_f4_scheduled_timestamp, 'Start': TextProcessingAppDelay_f4_start_timestamp, 'End':TextProcessingAppDelay_f4_end_timestamp}).to_csv('TextProcessingAppDelay_SFN_f4_Logs.csv', index=False)
TextProcessingAppDelay_sfn_f4_logs = pd.read_csv('TextProcessingAppDelay_SFN_f4_Logs.csv', low_memory=False)
TextProcessingAppDelay_sfn_f4_logs.shape

(2000, 4)

## Query Lambda Logs for Function Execution Time

In [59]:
def lambda_report_log_to_dict(log):
    res={}
    lis=[item.split(': ') for item in log[1]['value'].split('\t')]
    res['RequestId']=lis[0][1]
    res['Duration']=float(lis[1][1].split(' ')[0])
    res['Billed_Duration']=int(lis[2][1].split(' ')[0])
    res['Memory_Size']=int(lis[3][1].split(' ')[0])
    res['Max_Memory_Used']=int(lis[4][1].split(' ')[0])
    res['UTC_Timestamp'] = time.mktime(datetime.strptime(log[0]['value'], "%Y-%m-%d %H:%M:%S.%f").timetuple()) +timezone_offset*3600
    return res

### Prepare Logs

In [60]:
query_lambda_f1 = logclient.start_query(
        logGroupName='/aws/lambda/{}'.format("TextProcessingApp_f1"),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=app_exeuction_start_time,
        endTime=app_execution_end_time,
        limit=10000
        )
time.sleep(2)
query_lambda_f2 = logclient.start_query(
        logGroupName='/aws/lambda/{}'.format("TextProcessingApp_f2"),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=app_exeuction_start_time,
        endTime=app_execution_end_time,
        limit=10000
        )
time.sleep(2)
query_lambda_f3 = logclient.start_query(
        logGroupName='/aws/lambda/{}'.format("TextProcessingApp_f3"),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=app_exeuction_start_time,
        endTime=app_execution_end_time,
        limit=10000
        )
time.sleep(2)
query_lambda_f4 = logclient.start_query(
        logGroupName='/aws/lambda/{}'.format("TextProcessingApp_f4"),
        queryString="fields @timestamp, @message| filter @message like 'REPORT'| sort @timestamp asc",
        startTime=app_exeuction_start_time,
        endTime=app_execution_end_time,
        limit=10000
        )
time.sleep(2)

### Retrieve Logs

In [61]:
query_lambda_results_f1 = logclient.get_query_results(
        queryId=query_lambda_f1['queryId']
    )
time.sleep(1)
query_lambda_results_f2 = logclient.get_query_results(
        queryId=query_lambda_f2['queryId']
    )
time.sleep(1)
query_lambda_results_f3 = logclient.get_query_results(
        queryId=query_lambda_f3['queryId']
    )
time.sleep(1)
query_lambda_results_f4 = logclient.get_query_results(
        queryId=query_lambda_f4['queryId']
    )
time.sleep(1)

In [62]:
with open('query_lambda_results_f1.pickle', 'wb') as f:
    f.write(pickle.dumps(query_lambda_results_f1))
with open('query_lambda_results_f2.pickle', 'wb') as f:
    f.write(pickle.dumps(query_lambda_results_f2))
with open('query_lambda_results_f3.pickle', 'wb') as f:
    f.write(pickle.dumps(query_lambda_results_f3))
with open('query_lambda_results_f4.pickle', 'wb') as f:
    f.write(pickle.dumps(query_lambda_results_f4))

In [63]:
TextProcessingAppDelay_duration_lambda_f1 = [lambda_report_log_to_dict(log) for log in query_lambda_results_f1['results']]
TextProcessingAppDelay_duration_lambda_f2 = [lambda_report_log_to_dict(log) for log in query_lambda_results_f2['results']]
TextProcessingAppDelay_duration_lambda_f3 = [lambda_report_log_to_dict(log) for log in query_lambda_results_f3['results']]
TextProcessingAppDelay_duration_lambda_f4 = [lambda_report_log_to_dict(log) for log in query_lambda_results_f4['results']]

In [64]:
TextProcessingAppDelay_duration_lambda_f1 = pd.DataFrame(TextProcessingAppDelay_duration_lambda_f1)
TextProcessingAppDelay_duration_lambda_f2 = pd.DataFrame(TextProcessingAppDelay_duration_lambda_f2)
TextProcessingAppDelay_duration_lambda_f3 = pd.DataFrame(TextProcessingAppDelay_duration_lambda_f3)
TextProcessingAppDelay_duration_lambda_f4 = pd.DataFrame(TextProcessingAppDelay_duration_lambda_f4)

# Analyze the Delay

## Response time reported by Step Functions

In [153]:
TextProcessingAppDelay_duration = pd.DataFrame((TextProcessingAppDelay_sfn_logs['End'] - TextProcessingAppDelay_sfn_logs['Start'])*1000, columns=['Duration'])
TextProcessingAppDelay_duration.to_csv('TextProcessingAppDelay_duration.csv', index=True)
TextProcessingAppDelay_duration = pd.read_csv('TextProcessingAppDelay_duration.csv', low_memory=False, index_col=0)

In [162]:
TextProcessingAppDelay_duration_f1 = pd.DataFrame((TextProcessingAppDelay_sfn_f1_logs['End'] - TextProcessingAppDelay_sfn_f1_logs['Entered'])*1000, columns=['Duration'])
TextProcessingAppDelay_duration_f2 = pd.DataFrame((TextProcessingAppDelay_sfn_f2_logs['End'] - TextProcessingAppDelay_sfn_f2_logs['Entered'])*1000, columns=['Duration'])
TextProcessingAppDelay_duration_f3 = pd.DataFrame((TextProcessingAppDelay_sfn_f3_logs['End'] - TextProcessingAppDelay_sfn_f3_logs['Entered'])*1000, columns=['Duration'])
TextProcessingAppDelay_duration_f4 = pd.DataFrame((TextProcessingAppDelay_sfn_f4_logs['End'] - TextProcessingAppDelay_sfn_f4_logs['Entered'])*1000, columns=['Duration'])
TextProcessingAppDelay_duration_f1.to_csv('TextProcessingAppDelay_duration_f1.csv', index=True)
TextProcessingAppDelay_duration_f1 = pd.read_csv('TextProcessingAppDelay_duration_f1.csv', low_memory=False, index_col=0)
TextProcessingAppDelay_duration_f2.to_csv('TextProcessingAppDelay_duration_f2.csv', index=True)
TextProcessingAppDelay_duration_f2 = pd.read_csv('TextProcessingAppDelay_duration_f2.csv', low_memory=False, index_col=0)
TextProcessingAppDelay_duration_f3.to_csv('TextProcessingAppDelay_duration_f3.csv', index=True)
TextProcessingAppDelay_duration_f3 = pd.read_csv('TextProcessingAppDelay_duration_f3.csv', low_memory=False, index_col=0)
TextProcessingAppDelay_duration_f4.to_csv('TextProcessingAppDelay_duration_f4.csv', index=True)
TextProcessingAppDelay_duration_f4 = pd.read_csv('TextProcessingAppDelay_duration_f4.csv', low_memory=False, index_col=0)

In [163]:
TextProcessingAppDelay_scheduling_overhead_f1 = pd.DataFrame((TextProcessingAppDelay_sfn_f1_logs['Start'] - TextProcessingAppDelay_sfn_f1_logs['Scheduled'])*1000, columns=['scheduling_overhead'])
TextProcessingAppDelay_scheduling_overhead_f2 = pd.DataFrame((TextProcessingAppDelay_sfn_f2_logs['Start'] - TextProcessingAppDelay_sfn_f2_logs['Scheduled'])*1000, columns=['scheduling_overhead'])
TextProcessingAppDelay_scheduling_overhead_f3 = pd.DataFrame((TextProcessingAppDelay_sfn_f3_logs['Start'] - TextProcessingAppDelay_sfn_f3_logs['Scheduled'])*1000, columns=['scheduling_overhead'])
TextProcessingAppDelay_scheduling_overhead_f4 = pd.DataFrame((TextProcessingAppDelay_sfn_f4_logs['Start'] - TextProcessingAppDelay_sfn_f4_logs['Scheduled'])*1000, columns=['scheduling_overhead'])
TextProcessingAppDelay_scheduling_overhead_f1.to_csv('TextProcessingAppDelay_scheduling_overhead_f1.csv', index=True)
TextProcessingAppDelay_scheduling_overhead_f1 = pd.read_csv('TextProcessingAppDelay_scheduling_overhead_f1.csv', low_memory=False, index_col=0)
TextProcessingAppDelay_scheduling_overhead_f2.to_csv('TextProcessingAppDelay_scheduling_overhead_f2.csv', index=True)
TextProcessingAppDelay_scheduling_overhead_f2 = pd.read_csv('TextProcessingAppDelay_scheduling_overhead_f2.csv', low_memory=False, index_col=0)
TextProcessingAppDelay_scheduling_overhead_f3.to_csv('TextProcessingAppDelay_scheduling_overhead_f3.csv', index=True)
TextProcessingAppDelay_scheduling_overhead_f3 = pd.read_csv('TextProcessingAppDelay_scheduling_overhead_f3.csv', low_memory=False, index_col=0)
TextProcessingAppDelay_scheduling_overhead_f4.to_csv('TextProcessingAppDelay_scheduling_overhead_f4.csv', index=True)
TextProcessingAppDelay_scheduling_overhead_f4 = pd.read_csv('TextProcessingAppDelay_scheduling_overhead_f4.csv', low_memory=False, index_col=0)

In [164]:
print('Number of Executions: ', len(TextProcessingAppDelay_duration['Duration']))
TextProcessingAppDelay_avg_duration_aws = np.mean(TextProcessingAppDelay_duration['Duration'])
TextProcessingAppDelay_mid_duration_aws = np.median(TextProcessingAppDelay_duration['Duration'])
print('Average Duration Reported by AWS: ', TextProcessingAppDelay_avg_duration_aws, 'ms')
print('Median Duration Reported by AWS: ', TextProcessingAppDelay_mid_duration_aws, 'ms')
print('Standard Deviation of Duration Reported by AWS: ', np.std(TextProcessingAppDelay_duration['Duration']), 'ms')

Number of Executions:  2000
Average Duration Reported by AWS:  445.60450077056885 ms
Median Duration Reported by AWS:  417.9999828338623 ms
Standard Deviation of Duration Reported by AWS:  98.17248028092828 ms


In [227]:
(TextProcessingAppDelay_duration['Duration'] - (TextProcessingAppDelay_duration_f1['Duration'] + TextProcessingAppDelay_duration_f2['Duration'] + TextProcessingAppDelay_duration_f3['Duration'] + TextProcessingAppDelay_duration_f4['Duration'])).rename('scheduling_overhead').to_frame().to_csv("Scheduling_Overhead.csv", index=False)

## Response time reported by Lambda

In [170]:
TextProcessingAppDelay_duration_lambda_f1['Duration'] + TextProcessingAppDelay_duration_lambda_f2['Duration'] + TextProcessingAppDelay_duration_lambda_f3['Duration'] + TextProcessingAppDelay_duration_lambda_f4['Duration']

0       198.51
1       246.66
2       229.12
3       218.18
4       224.14
         ...  
1995    192.17
1996    221.82
1997    208.70
1998    191.14
1999    207.27
Name: Duration, Length: 2000, dtype: float64

## Delay

## f1

In [174]:
print('mean:', (TextProcessingAppDelay_duration_f1.squeeze() - TextProcessingAppDelay_duration_lambda_f1['Duration']).mean())
print('median:', (TextProcessingAppDelay_duration_f1.squeeze() - TextProcessingAppDelay_duration_lambda_f1['Duration']).median())
print('avg:', ((TextProcessingAppDelay_sfn_f1_logs['End'] - TextProcessingAppDelay_sfn_f1_logs['Entered'])*1000 - TextProcessingAppDelay_duration_lambda_f1['Duration']).mean())
print('median:', ((TextProcessingAppDelay_sfn_f1_logs['End'] - TextProcessingAppDelay_sfn_f1_logs['Entered'])*1000 - TextProcessingAppDelay_duration_lambda_f1['Duration']).median())

mean: 82.07447948215486
median: 78.07497615814208
avg: 82.07447948215486
median: 78.07497615814208


## f2

In [175]:
print('mean:', (TextProcessingAppDelay_duration_f2.squeeze() - TextProcessingAppDelay_duration_lambda_f2['Duration']).mean())
print('median:', (TextProcessingAppDelay_duration_f2.squeeze() - TextProcessingAppDelay_duration_lambda_f2['Duration']).median())
print('avg:', ((TextProcessingAppDelay_sfn_f2_logs['End'] - TextProcessingAppDelay_sfn_f2_logs['Scheduled'])*1000 - TextProcessingAppDelay_duration_lambda_f2['Duration']).mean())
print('median:', ((TextProcessingAppDelay_sfn_f2_logs['End'] - TextProcessingAppDelay_sfn_f2_logs['Scheduled'])*1000 - TextProcessingAppDelay_duration_lambda_f2['Duration']).median())

mean: 40.93450525558471
median: 35.87504577636719
avg: 22.734504969482426
median: 19.789992370605468


## f3

In [176]:
print('mean:', (TextProcessingAppDelay_duration_f3.squeeze() - TextProcessingAppDelay_duration_lambda_f3['Duration']).mean())
print('median:', (TextProcessingAppDelay_duration_f3.squeeze() - TextProcessingAppDelay_duration_lambda_f3['Duration']).median())
print('avg:', ((TextProcessingAppDelay_sfn_f3_logs['End'] - TextProcessingAppDelay_sfn_f3_logs['Scheduled'])*1000 - TextProcessingAppDelay_duration_lambda_f3['Duration']).mean())
print('median:', ((TextProcessingAppDelay_sfn_f3_logs['End'] - TextProcessingAppDelay_sfn_f3_logs['Scheduled'])*1000 - TextProcessingAppDelay_duration_lambda_f3['Duration']).median())

mean: 36.513272511978144
median: 31.705084877014155
avg: 28.131273019332887
median: 23.824944686889644


## f4

In [177]:
print('mean:', (TextProcessingAppDelay_duration_f4.squeeze() - TextProcessingAppDelay_duration_lambda_f4['Duration']).mean())
print('median:', (TextProcessingAppDelay_duration_f4.squeeze() - TextProcessingAppDelay_duration_lambda_f4['Duration']).median())
print('avg:', ((TextProcessingAppDelay_sfn_f4_logs['End'] - TextProcessingAppDelay_sfn_f4_logs['Scheduled'])*1000 - TextProcessingAppDelay_duration_lambda_f4['Duration']).mean())
print('median:', ((TextProcessingAppDelay_sfn_f4_logs['End'] - TextProcessingAppDelay_sfn_f4_logs['Scheduled'])*1000 - TextProcessingAppDelay_duration_lambda_f4['Duration']).median())

mean: 41.69121134838104
median: 35.97003528594971
avg: 25.71121252140045
median: 22.490039100646975


* Save the Function Execution Delay Data

In [178]:
f1_delay = TextProcessingAppDelay_duration_f1.squeeze() - TextProcessingAppDelay_duration_lambda_f1['Duration']
f2_delay = TextProcessingAppDelay_duration_f2.squeeze() - TextProcessingAppDelay_duration_lambda_f2['Duration']
f3_delay = TextProcessingAppDelay_duration_f3.squeeze() - TextProcessingAppDelay_duration_lambda_f3['Duration']
f4_delay = TextProcessingAppDelay_duration_f4.squeeze() - TextProcessingAppDelay_duration_lambda_f4['Duration']

In [191]:
function_execution_delay = f1_delay.append(f2_delay).append(f3_delay).append(f4_delay)

In [192]:
function_execution_delay.to_csv("Function_Execution_Delay.csv", index=False)