<a href="https://colab.research.google.com/github/lmastalerz/sutainability/blob/main/FaaS_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import math 

# Notebook-wide pandas display settings: floats rendered with three decimals,
# cell contents never truncated, and up to 50 columns shown per frame.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.float_format', '{:.3f}'.format)

In [None]:
# Number of minutes a Function App stays cached after an execution before its
# resources are deallocated.
FUN_TTL_MIN = 10

# Percentage of CPU that a function is assumed to consume while it is running.
# This value does not have a massive impact on the final results: in the most
# extreme case the average contribution across deployment types is around 15%
# (comparing results with value 0 to results with value 100).
ACTIVE_FUNCTION_CPU_PCT = 50

# The invocation and duration datasets do not match perfectly.
# This is the maximum percentage of data allowed to be missing across the datasets.
MAX_MISSING_PCT = 2

Source data files downloaded from here: https://azurecloudpublicdataset2.blob.core.windows.net/azurepublicdatasetv2/azurefunctions_dataset2019/azurefunctions-dataset2019.tar.xz


In [None]:
# Per-function execution durations, in milliseconds.
# The file layout is documented at:
# https://github.com/Azure/AzurePublicDataset/blob/master/AzureFunctionsDataset2019.md#function-execution-duration
dur_1 = pd.read_csv(filepath_or_buffer='/content/function_durations_percentiles.anon.d02.csv')
dur_1.head(n=5)

Unnamed: 0,HashOwner,HashApp,HashFunction,Average,Count,Minimum,Maximum,percentile_Average_0,percentile_Average_1,percentile_Average_25,percentile_Average_50,percentile_Average_75,percentile_Average_99,percentile_Average_100
0,188aa0effba52f3801b7ae0331267195e144dc4304c0ab33693bf4690b8bb790,786aab6a114579a1d20a7d3c43f67959f17fe480461c36818a8e52c4dccafd77,2b373145c4fa2c3447bb2a19be20d14120b76238b61c9eb9ac92f86821da3e6a,79,20859838,36.0,99838.0,59,70,75,77,82,116,99838
1,bd80116a680283c92364a00dbcce89b92b8cb8543a39e2f958b339cb2f850f6b,f7339562a59677cd37ca76eac05727dda2c1f98544090734f15fcfe739ef7373,76e3b8292113c2c8ff70ae66716618eab54569abec510b69461dbf92ab05b130,426,23568,46.0,143633.0,155,244,273,287,307,4475,71988
2,bd80116a680283c92364a00dbcce89b92b8cb8543a39e2f958b339cb2f850f6b,f7339562a59677cd37ca76eac05727dda2c1f98544090734f15fcfe739ef7373,f76a4b45211cb764c292d6c469b5e9e16c2a8400ef773d2eaff0acf533f551bd,245,11508,0.0,2081.0,15,205,225,234,249,454,1586
3,bd80116a680283c92364a00dbcce89b92b8cb8543a39e2f958b339cb2f850f6b,f7339562a59677cd37ca76eac05727dda2c1f98544090734f15fcfe739ef7373,85aeddfb6d84a57fe2a289c40b87f5238a11d6bf127fd48e4217e14b77a37725,737,11711,290.0,5996.0,390,450,598,661,753,2090,5205
4,09a2382ad009092b5c6d8a4eacaf33d915495fd841f6bfcdb7f204cbe4ab6b2b,31d8d2ace1ab9d734259b8ba90bf5cba0f0f5021d7109fe0f8a6ced8526fd2f8,100ebb205516778c9ce1a7f098ee05de297ca2179efa337370741afc619bf44e,3,288,0.0,130.0,0,0,0,0,0,74,130


In [None]:
# Grouping durations by application - this is how resources are managed.
# An app can contain multiple functions, so rows are grouped by HashApp:
#   * Count       - total number of executions across the app's functions (sum)
#   * duration_ms - 50th-percentile duration of each function, then (rather
#                   conservatively) the max across all functions in the app
# The per-function medians must NOT be summed: a sum grows with the number of
# functions in the app, which inflates the per-app duration used later for the
# one-minute short/long split and for the total execution time estimate.
dur_1 = dur_1.rename(columns={'percentile_Average_50': 'duration_ms'})
app_dur_1 = (
    dur_1[['HashApp', 'Count', 'duration_ms']]
    .groupby('HashApp')
    .agg({'Count': 'sum', 'duration_ms': 'max'})
)
app_dur_1.head()

Unnamed: 0_level_0,Count,duration_ms
HashApp,Unnamed: 1_level_1,Unnamed: 2_level_1
0002577c18765bc3c1be0077ec0dfde45d5cdf532d547b8d8ae68494b21cdecb,1,13506
000481d975e1672df56b11adc7957d13836870d14a1a5e0ab82a6970284b736a,6,2749
000b058a70dedf75e66071d3fe6f8701e31fed235942f4ae734e5d7737a3707f,513,6810
0016a719d18f743a52142f3b07efd6c17a034fa261e8f445d07da08a0b4bac72,79,2362
0019d10aa8d3ef382c31a7b35d88b487d937d08dcdbdcd3867386807a4f8a8b0,183,127


In [None]:
# Per-function invocation counts, one column per minute of the day (1..1440).
# The file layout is documented at:
# https://github.com/Azure/AzurePublicDataset/blob/master/AzureFunctionsDataset2019.md#function-invocation-counts
inv_1 = pd.read_csv(filepath_or_buffer='/content/invocations_per_function_md.anon.d02.csv')
inv_1.head(n=5)

Unnamed: 0,HashOwner,HashApp,HashFunction,Trigger,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,...,1416,1417,1418,1419,1420,1421,1422,1423,1424,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434,1435,1436,1437,1438,1439,1440
0,71ca12c7af70d021e285b51b245942f8432df6463ff9f2c009b06b3f661f871f,7ca324d9fc836a5d4562811c11ce3719530ee919dd1fb91bcaf71942eab8240a,520dbd6bd906840012aa0c4b778743efc7c0ac7b7caf96b3d7f85d46209b7872,http,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,...,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
1,71ca12c7af70d021e285b51b245942f8432df6463ff9f2c009b06b3f661f871f,0d0ac65651f54ae3285a59564d64e39238b516fa1d5b565582032986e780b634,115ca7a2b5bc290052c3da74cd0347d19c3c67b7d5aa66e9a975e427f25fc7ed,http,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,...,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
2,71ca12c7af70d021e285b51b245942f8432df6463ff9f2c009b06b3f661f871f,a04487a6ba1e14296eb7647e4963180d28bef7a90a8fc5b3fbb894b8800418f3,93e6c664773bbec3a7f50a0e92fa7e97401a802dc6eed86ae062344eb0cb7c2e,orchestration,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,71ca12c7af70d021e285b51b245942f8432df6463ff9f2c009b06b3f661f871f,a04487a6ba1e14296eb7647e4963180d28bef7a90a8fc5b3fbb894b8800418f3,740c5c767e4b9978ee59a97d1829cfbaf755a47806a3114f0d4c182bb5a7e253,http,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,71ca12c7af70d021e285b51b245942f8432df6463ff9f2c009b06b3f661f871f,a04487a6ba1e14296eb7647e4963180d28bef7a90a8fc5b3fbb894b8800418f3,c108b4864b866b38b80d0e4594cc6d038f39668b804a1ba88d2b95d682a8ab20,http,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Grouping executions by application - this is how resources are managed.
# The new structure holds the number of executions within one-minute buckets
# for all functions running as a part of a given app.
# numeric_only=True explicitly drops the non-numeric columns (hash IDs and
# trigger type). Recent pandas versions no longer drop them automatically and
# would concatenate the strings instead, leaving extra columns in front of the
# per-minute buckets.
app_inv_1 = inv_1.groupby('HashApp').sum(numeric_only=True)
app_inv_1.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,...,1416,1417,1418,1419,1420,1421,1422,1423,1424,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434,1435,1436,1437,1438,1439,1440
HashApp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
0002577c18765bc3c1be0077ec0dfde45d5cdf532d547b8d8ae68494b21cdecb,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
000481d975e1672df56b11adc7957d13836870d14a1a5e0ab82a6970284b736a,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
000b058a70dedf75e66071d3fe6f8701e31fed235942f4ae734e5d7737a3707f,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0
0016a719d18f743a52142f3b07efd6c17a034fa261e8f445d07da08a0b4bac72,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0019d10aa8d3ef382c31a7b35d88b487d937d08dcdbdcd3867386807a4f8a8b0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [None]:
# Combine per-app invocation buckets with per-app duration stats.
# An outer join keeps apps that appear in only one of the two datasets,
# so that the amount of missing data can be measured afterwards.
app_1 = app_inv_1.merge(app_dur_1, how='outer', on='HashApp')
app_1

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,...,1418,1419,1420,1421,1422,1423,1424,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434,1435,1436,1437,1438,1439,1440,Count,duration_ms
HashApp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
0002577c18765bc3c1be0077ec0dfde45d5cdf532d547b8d8ae68494b21cdecb,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,13506.000
000481d975e1672df56b11adc7957d13836870d14a1a5e0ab82a6970284b736a,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,6.000,2749.000
000b058a70dedf75e66071d3fe6f8701e31fed235942f4ae734e5d7737a3707f,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,513.000,6810.000
0016a719d18f743a52142f3b07efd6c17a034fa261e8f445d07da08a0b4bac72,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,79.000,2362.000
0019d10aa8d3ef382c31a7b35d88b487d937d08dcdbdcd3867386807a4f8a8b0,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,1.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,1.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,183.000,127.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ef9e327769593a422d6803a7e5234fa7fa0bab938c0b80d5b80251bcb94341d1,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,1.000,0.000
f5e356242d74f5762be1b654782eb19131879600d0e48242b00f6e08923e9edb,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,1.000,0.000
f8e78d55773d361ee6a9b0b1cb91591d5e005ec050c75beb2da21d1cd20819cb,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,1.000,15.000
fc432a617a4b0145a0c0a81209278671fea9988a5ea8961ce9facc2577c5d30a,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,1.000,1653.000


In [None]:
# Validate the joined data to make sure not too much of it is missing.
# Some apps have stats about duration but no stats about invocations and vice
# versa; after the outer join such rows carry NaN in the columns coming from
# the missing side. These rows are dropped, unless their share is above the
# threshold, in which case execution is aborted.
no_inv_idx = app_1['1'].isna()            # rows without invocation data
no_dur_idx = app_1['duration_ms'].isna()  # rows without duration data

total_rows = len(app_1.index)
assert total_rows > 0, 'Joined dataset is empty - nothing to validate'

# Each percentage is derived from the mask of the same name
missing_inv_pct = no_inv_idx.sum() / total_rows * 100
missing_dur_pct = no_dur_idx.sum() / total_rows * 100

print('Missing durations: {:.2f}%'.format(missing_dur_pct))
print('Missing invocations: {:.2f}%'.format(missing_inv_pct))

# Both have to be below the threshold
assert missing_dur_pct <= MAX_MISSING_PCT and missing_inv_pct <= MAX_MISSING_PCT, \
    'Too much data is missing after the join (allowed: {}%)'.format(MAX_MISSING_PCT)

print('Total rows before cleanup: {}. Dropping rows with missing durations or invocations.' \
  .format(total_rows))
# Drop both kinds of incomplete rows in one step, using the same columns the
# masks above were built from
app_1 = app_1.dropna(subset=['1', 'duration_ms'])
print('Total rows after cleanup: {}.'.format(len(app_1.index)))

# Make sure there are no more NA's in data
assert not app_1.isnull().any().any(), 'Unexpected missing values remain after cleanup'

Missing durations: 0.66%
Missing invocations: 0.25%
Total rows before cleanup: 17901. Dropping rows with missing durations or invocations.
Total rows after cleanup: 17738.




In [None]:
# From here the analysis takes two different paths.
# Apps whose functions take at most one minute to execute (the vast majority)
# are assumed to hold resources for one minute plus the pre-defined TTL after
# each execution. This is a conservative assumption, but with a data resolution
# of one minute it would be difficult to get more granular than this. For these
# apps a detailed map of when resources were allocated and when they were free
# is constructed.
# The remaining, longer-running apps are handled separately.
short_app_idx_1 = app_1['duration_ms'].le(60 * 1000)
short_app_1 = app_1.loc[short_app_idx_1]
long_app_1 = app_1.loc[~short_app_idx_1]

print(
    'Total apps: {}, apps with functions shorter than minute: {}, apps with functions longer than minute {}'.format(
        len(app_1.index), len(short_app_1.index), len(long_app_1.index)
    )
)

Total apps: 17738, apps with functions shorter than minute: 16272, apps with functions longer than minute 1466


## Short Functions 

In [None]:
# For short functions a matrix is built that represents when resources are
# allocated. If a function is executed in minute t, resources stay allocated
# for that minute (max duration of a short execution) plus the next
# FUN_TTL_MIN minutes. If another execution happens during that time, the
# counting starts from the beginning.
# The rolling window runs along the time axis. Rolling is done on the
# transposed frame (minutes as rows) because the `axis` argument of
# DataFrame.rolling is deprecated/removed in recent pandas versions.
# Aggregating with max(), but it doesn't really matter - it's a question of
# having a value vs. not having anything (= 0).
short_app_alloc_martix_1 = (
    short_app_1.iloc[:, :1440]
    .T
    .rolling(window = FUN_TTL_MIN + 1, min_periods = 1)
    .max()
    .T
)
# Converting to boolean.
# True - resources are allocated in the given one-minute bucket
# False - resources are not allocated
short_app_alloc_idx_bool_1 = (short_app_alloc_martix_1 > 0)
# Counting allocated buckets per app (number of True values in each row)
short_app_alloc_1 = pd.DataFrame({'alloc_mins': short_app_alloc_idx_bool_1.sum(axis = 1)})
# Calculating percentage of time resources are allocated per app
short_app_alloc_1['alloc_pct'] = short_app_alloc_1['alloc_mins'] / 1440 * 100

# Calculating total duration of execution for all the functions along with
# percentage of total time.
# Anything above 100% is capped - not enough data to calculate scalability impact
short_app_alloc_1['exec_duration_mins'] = short_app_1['Count'] * short_app_1['duration_ms'] / 1000 / 60
short_app_alloc_1['exec_duration_pct'] = np.where( short_app_alloc_1['exec_duration_mins'] / 1440 * 100 < 100, \
                                                   short_app_alloc_1['exec_duration_mins'] / 1440 * 100, 100)

short_app_alloc_1

Unnamed: 0_level_0,alloc_mins,alloc_pct,exec_duration_mins,exec_duration_pct
HashApp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0002577c18765bc3c1be0077ec0dfde45d5cdf532d547b8d8ae68494b21cdecb,11,0.764,0.225,0.016
000481d975e1672df56b11adc7957d13836870d14a1a5e0ab82a6970284b736a,66,4.583,0.275,0.019
000b058a70dedf75e66071d3fe6f8701e31fed235942f4ae734e5d7737a3707f,1440,100.000,58.226,4.043
0016a719d18f743a52142f3b07efd6c17a034fa261e8f445d07da08a0b4bac72,248,17.222,3.110,0.216
0019d10aa8d3ef382c31a7b35d88b487d937d08dcdbdcd3867386807a4f8a8b0,1122,77.917,0.387,0.027
...,...,...,...,...
ffdfbb69c91ec3bbc7f9ce02cce783d45d620f4aa050c6e93cf1b7646b0bade8,65,4.514,0.009,0.001
ffe6a32b346421ffd5c745b5206091e692d03a4b71d8dc34aae997ff0e5522c6,520,36.111,2744.999,100.000
fff4c266c6912dd6dfe2cb32fc6bc42e47a65438e9e9d845514b1e5486509843,1435,99.653,0.557,0.039
fff66690a846223ff9c261d9ab918f0c7a8b029756425dce4a2ce4c36236fe7c,979,67.986,39.396,2.736


## Long functions 

In [None]:
# For long functions, for the sake of simplicity, resources are assumed to be
# allocated all the time (1440 minutes = 100% of the day).
# This is a conservative approach and results in allocation being reported as
# higher than in reality, but the impact on overall results would be well below
# 10%, and a calculation with variable-sized windows would be difficult to do
# given the dataset.
#
# Total execution time of all functions and its share of the day are computed
# the same way as for short apps; anything above 100% is capped - not enough
# data to calculate scalability impact.
long_app_alloc_1 = (
    pd.DataFrame({'alloc_mins': 1440, 'alloc_pct': 100}, index = long_app_1.index)
    .assign(exec_duration_mins = long_app_1['Count'] * long_app_1['duration_ms'] / 1000 / 60)
    .assign(exec_duration_pct = lambda frame: np.where(
        frame['exec_duration_mins'] / 1440 * 100 < 100,
        frame['exec_duration_mins'] / 1440 * 100,
        100))
)
long_app_alloc_1

Unnamed: 0_level_0,alloc_mins,alloc_pct,exec_duration_mins,exec_duration_pct
HashApp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
009f72f7ae1bc39581c27a9bc53c34ef59d6694bdd09147181251c3ea43caea7,1440,100,2155553.219,100.000
00eaa5b2be78b495d9c0a7f4d43aa5922d317a26605c2c061fe91bd77ce6905f,1440,100,5.001,0.347
00eee305d4552de040e3409b7e4cc1b40e5d886e96dd3c39848c5c4b705a6600,1440,100,2157.360,100.000
01598ea73c90b1d78d06a4763865e0d529976e82d29f0d1501cd2c4bcb76954c,1440,100,5247796.446,100.000
01b98005fe3af6939268d65223109a99b73dfb8a4cb84d132a55cc04ea56953c,1440,100,1.025,0.071
...,...,...,...,...
ff43f8c249c8a39aa05016e9274d46c4e4939dfb7607a215dc31094e0752e25d,1440,100,2780.401,100.000
ff5252ca443b3704f1b180298a2492f5c76c02436ab2a71ce7181f47d95d9209,1440,100,1055.589,73.305
ff89f894e5d45bc686b367f52412e4939bfdc18197f55dc12ebff3c022db3ff3,1440,100,4361906.711,100.000
ffa92cd52bb6c851feb35c843e359fd73220269b1a3ecb6a254595ffc5c74d93,1440,100,279.769,19.428


In [None]:
# Stack the short-app and long-app allocation tables into a single frame
# for further analysis.
app_alloc_1 = pd.concat(objs = [short_app_alloc_1, long_app_alloc_1], axis = 0)

# Energy usage

## Hardware
Energy usage of an average CPU core.
This data comes from [Cloud Carbon Footprint](https://www.cloudcarbonfootprint.org/docs/methodology) methodology page, which in turn follows [Etsy's Cloud Jewels](https://codeascraft.com/2020/04/23/cloud-jewels-estimating-kwh-in-the-cloud/)


In [None]:
# Power draw of an average CPU core, in watts.
# AVG_MIN_WATTS is used in the calculations below as the idle baseline for the
# time resources are allocated; the (AVG_MAX_WATTS - AVG_MIN_WATTS) difference
# is scaled by CPU utilisation to estimate consumption while code is running.
AVG_MIN_WATTS = 0.78
AVG_MAX_WATTS = 3.76

## FaaS energy usage

In [None]:
# Energy consumption of the workload deployed on FaaS, in kWh per day.
# Part 1: baseline energy for the time resources are allocated
# (allocated hours * idle watts, converted from W to kW).
app_alloc_1['idle_kwh'] = (
    app_alloc_1['alloc_mins']
    .div(60)
    .mul(AVG_MIN_WATTS)
    .div(1000)
)

# Part 2: additional energy consumed while functions are actually running.
# The first three steps give the number of hours within a day the function is
# running; the rest applies the extra power drawn at the assumed CPU
# percentage and converts the result to kW.
app_alloc_1['active_kwh'] = (
    app_alloc_1['exec_duration_pct']
    .div(100).mul(1440).div(60)
    .mul(AVG_MAX_WATTS - AVG_MIN_WATTS)
    .mul(ACTIVE_FUNCTION_CPU_PCT).div(100)
    .div(1000)
)

# Totals across all apps, as a one-column frame
results_faas = app_alloc_1[['idle_kwh', 'active_kwh']].sum().to_frame()

## VM energy usage

In [None]:
# Energy consumption (kWh per day) of the same apps if each one were deployed
# on always-on VMs instead of FaaS. One row per app, sharing the index of
# app_alloc_1.
vm_alloc = pd.DataFrame(index = app_alloc_1.index)

# VM configurations to compare: configuration number -> (CPUs per server, servers)
VM_CONFIGURATIONS = {
    1: (1, 1),  # one app deployed on a one-CPU VM running all the time
    2: (2, 1),  # one app deployed on a two-CPU VM running all the time
    3: (4, 1),  # one app deployed on a four-CPU VM running all the time
    4: (4, 2),  # one app deployed on a four-CPU VM running all the time plus warm standby
}

for conf_no, (no_of_cpu, no_of_servers) in VM_CONFIGURATIONS.items():
    # Idle consumption: every core of every server draws the idle wattage for 24 hours
    vm_alloc['conf_{}_idle_kwh'.format(conf_no)] = AVG_MIN_WATTS/1000 * 24 * no_of_cpu * no_of_servers
    # Active consumption is the same as for serverless
    vm_alloc['conf_{}_active_kwh'.format(conf_no)] = app_alloc_1['active_kwh']

# Totals across all apps, as a one-column frame
results_vm = pd.DataFrame(vm_alloc.sum())


In [None]:
# FaaS totals followed by the totals of every VM configuration
results = pd.concat(objs = [results_faas, results_vm], axis = 0)
results

Unnamed: 0,0
idle_kwh,189.117
active_kwh,67.441
conf_1_idle_kwh,332.055
conf_1_active_kwh,67.441
conf_2_idle_kwh,664.111
conf_2_active_kwh,67.441
conf_3_idle_kwh,1328.221
conf_3_active_kwh,67.441
conf_4_idle_kwh,2656.443
conf_4_active_kwh,67.441
