# Profiling Results Accuracy

This file compares the accuracy of the following profiling results:

* Baseline: exhaustive profiling (159 resource profiles, based on the complete range of AWS profiles in 64 MB steps)
* Linear model (linear interpolation between an evenly distributed selection of 5% of the exhaustive profiling results)
* Bayesian Optimization

In [1]:
import os

# Workaround for using the Jupyter container from the VS Code Jupyter extension:
# if './profiling-results' cannot be found from the current working directory,
# switch to './chunk-func/jupyter' (presumably the directory that contains it - verify
# against the repository layout), so that the relative paths used below can be resolved.
if not os.path.exists('./profiling-results'):
    os.chdir('./chunk-func/jupyter')

In [2]:
from dataclasses import dataclass
import pandas as pd
import yaml

# Definitions of our column names.
# These are the columns of the per-input-size DataFrames that load_result() builds.
col_profile = 'profile'  # 'resourceProfileId' of the result; used as the DataFrame index
col_exec_time = 'execTime'  # 'executionTimeMs' of the result
col_cost = 'cost'  # 'executionCost' of the result, converted to float
col_profiled = 'profiled'  # True if 'resultType' is 'Profiled' or not set

@dataclass
class FunctionProfilingResults:
    '''
    The profiling results of a single function, grouped by input size.
    '''

    # All input sizes for this function in ascending order.
    input_sizes: list[int]

    # The results indexed by input size.
    results: dict[int, pd.DataFrame]


def extract_input_sizes(func_desc) -> list[int]:
    '''
    Returns the 'sizeBytes' value of every typical input of the function description,
    in the order in which the typical inputs are listed.
    '''
    description = func_desc['spec']['functionDescription']
    return [typical_input['sizeBytes'] for typical_input in description['typicalInputs']]

def add_result(profile_result, dest_results_per_input: dict[int, list[dict[str, object]]]):
    '''
    Appends the results of a single resource profile to dest_results_per_input, one row per input size.

    profile_result must contain 'resourceProfileId'. If it has no 'results' entry, nothing is added.
    dest_results_per_input must already contain a list for every input size that occurs in the results.
    '''
    profile_id = profile_result['resourceProfileId']
    input_results = profile_result.get('results')
    if input_results is None:
        return

    for entry in input_results:
        rows = dest_results_per_input[entry['inputSizeBytes']]
        row = {
            col_profile: profile_id,
            col_exec_time: entry['executionTimeMs'],
            col_cost: float(entry['executionCost']),
            # A result without a 'resultType' is treated as an actually profiled one.
            col_profiled: entry.get('resultType', 'Profiled') == 'Profiled',
        }
        rows.append(row)


def load_result(path: str) -> FunctionProfilingResults:
    '''
    Reads a FunctionDescription YAML file and returns its profiling results as a
    FunctionProfilingResults object, i.e., the list of input sizes and one DataFrame per input size.

    Each DataFrame is indexed by the resource profile ID (col_profile) and has the
    columns col_exec_time, col_cost, and col_profiled.
    '''
    # YAML files are UTF-8 by default, so do not rely on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as file:
        func_desc = yaml.safe_load(file)

    input_sizes = extract_input_sizes(func_desc)
    results_per_input: dict[int, list[dict[str, object]]] = {
        input_size: [] for input_size in input_sizes
    }

    for profile_result in func_desc['status']['profilingResults']['results']:
        add_result(profile_result, results_per_input)

    # Passing the columns explicitly ensures that they exist, even if there are no rows for an input size.
    columns = [col_profile, col_exec_time, col_cost, col_profiled]
    data_frames: dict[int, pd.DataFrame] = {
        input_size: pd.DataFrame(data=rows, columns=columns).set_index(col_profile)
        for input_size, rows in results_per_input.items()
    }
    return FunctionProfilingResults(input_sizes=input_sizes, results=data_frames)


def load_results(functions: list[str], profiling_type: str) -> dict[str, FunctionProfilingResults]:
    '''
    Loads the results for all specified functions and returns them indexed by function name.

    profiling_type is the path of the results directory relative to './profiling-results',
    e.g., 'aws', 'gcf', or a nested BO results directory such as 'bo/<hyperparams>/xi=<xi>'.
    The results of each function are read from '<results directory>/<function>.yaml'.
    '''
    ret: dict[str, FunctionProfilingResults] = {
        func: load_result(f'./profiling-results/{profiling_type}/{func}.yaml') for func in functions
    }
    return ret


In [3]:
# Names of the functions to evaluate. For each name, load_results() reads
# './profiling-results/<profiling_type>/<name>.yaml'.
all_functions = [
    # LogPro
    'validate-log',
    'extract-basic-stats',
    'extract-successes',
    'extract-success-stats',

    # VidPro
    'validate-video',
    'cut-video',
    'merge-videos',

    # FaceDet
    'validate-video-face-recog',
    'transform-video',
    'detect-faces',
    'mark-faces',
]

# The exhaustive AWS profiling results serve as the baseline for the comparisons below.
aws_results = load_results(all_functions, 'aws')
exhaustive_results = aws_results

In [4]:
# Hyperparameter part of the BO results directory (see get_bo_results_dir()); also used in the output CSV file name.
bo_hyperparams = 'poi=0.02-minSamples=0.1'

# The xi values to evaluate. They are kept as strings, because they are used verbatim in
# directory names and as column labels of the RMSE table.
bo_xi_values = [ '0.01', '0.05', '0.1', '0.5', '1.0', '1.5', '2.0', '2.5', '3.0' ]
# Folder that the computed RMSE table is written to.
output_folder = './output/aws-bo'

def get_bo_results_dir(bo_xi: str) -> str:
    '''
    Returns the BO results directory for the configured hyperparameters and the given xi value.
    The returned path is meant to be passed as profiling_type to load_results().
    '''
    results_dir = f'bo/{bo_hyperparams}/xi={bo_xi}'
    return results_dir

In [5]:
import matplotlib.pyplot as plt

def matplot_exhaustive_bo_comparison(exhaustive_result: pd.DataFrame, bo_result: pd.DataFrame):
    '''
    Plots the execution times of an exhaustive result and a BO result as two lines
    into a new matplotlib figure.
    '''
    _, ax = plt.subplots()

    # Plot the column as a Series instead of a single-column DataFrame: DataFrame.plot() labels
    # each line with its column name, so both lines would appear as 'execTime' in the legend.
    # Series.plot() uses the label argument for the legend entry.
    exhaustive_result[col_exec_time].plot(ax=ax, label='Exhaustive')
    bo_result[col_exec_time].plot(ax=ax, label='BO')
    ax.legend()

In [6]:
import seaborn as sns

sns.set_theme()

def seaplot_exhaustive_bo_comparison(exhaustive_result: pd.DataFrame, bo_result: pd.DataFrame, title: str):
    '''
    Draws the execution times of an exhaustive result and a BO result as two lines in a seaborn plot
    and marks the BO samples that were actually profiled with a black 'X'.
    '''
    # Bring both execution time columns into long format, i.e., one row per profile and result type.
    exhaustive_times = exhaustive_result[[col_exec_time]].rename(columns={col_exec_time: 'Exhaustive'})
    bo_times = bo_result[[col_exec_time]].rename(columns={col_exec_time: 'BO'})
    long_format = (
        exhaustive_times
        .join(bo_times)
        .reset_index()
        .melt(id_vars=[col_profile], var_name='type', value_name=col_exec_time)
    )

    grid: sns.FacetGrid = sns.relplot(
        data=long_format,
        kind='line',
        x=col_profile,
        y=col_exec_time,
        hue='type',
        facet_kws=dict(sharex=True),
    )
    grid.set_axis_labels('Profile', 'Execution Time (ms)')
    grid.set_xticklabels(step=10, rotation=45)
    grid.ax.set_title(title)

    # Mark the samples that BO decided to profile (the others were inferred).
    was_profiled = bo_result[col_profiled] == True
    bo_profiled = bo_result[was_profiled].assign(type='BO')
    sns.scatterplot(
        ax=grid.ax,
        data=bo_profiled,
        x=col_profile,
        y=col_exec_time,
        hue='type',
        style='type',
        legend=False,
        palette=[(0, 0, 0)],
        markers=['X'],
        zorder=10
    )

In [7]:
from sklearn.metrics import root_mean_squared_error

def normalized_rmse(exhaustive_result: pd.DataFrame, predicted_result: pd.DataFrame) -> float:
    '''
    Computes the RMSE between the exhaustive and the predicted execution times for a single input size,
    normalized by the mean of the exhaustive execution times.
    '''
    # The exhaustive runs failed for certain profiles (e.g., the profiling for 256mib went into timeout),
    # while the predicted results may still contain an inferred value for such a profile.
    # The inner merge keeps only those profiles that are present in both DataFrames.
    common = exhaustive_result[[col_exec_time]].merge(
        predicted_result[[col_exec_time]],
        how='inner',
        on=[col_profile],
        suffixes=('_exhaustive', '_predicted'),
    )
    actual = common[f'{col_exec_time}_exhaustive']
    predicted = common[f'{col_exec_time}_predicted']

    return root_mean_squared_error(actual, predicted) / actual.mean()


def mean_normalized_rmse(exhaustive_results: FunctionProfilingResults, predicted_results: FunctionProfilingResults) -> float:
    '''
    Computes the normalized RMSE for every input size of the exhaustive results and returns the mean
    of these values. The predicted results must contain an entry for each of these input sizes.
    '''
    sizes = exhaustive_results.input_sizes

    total = 0.0
    for size in sizes:
        total += normalized_rmse(
            exhaustive_result=exhaustive_results.results[size],
            predicted_result=predicted_results.results[size],
        )

    return total / float(len(sizes))
    

def mean_normalized_rmse_for_all(bo_xi: str, exhaustive_results: dict[str, FunctionProfilingResults]) -> pd.Series:
    '''
    Loads the BO results for the given xi value and computes the mean normalized RMSE
    for every function in exhaustive_results.

    Returns a Series of the mean normalized RMSEs, indexed by function name.
    '''
    # Load the predictions for exactly the functions we compare against, instead of relying on the
    # global function list, which does not necessarily match the keys of exhaustive_results.
    functions = list(exhaustive_results)
    predicted_results = load_results(functions, get_bo_results_dir(bo_xi))

    rmses: dict[str, float] = {
        func: mean_normalized_rmse(exhaustive_results=exhaustive_result, predicted_results=predicted_results[func])
        for func, exhaustive_result in exhaustive_results.items()
    }
    return pd.Series(rmses)

In [8]:
# bo_xi='0.01'
# bo_results = load_results(all_functions, f'bo/poi=0.02-minSamples=0.1/xi={bo_xi}')

# print('xi = ', bo_xi)

# for key, exhaustive_result in exhaustive_results.items():
#     bo_result = bo_results[key]
#     # exhaustive_largest_input_result = exhaustive_result.results[exhaustive_result.input_sizes[-1]]
#     # bo_largest_input_result = bo_result.results[bo_result.input_sizes[-1]]
#     # seaplot_exhaustive_bo_comparison(exhaustive_largest_input_result, bo_largest_input_result, key)
#     # rmse = normalized_rmse(exhaustive_largest_input_result, bo_largest_input_result)
#     rmse = mean_normalized_rmse(exhaustive_results=exhaustive_result, predicted_results=bo_result)
#     print(key, 'RMSE =', rmse)

In [9]:
# Mean normalized RMSE per function (rows) and xi value (columns).
df_rmse = pd.DataFrame(0.0, index=list(exhaustive_results), columns=bo_xi_values)

for bo_xi in bo_xi_values:
    # The Series returned for each xi is indexed by function name, so it aligns with the rows of df_rmse.
    rmses = mean_normalized_rmse_for_all(bo_xi, exhaustive_results)
    df_rmse[bo_xi] = rmses

display(df_rmse)

# to_csv() does not create missing directories, so make sure that the output folder exists
# (os is imported in the first cell).
os.makedirs(output_folder, exist_ok=True)
df_rmse.to_csv(f'{output_folder}/bo-mean-rmse-{bo_hyperparams}.csv')

Unnamed: 0,0.01,0.05,0.1,0.5,1.0,1.5,2.0,2.5,3.0
validate-log,0.105166,0.106051,0.099868,0.099864,0.101862,0.102448,0.101842,0.102841,0.102314
extract-basic-stats,0.033351,0.02764,0.030661,0.030764,0.030811,0.030596,0.031755,0.03131,0.031367
extract-successes,0.0656,0.065637,0.076864,0.070812,0.074912,0.069596,0.0757,0.076138,0.074701
extract-success-stats,0.055046,0.052493,0.055912,0.063086,0.055645,0.057413,0.064096,0.061017,0.064382
validate-video,0.223326,0.22388,0.240435,0.234738,0.240961,0.223222,0.219121,0.233723,0.240863
cut-video,0.153354,0.121151,0.163987,0.145784,0.130357,0.12162,0.123166,0.131395,0.123336
merge-videos,0.070553,0.058043,0.062447,0.048692,0.064652,0.059227,0.061704,0.069395,0.076705
validate-video-face-recog,0.187594,0.163918,0.168884,0.177631,0.203967,0.204345,0.203782,0.205785,0.204363
transform-video,0.1692,0.165738,0.170036,0.17283,0.168188,0.169615,0.164923,0.16223,0.163394
detect-faces,0.127132,0.138157,0.121491,0.139804,0.114572,0.121764,0.138161,0.112609,0.127609
