# Profiling Results Accuracy

This file compares the accuracy of the following profiling results:

* Baseline: exhaustive profiling (159 resource profiles, based on the complete AWS profiles range in 64 MB steps)
* Linear model (linear interpolation between an evenly distributed selection of 5% of the exhaustive profiling results)
* Bayesian Optimization

In [1]:
import os

# Workaround for using the Jupyter container from VS Code Jupyter extension.
# The relative paths used in this notebook (e.g., './profiling-results') must resolve
# from the current working directory. If the results folder is not visible from here,
# switch to './chunk-func/jupyter' (presumably the folder that contains this notebook).
if not os.path.exists('./profiling-results'):
    os.chdir('./chunk-func/jupyter')

In [2]:
from dataclasses import dataclass
import math
import pandas as pd
import yaml

# Definitions of our column names, used by every per-input-size results DataFrame.
col_profile = 'profile'  # resource profile ID; used as the DataFrame index
col_exec_time = 'execTime'  # execution time in milliseconds
col_cost = 'cost'  # execution cost, stored as float
col_profiled = 'profiled'  # True if the result type is 'Profiled' (also assumed when no type is given)

@dataclass
class FunctionProfilingResults:
    '''
    The profiling results of a single function.

    Attributes:
        input_sizes: All input sizes for this function in ascending order.
        results: The results indexed by input size (one DataFrame per input size).
    '''

    input_sizes: list[int]
    results: dict[int, pd.DataFrame]

class FunctionProfilingResultsMap(dict[str, FunctionProfilingResults]):
    '''
    Maps each function name to the FunctionProfilingResults of that function.

    This is a dict subclass instead of a type alias, because type aliases
    do not seem to be supported in notebooks.
    '''

def extract_input_sizes(func_desc) -> list[int]:
    '''
    Returns the sizes in bytes of all typical inputs listed in a function description.

    func_desc is the parsed FunctionDescription document. The sizes are read from
    spec.functionDescription.typicalInputs[*].sizeBytes and returned in document order.
    '''
    typical_inputs = func_desc['spec']['functionDescription']['typicalInputs']
    # A comprehension replaces the manual append loop and avoids shadowing the builtin input().
    return [typical_input['sizeBytes'] for typical_input in typical_inputs]

def add_result(profile_result, dest_results_per_input: dict[int, list[dict[str, object]]]):
    '''
    Appends the results of a single resource profile to dest_results_per_input.

    Every entry of profile_result['results'] becomes one row dict that is appended to the
    list stored under the entry's input size. A profile_result without 'results' adds nothing.
    The input size must already be a key of dest_results_per_input, otherwise a KeyError is raised.
    '''
    profile_id = profile_result['resourceProfileId']
    input_results = profile_result.get('results')
    if input_results is None:
        return

    for entry in input_results:
        rows_for_input = dest_results_per_input[entry['inputSizeBytes']]
        rows_for_input.append({
            col_profile: profile_id,
            col_exec_time: entry['executionTimeMs'],
            col_cost: float(entry['executionCost']),
            # Entries without a result type count as profiled.
            col_profiled: entry.get('resultType', 'Profiled') == 'Profiled',
        })


def load_result(path: str) -> FunctionProfilingResults:
    '''
    Reads a FunctionDescription YAML file and returns its profiling results as a
    FunctionProfilingResults object, i.e., the typical input sizes of the function plus
    one DataFrame per input size.

    Each DataFrame is indexed by resource profile and contains the execution time,
    the cost, and the "profiled" flag of that profile as columns.
    '''
    # Decode explicitly as UTF-8 instead of relying on the platform-dependent default encoding.
    with open(path, 'r', encoding='utf-8') as file:
        func_desc = yaml.safe_load(file)

    input_sizes = extract_input_sizes(func_desc)
    # Start with an empty row list for every declared input size; add_result() fills them.
    results_per_input: dict[int, list[dict[str, object]]] = {
        input_size: [] for input_size in input_sizes
    }
    for profile_result in func_desc['status']['profilingResults']['results']:
        add_result(profile_result, results_per_input)

    # Passing the columns explicitly keeps the column set intact, even if an input size has no rows.
    columns = [col_profile, col_exec_time, col_cost, col_profiled]
    data_frames: dict[int, pd.DataFrame] = {
        input_size: pd.DataFrame(data=rows, columns=columns).set_index(col_profile)
        for input_size, rows in results_per_input.items()
    }
    return FunctionProfilingResults(input_sizes=input_sizes, results=data_frames)


def load_results(functions: list[str], profiling_type: str) -> FunctionProfilingResultsMap:
    '''
    Loads the results of all specified functions and returns them indexed by function name.

    profiling_type names the folder below ./profiling-results that contains one
    <function>.yaml file per function, e.g., 'aws', 'gcf', or a folder below 'bo'.
    '''
    return FunctionProfilingResultsMap({
        func: load_result(f'./profiling-results/{profiling_type}/{func}.yaml')
        for func in functions
    })


In [3]:
# Names of all functions whose profiling results are evaluated. Each name is also the base
# name of the function's results file (<name>.yaml). The headings below group the functions,
# presumably by the application they belong to.
all_functions = [
    # LogPro
    'validate-log',
    'extract-basic-stats',
    'extract-successes',
    'extract-success-stats',

    # VidPro
    'validate-video',   
    'cut-video',
    'merge-videos',

    # FaceDet
    'validate-video-face-recog',
    'transform-video',
    'detect-faces',
    'mark-faces',
]

# The AWS results are used as the exhaustive baseline in the rest of this notebook.
aws_results = load_results(all_functions, 'aws')
exhaustive_results = aws_results

In [4]:
# Name of the folder below 'bo' that holds the BO results; it is also part of the output file names.
# The name presumably encodes the hyperparameters of the BO runs.
bo_hyperparams = 'poi=0.02-minSamples=0.1'

# The BO xi values to evaluate. They are strings, because they are used verbatim in folder names (xi=<value>).
bo_xi_values = [ '0.01', '0.05', '0.1', '0.5', '1.0', '1.5', '2.0', '2.5', '3.0' ]
# Folder that the generated CSV files are written to.
output_folder = './output/aws-bo'

def get_bo_results_dir(bo_xi: str) -> str:
    '''Returns the folder of the BO results for the given xi value, in the form bo/<bo_hyperparams>/xi=<bo_xi>.'''
    hyperparams_dir = f'bo/{bo_hyperparams}'
    return f'{hyperparams_dir}/xi={bo_xi}'

In [5]:
import matplotlib.pyplot as plt

def matplot_exhaustive_bo_comparison(exhaustive_result: pd.DataFrame, bo_result: pd.DataFrame):
    '''
    Plots the execution times of the exhaustive and the BO results of a single input size
    as two lines in one matplotlib figure.
    '''
    _, ax = plt.subplots()
    # DataFrame.plot() labels every line with its column name, so a scalar label keyword does
    # not reach the legend and both lines would be listed as 'execTime'. Renaming the column
    # gives each line its own legend entry.
    exhaustive_result[[col_exec_time]].rename(columns={col_exec_time: 'Exhaustive'}).plot(ax=ax)
    bo_result[[col_exec_time]].rename(columns={col_exec_time: 'BO'}).plot(ax=ax)
    ax.legend()

In [6]:
import seaborn as sns

# Activate seaborn's default theme for the plots created in this notebook.
sns.set_theme()

def seaplot_exhaustive_bo_comparison(exhaustive_result: pd.DataFrame, bo_result: pd.DataFrame, title: str):
    '''
    Draws the execution times of the exhaustive and the BO results of a single input size
    as two lines in one seaborn plot and marks the samples that BO profiled.
    '''
    # Put both execution time columns side by side and convert them to long form,
    # i.e., one row per profile and result type.
    exec_times = exhaustive_result[[col_exec_time]].rename(columns={col_exec_time: 'Exhaustive'}).join(
        bo_result[[col_exec_time]].rename(columns={col_exec_time: 'BO'})
    )
    long_form = exec_times.reset_index().melt(id_vars=[col_profile], var_name='type', value_name=col_exec_time)

    grid: sns.FacetGrid = sns.relplot(
        data=long_form,
        x=col_profile,
        y=col_exec_time,
        hue='type',
        kind='line',
        facet_kws={'sharex': True},
    )
    grid.set_axis_labels('Profile', 'Execution Time (ms)')
    grid.set_xticklabels(step=10, rotation=45)
    grid.ax.set_title(title)

    # Mark the samples that BO decided to profile (the others were inferred).
    profiled_samples = bo_result[bo_result[col_profiled] == True].assign(type='BO')
    marker_options = dict(
        hue='type',
        style='type',
        legend=False,
        palette=[(0, 0, 0)],
        markers=['X'],
        zorder=10,
    )
    sns.scatterplot(ax=grid.ax, data=profiled_samples, x=col_profile, y=col_exec_time, **marker_options)

In [7]:
# RMSE calculation for single functions.

from sklearn.metrics import root_mean_squared_error

def normalized_rmse(exhaustive_result: pd.DataFrame, predicted_result: pd.DataFrame) -> float:
    '''
    Computes the RMSE between the exhaustive and the predicted execution times of a single
    input size, normalized by the mean of the exhaustive execution times.
    '''
    suffix_exhaustive, suffix_predicted = '_exhaustive', '_predicted'

    # Only profiles that are present in both DataFrames can be compared. The two sets of profiles
    # may differ due to errors for certain profiles in the exhaustive runs, e.g., the profiling
    # for 256mib went into timeout, while the predicted results inferred a value for this profile.
    # The inner merge on the profile drops all profiles that are missing on either side.
    common = exhaustive_result[[col_exec_time]].merge(
        predicted_result[[col_exec_time]],
        how='inner',
        on=[col_profile],
        suffixes=(suffix_exhaustive, suffix_predicted),
    )
    measured = common[f'{col_exec_time}{suffix_exhaustive}']
    predicted = common[f'{col_exec_time}{suffix_predicted}']

    return root_mean_squared_error(measured, predicted) / measured.mean()


def mean_normalized_rmse(exhaustive_results: FunctionProfilingResults, predicted_results: FunctionProfilingResults) -> float:
    '''
    Computes the normalized RMSE for every input size of the exhaustive results
    and returns the arithmetic mean of these values.
    '''
    sizes = exhaustive_results.input_sizes
    total = 0.0
    for size in sizes:
        total += normalized_rmse(
            exhaustive_result=exhaustive_results.results[size],
            predicted_result=predicted_results.results[size],
        )
    return total / float(len(sizes))

In [8]:
# Mean RMSE across all input sizes for all functions.

def mean_normalized_rmse_for_all_functions(bo_xi: str, exhaustive_results: FunctionProfilingResultsMap) -> pd.Series:
    '''
    Loads the BO results for the given xi value and computes, for every function in
    exhaustive_results, the mean normalized RMSE across all input sizes.

    The returned Series is indexed by function name.
    '''
    predicted_results = load_results(all_functions, get_bo_results_dir(bo_xi))
    return pd.Series({
        fn_name: mean_normalized_rmse(
            exhaustive_results=fn_exhaustive,
            predicted_results=predicted_results[fn_name],
        )
        for fn_name, fn_exhaustive in exhaustive_results.items()
    })

In [9]:
## Compute RMSE for largest input size and plot results.

# bo_xi='0.01'
# bo_results = load_results(all_functions, f'bo/poi=0.02-minSamples=0.1/xi={bo_xi}')

# print('xi = ', bo_xi)

# for key, exhaustive_result in exhaustive_results.items():
#     bo_result = bo_results[key]
#     # exhaustive_largest_input_result = exhaustive_result.results[exhaustive_result.input_sizes[-1]]
#     # bo_largest_input_result = bo_result.results[bo_result.input_sizes[-1]]
#     # seaplot_exhaustive_bo_comparison(exhaustive_largest_input_result, bo_largest_input_result, key)
#     # rmse = normalized_rmse(exhaustive_largest_input_result, bo_largest_input_result)
#     rmse = mean_normalized_rmse(exhaustive_results=exhaustive_result, predicted_results=bo_result)
#     print(key, 'RMSE =', rmse)

In [10]:
## Compute mean RMSE across all input sizes.

# df_rmse = pd.DataFrame(0.0, exhaustive_results.keys(), bo_xi_values)

# for bo_xi in bo_xi_values:
#     rmses = mean_normalized_rmse_for_all_functions(bo_xi, exhaustive_results)
#     df_rmse[bo_xi] = rmses

# display(df_rmse)
# df_rmse.to_csv(f'{output_folder}/bo-mean-rmse-{bo_hyperparams}.csv')

In [11]:
# Calculation of distinct RMSE values for each input size, for all BO xi hyperparameter values.

class RmseByInputSizeMap(dict[int, float]):
    '''Normalized RMSE values indexed by input size.'''

@dataclass
class RmseAndBoXiPair:
    '''An RMSE value together with the BO xi value (as string) of the results it was computed for.'''

    rmse: float
    bo_xi: str

@dataclass
class RmseInfo(RmseAndBoXiPair):
    '''An RmseAndBoXiPair that additionally records the function and the input size it belongs to.'''

    function: str
    input_size: int


def load_all_bo_results() -> dict[str, FunctionProfilingResultsMap]:
    '''Loads the BO results of all functions for every entry of bo_xi_values and returns them indexed by bo_xi string.'''
    return {
        bo_xi: load_results(all_functions, get_bo_results_dir(bo_xi))
        for bo_xi in bo_xi_values
    }


def normalized_rmse_for_all_input_sizes(exhaustive_results: FunctionProfilingResults, predicted_results: FunctionProfilingResults) -> RmseByInputSizeMap:
    '''
    Computes the normalized RMSE for every input size of the exhaustive results
    and returns the values indexed by input size.
    '''
    return RmseByInputSizeMap({
        size: normalized_rmse(
            exhaustive_result=exhaustive_results.results[size],
            predicted_result=predicted_results.results[size],
        )
        for size in exhaustive_results.input_sizes
    })


def normalized_rmse_for_all_bo_xis(fn_name: str, exhaustive_results: FunctionProfilingResults, all_bo_results: dict[str, FunctionProfilingResultsMap]) -> dict[str, RmseByInputSizeMap]:
    '''
    Computes the normalized RMSEs of the function fn_name for every input size and every entry
    of bo_xi_values. The result maps each bo_xi value to the RMSEs indexed by input size.
    '''
    return {
        bo_xi: normalized_rmse_for_all_input_sizes(
            exhaustive_results=exhaustive_results,
            predicted_results=all_bo_results[bo_xi][fn_name],
        )
        for bo_xi in bo_xi_values
    }


In [12]:
# Sorted table of all RMSEs, ordered by function, input size, and RMSE.
    
def convert_to_list(fn_name: str, rmses_by_xi: dict[str, RmseByInputSizeMap]) -> list[RmseInfo]:
    '''Flattens the RMSEs of one function into a list with one RmseInfo per bo_xi value and input size.'''
    return [
        RmseInfo(rmse=rmse, bo_xi=bo_xi, function=fn_name, input_size=input_size)
        for bo_xi, rmses_by_input in rmses_by_xi.items()
        for input_size, rmse in rmses_by_input.items()
    ]

def rmses_for_all_functions(exhaustive_results: FunctionProfilingResultsMap) -> list[RmseInfo]:
    '''
    Loads all BO results and returns the normalized RMSEs of every function
    in exhaustive_results for all bo_xi values and input sizes as a flat list.
    '''
    all_bo_results = load_all_bo_results()
    collected: list[RmseInfo] = []
    for fn_name, fn_exhaustive_results in exhaustive_results.items():
        collected.extend(
            convert_to_list(
                fn_name,
                normalized_rmse_for_all_bo_xis(fn_name, fn_exhaustive_results, all_bo_results),
            )
        )
    return collected


def sorted_rmses_for_all_functions(exhaustive_results: FunctionProfilingResultsMap) -> pd.DataFrame:
    '''
    Returns the RMSEs of all functions as a DataFrame with one row per RmseInfo,
    sorted by function, input size, and RMSE.
    '''
    unsorted = pd.DataFrame(rmses_for_all_functions(exhaustive_results))
    return unsorted.sort_values(by=['function', 'input_size', 'rmse'])


In [13]:
# Min RMSE (and the BO xi value that achieves it) for each input size.

def find_min_rmse(fn_name: str, exhaustive_results: FunctionProfilingResults, all_bo_results: dict[str, FunctionProfilingResultsMap]) -> dict[int, RmseAndBoXiPair]:
    '''
    Determines, for every input size of the function fn_name, the smallest normalized RMSE across
    all bo_xi values together with the bo_xi value that produced it. The pairs are returned
    indexed by input size.
    '''
    bo_rmses = normalized_rmse_for_all_bo_xis(fn_name, exhaustive_results, all_bo_results)
    min_rmses: dict[int, RmseAndBoXiPair] = {}

    for size in exhaustive_results.input_sizes:
        # Start at infinity with no xi; only a strictly smaller RMSE replaces the current best,
        # so the first xi value wins a tie.
        best_rmse = math.inf
        best_bo_xi: str | None = None
        for bo_xi, rmses_by_input in bo_rmses.items():
            candidate = rmses_by_input[size]
            if candidate < best_rmse:
                best_rmse, best_bo_xi = candidate, bo_xi
        min_rmses[size] = RmseAndBoXiPair(rmse=best_rmse, bo_xi=best_bo_xi)

    return min_rmses


def find_min_rmses_for_all_functions(exhaustive_results: FunctionProfilingResultsMap) -> dict[str, dict[int, RmseAndBoXiPair]]:
    '''
    Loads all BO results and finds the min RMSEs (see find_min_rmse) of every function in
    exhaustive_results. The results are printed per function and returned indexed by function name.
    '''
    all_bo_results = load_all_bo_results()
    min_rmses_by_function: dict[str, dict[int, RmseAndBoXiPair]] = {}
    for fn_name, fn_exhaustive_results in exhaustive_results.items():
        min_rmses_by_function[fn_name] = find_min_rmse(fn_name, fn_exhaustive_results, all_bo_results)
        print('Min RMSE for ', fn_name)
        display(min_rmses_by_function[fn_name])
    return min_rmses_by_function

In [14]:
## Find the bo_xi values that have the minimum RMSE for each input size of a function.
# find_min_rmses_for_all_functions(exhaustive_results)

## Find all RMSEs and print them in a sorted table.
# The table is also written to a CSV file in output_folder.
# NOTE: to_csv() does not create missing folders, so output_folder must already exist.
df_rmses = sorted_rmses_for_all_functions(exhaustive_results)
df_rmses.to_csv(f'{output_folder}/bo-rmses-{bo_hyperparams}.csv')
display(df_rmses)

Unnamed: 0,rmse,bo_xi,function,input_size
135,0.187651,0.01,cut-video,8450897
144,0.187651,0.05,cut-video,8450897
153,0.187651,0.1,cut-video,8450897
162,0.187651,0.5,cut-video,8450897
171,0.187651,1.0,cut-video,8450897
...,...,...,...,...
296,0.121326,1.5,validate-video-face-recog,1620000
314,0.121509,3.0,validate-video-face-recog,1620000
302,0.121623,2.0,validate-video-face-recog,1620000
308,0.122406,2.5,validate-video-face-recog,1620000
