In [None]:
import os
import pandas as pd
from datetime import datetime

In [None]:
def process_timestamp_file(results_dir):
    """Load ``timestamps.csv`` from ``results_dir`` and describe the recorded runs.

    The first column of the CSV is treated as the independent variable. The
    'Start time' and 'End time' columns are passed through
    ``datetime.fromtimestamp`` and paired up into one interval per row.

    Returns:
        A tuple ``(independent_variable, independent_variable_values, intervals)``:
        the name of the first column, that column's values as a list, and a
        ``pd.IntervalIndex`` built from the converted start/end times.
    """
    csv_path = os.path.join(results_dir, 'timestamps.csv')
    runs = pd.read_csv(csv_path)

    variable_name = runs.columns[0]
    variable_values = list(runs[variable_name])

    # Build the time intervals from converted copies of the two time columns
    starts = runs['Start time'].apply(datetime.fromtimestamp)
    ends = runs['End time'].apply(datetime.fromtimestamp)
    run_intervals = pd.IntervalIndex.from_arrays(starts, ends)

    return variable_name, variable_values, run_intervals

def read_metric_file(results_dir, file_name):
    """Read one metric file into a two-column DataFrame with rescaled values.

    The metric name is ``file_name`` without its extension. An empty file is
    reported on stdout and yields ``None``. Otherwise the file is parsed with
    ``pd.read_json`` and its columns are relabelled 'Timestamp' and the metric
    name. The metric column is then rescaled according to the first keyword
    found in the metric name: 'cpu' (x100), 'network' (/1024) or 'memory'
    (/1048576). Metrics matching none of these are returned as read.
    """
    metric, _extension = os.path.splitext(file_name)
    file_path = os.path.join(results_dir, file_name)

    if not os.path.getsize(file_path):
        print(file_path, 'is empty. Skipping...')
        return None

    metric_df = pd.read_json(file_path)
    metric_df.columns = ['Timestamp', metric]

    # Ordered (keyword, conversion) pairs; only the first matching keyword applies.
    unit_conversions = (
        ('cpu', lambda values: values * 100),         # Convert to %
        ('network', lambda values: values / 1024),    # Convert B/s to KiB/s
        ('memory', lambda values: values / 1048576),  # Convert B to MiB
    )
    for keyword, convert in unit_conversions:
        if keyword in metric:
            metric_df[metric] = convert(metric_df[metric])
            break

    return metric_df

def read_experiment(results_dir, skip_memory=True):
    """Average every metric found in ``results_dir`` over each recorded time interval.

    The time intervals and the independent variable come from
    ``process_timestamp_file``. Every other regular file in the directory is
    read with ``read_metric_file`` and joined on the 'Timestamp' column.
    Samples that fall outside all intervals (or contain missing values) are
    dropped, and the remaining samples are averaged per interval.

    Args:
        results_dir: Directory holding ``timestamps.csv`` and the metric files.
        skip_memory: When true, files whose name contains 'memory' are ignored.

    Returns:
        A DataFrame indexed by the independent variable with one column per
        metric, each holding the mean of that metric within the interval.

    Raises:
        ValueError: If no metric file in ``results_dir`` produced any data.
    """
    # Process timestamps file
    independent_variable, independent_variable_values, timestamp_intervals = process_timestamp_file(results_dir)

    samples_df = None
    # Sorted so the resulting column order does not depend on the arbitrary
    # order in which os.listdir returns directory entries.
    for file_name in sorted(os.listdir(results_dir)):
        if file_name == 'timestamps.csv':
            continue
        if skip_memory and 'memory' in file_name:
            continue
        # Sub-directories are not metric files and cannot be parsed as JSON.
        if not os.path.isfile(os.path.join(results_dir, file_name)):
            continue

        metric_df = read_metric_file(results_dir, file_name)
        if metric_df is None:
            continue

        if samples_df is None:
            samples_df = metric_df
        else:
            # Keep only timestamps present in every metric file.
            samples_df = samples_df.merge(metric_df, on='Timestamp', how='inner')

    if samples_df is None:
        raise ValueError(f'No non-empty metric files found in {results_dir!r}')

    # Assign metric samples to the timestamp interval they were collected during
    samples_df['Timestamp'] = samples_df['Timestamp'].apply(datetime.fromtimestamp)
    samples_df['Time interval'] = pd.cut(samples_df['Timestamp'], timestamp_intervals)
    samples_df = samples_df.dropna()

    # observed=False keeps one row per interval even when no sample fell inside
    # it, so the rows stay aligned with independent_variable_values below.
    # Relying on the implicit default is fragile because pandas has deprecated it.
    means_by_interval_df = samples_df.groupby('Time interval', observed=False).mean(numeric_only=True)
    means_by_interval_df[independent_variable] = independent_variable_values
    means_by_interval_df = means_by_interval_df.set_index(independent_variable)

    return means_by_interval_df

def combine_experiments(dfs, experiment_names):
    """Join per-experiment result frames side by side on their shared index.

    Each frame's columns are prefixed with ``'<experiment_name>:'`` so the same
    metric from different experiments can coexist in one frame. The input
    frames are left unmodified.

    Args:
        dfs: Non-empty list of DataFrames, one per experiment.
        experiment_names: Names paired positionally with ``dfs``.

    Returns:
        A tuple ``(combined_df, metric_names)`` where ``metric_names`` is the
        list of (unprefixed) column names of the first frame.

    Raises:
        ValueError: If ``dfs`` is empty.
    """
    if not dfs:
        raise ValueError('combine_experiments requires at least one experiment DataFrame')

    metric_names = list(dfs[0].columns)

    combined_df = None
    for df, experiment_name in zip(dfs, experiment_names):
        # add_prefix returns a renamed copy, so the caller's frame keeps its
        # original column names and can safely be combined again later.
        prefixed_df = df.add_prefix(f'{experiment_name}:')
        if combined_df is None:
            combined_df = prefixed_df
        else:
            combined_df = combined_df.merge(prefixed_df, left_index=True, right_index=True)
    return combined_df, metric_names

def _metric_plot_title(metric_name):
    """Build a plot title from the keywords present in ``metric_name``."""
    def first_match(options):
        # Label of the first keyword contained in the metric name, else ''.
        return next((label for keyword, label in options if keyword in metric_name), '')

    parts = [
        first_match((('avg', 'Average'), ('max', 'Maximum'))),
        first_match((('kepler', 'Kepler'), ('prometheus', 'Prometheus'))),
        first_match((('cpu', 'CPU'), ('network', 'Network'), ('memory', 'Memory'))),
        'Overhead',
        first_match((('transmit', '(transmitted)'), ('receive', '(received)'))),
    ]
    # Joining only the non-empty parts avoids doubled or leading spaces when a
    # keyword group is missing from the metric name.
    return ' '.join(part for part in parts if part)


def _metric_ylabel(metric_name):
    """Return the y-axis label for ``metric_name``, or None if no unit applies."""
    if 'cpu' in metric_name:
        return '%CPU Utilization'
    if 'network' in metric_name:
        if 'transmit' in metric_name:
            return 'KiB/s Transmitted'
        if 'receive' in metric_name:
            return 'KiB/s Received'
        return 'KiB/s'
    if 'memory' in metric_name:
        return 'MiB'
    return None


def plot_combined_experiments(combined_df, experiment_names, metric_names, kind='bar'):
    """Draw one plot per metric comparing that metric across experiments.

    Args:
        combined_df: Frame whose columns are named ``'<experiment>:<metric>'``.
        experiment_names: Experiment names used as column prefixes and legend
            entries.
        metric_names: Metric names (column suffixes) to plot, one plot each.
        kind: Plot kind forwarded to ``combined_df.plot``.

    Returns:
        None. The plots are produced as a side effect.
    """
    for metric_name in metric_names:
        column_names = [f'{experiment_name}:{metric_name}' for experiment_name in experiment_names]
        ax = combined_df.plot(y=column_names, kind=kind)
        if len(experiment_names) > 1:
            ax.legend(experiment_names)
        else:
            # A legend adds nothing for a single experiment.
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()

        ax.set_title(_metric_plot_title(metric_name))

        ylabel = _metric_ylabel(metric_name)
        if ylabel is not None:
            ax.set_ylabel(ylabel)

def _as_list(value):
    """Return a list or tuple as a list; wrap any other value in a one-item list."""
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def plot_experiments(result_directories, experiment_names, skip_memory=True, kind='bar'):
    """Read one or more experiments and plot their metrics side by side.

    Args:
        result_directories: A single results directory or a list/tuple of them.
        experiment_names: A single experiment name or a list/tuple of names,
            paired positionally with ``result_directories``.
        skip_memory: Forwarded to ``read_experiment``.
        kind: Plot kind forwarded to ``plot_combined_experiments``.

    Returns:
        None. When the number of directories and names differ, an error is
        printed and nothing is plotted.
    """
    # Normalize each argument on its own: a lone string paired with a list must
    # not be compared by its character count or iterated character by character.
    result_directories = _as_list(result_directories)
    experiment_names = _as_list(experiment_names)

    if len(result_directories) != len(experiment_names):
        print("Error: len(result_directories) != len(experiment_names). You must provide an experiment name for each result directory passed")
        return None

    dfs = [
        read_experiment(result_directory, skip_memory=skip_memory)
        for result_directory in result_directories
    ]
    combined_df, metric_names = combine_experiments(dfs, experiment_names)
    plot_combined_experiments(combined_df, experiment_names, metric_names, kind=kind)

In [None]:
# Fill in the results directory and the display name of each experiment to plot.
# By default these directories will be saved in: _output/results/
# TODO: replace the placeholder strings below before running the plotting cells.
# NOTE: the `exeriment_name*` spelling is kept because the plotting cells
# reference these exact variable names.
directory1 = '<path to the first results directory>'
exeriment_name1 = '<name of the first experiment>'

directory2 = '<path to the second results directory>'
exeriment_name2 = '<name of the second experiment>'

In [None]:
# Single experiment: one results directory and one experiment name
result_directories = directory1
experiment_names = exeriment_name1
plot_experiments(
    result_directories=result_directories,
    experiment_names=experiment_names,
)

In [None]:
# Several experiments: each metric is drawn once, with all experiments on the same plot
result_directories = [directory1, directory2]
experiment_names = [exeriment_name1, exeriment_name2]
plot_experiments(
    result_directories=result_directories,
    experiment_names=experiment_names,
)