In [None]:
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the benchmark results from ../benchmark_results2.pickle
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
with open('../benchmark_results2.pickle', 'rb') as f:
    results = pickle.load(f)

In [None]:
# Display the loaded object to inspect its structure
results

In [None]:
# Step 1: Flatten the nested mapping into one (model, evaluator, time) record per measurement
flattened_data = [
    (model, evaluator, duration)
    for model, per_evaluator in results.items()
    for evaluator, durations in per_evaluator.items()
    for duration in durations
]

# Step 2: Build a long-format DataFrame with one row per record
df = pd.DataFrame(flattened_data, columns=["Model", "Evaluator", "TimeTaken"])
df.head()

In [None]:
# Distribution of TimeTaken per evaluator, pooled over every model in df
sns.boxplot(data=df, x="Evaluator", y="TimeTaken")

In [None]:
# Compare the evaluators side by side within each model.
# Draw on an explicitly created Axes instead of relying on pyplot's implicit current figure.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x="Model", y="TimeTaken", hue="Evaluator", ax=ax)
ax.set_title("Boxplot of Models vs. TimeTaken for different Evaluators")
plt.show()

In [None]:
# Per-model value shown as "experiments = ..." in each facet title of the runtime figure
experiments_dict = dict(python=10_000, lake=1_000, flu=250)

In [None]:
# Runtime distributions: one facet per model (independent y-axes), one box per evaluator
g = sns.FacetGrid(df, col="Model", col_wrap=3, height=5, sharey=False)
g.map_dataframe(
    sns.boxplot,
    x="Evaluator",
    y="TimeTaken",
    hue="Evaluator",
    showmeans=True,    # draw a marker at the mean of each box
    showfliers=False,  # do not draw outlier points
)

# Axis labels
g.set_axis_labels("Evaluator", "Time (in seconds)\n(lower is better)")

# Facet titles: use the grid's own {model: axes} mapping so a title is always attached
# to the facet that actually shows that model, rather than pairing by position
for facet_model, ax in g.axes_dict.items():
    ax.set_title(f"model = {facet_model}\nexperiments = {experiments_dict[facet_model]}")

# Figure title, placed above the facets
g.fig.suptitle("Total runtime of different models and evaluators", y=1.1)

# Start every y-axis at zero; the upper limit stays automatic
g.set(ylim=(0, None))

# Footnote below the grid
g.fig.text(
    0.5,
    -0.05,
    "The mean is shown as a green triangle.\n"
    "10 replications have been done for each model and evaluator combination. Outliers are not shown.\n"
    "Python 3.10.12 | Ubuntu 22.04 via WSL2 | Core i7-12700H | n_processes = 10",
    ha="center",
    fontsize=8,
)

# Save as SVG before showing, so the file is written from the fully composed figure
# regardless of how the active backend handles the figure in show()
g.savefig("boxplot.svg")

plt.show()

In [None]:
# Mean of the remaining columns for every (Model, Evaluator) pair
group_keys = ["Model", "Evaluator"]
mean_df = df.groupby(group_keys).mean()
mean_df

In [None]:
# Wide table of mean run times: one row per model, one column per evaluator.
# Built directly from df (not from the previous value of mean_df), so the cell gives
# the same result every time it is run instead of failing on a second execution.
mean_df = df.pivot_table(index="Model", columns="Evaluator", values="TimeTaken", aggfunc="mean")
mean_df

In [None]:
# Divide every row by that row's SequentialEvaluator value (row-wise normalisation)
baseline = mean_df["SequentialEvaluator"]
mean_df = mean_df.div(baseline, axis="index")

In [None]:
# Replace every value by its reciprocal, so that higher is better.
# NOTE: not idempotent - running this cell a second time inverts the table back.
mean_df = mean_df.rdiv(1)

In [None]:
# Put the columns and rows into an explicit display order
evaluator_order = ["SequentialEvaluator", "MultiprocessingEvaluator", "MPIEvaluator"]
model_order = ["python", "lake", "flu"]
mean_df = mean_df.loc[:, evaluator_order].reindex(index=model_order)

In [None]:
# Color the table: green above 1, red below 1, unstyled otherwise
def color_red_green(val):
    """Return the CSS text-color declaration for a single table cell.

    Args:
        val: Cell value; it is compared against 1.

    Returns:
        'color: green' when val > 1, 'color: red' when val < 1, and '' (no
        styling) when val is neither, i.e. exactly 1 or NaN.
    """
    if val > 1:
        return 'color: green'
    if val < 1:
        return 'color: red'
    # Neither above nor below 1 (exactly 1, or NaN): leave the cell unstyled
    # instead of marking it red
    return ''

# Apply the cell-wise coloring. Styler.applymap was renamed to Styler.map in pandas 2.1
# (the old name is deprecated), so use the new name when present and fall back otherwise.
styler = mean_df.style
style_each_cell = getattr(styler, "map", None) or styler.applymap
style_each_cell(color_red_green)

In [None]:
# Round to three decimal places
mean_df = mean_df.round(3)
mean_df

In [None]:
# Print the table as a Markdown-formatted string
print(mean_df.to_markdown())

In [None]:
# Divide the MultiprocessingEvaluator and MPIEvaluator columns by 10.
# The result is written to a new frame, so mean_df is left untouched and re-running
# this cell does not divide the values a second time.
N_PROCESSES = 10  # presumably the "n_processes = 10" from the runtime figure's footnote - confirm
parallel_evaluators = ["MultiprocessingEvaluator", "MPIEvaluator"]
per_process_df = mean_df.copy()
per_process_df[parallel_evaluators] = per_process_df[parallel_evaluators] / N_PROCESSES
print(per_process_df.to_markdown())