In [None]:
import subprocess
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Notebook-wide matplotlib defaults, applied in a single update call.
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'figure.dpi': 100,  # 200 e.g. is really fine, but slower
})

In [None]:
# Hash of the commit currently checked out (HEAD); it is embedded in the
# figure titles and output file names further down.
# check_output returns bytes: decode them and strip the trailing newline
# rather than editing the repr() of the bytes object, which leaves stray
# characters behind when the output ends in "\r\n".
# NOTE(review): the results CSV loaded below carries a "7b884" suffix. If that
# is the commit the benchmarks were run at, HEAD may differ from it — confirm.
commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()

In [None]:
# Load the benchmark results into the notebook-level frame `df`.
df = pd.read_csv("benchmarks/results_10_trials_7b884.csv")

# Preview the rows from highest to lowest throughput. sort_values returns a
# new frame, so `df` itself keeps the order of the file.
df.sort_values(by='throughput', ascending=False)

In [None]:
# Columns that are converted to unsigned 32-bit integers.
# NOTE(review): assumes these columns hold no missing or negative values —
# confirm against the CSV.
cols = [
    "n_samples_train",
    "n_samples_test",
    "n_features",
    "working_memory",
]
df[cols] = df[cols].astype(dict.fromkeys(cols, np.uint32))

In [None]:
# One group per distinct combination of the four keys below. The key order
# matters: the plotting cell formats the group key tuple positionally.
df_grouped = df.groupby(
    by=["n_samples_train", "n_samples_test", "n_features", "n_neighbors"],
)

In [None]:
# Maps each plotted metric column to the y-axis label shown on its subplot.
y_labels = {
    "time_elapsed": "Time elapsed (in s)",
    "throughput": "Throughput (in GB/s)",  # was misspelled "Thoughput"
}

In [None]:
# Draw and save one figure per group of `df_grouped`: two side-by-side bar
# charts (time elapsed and throughput) against working memory, with one hue
# per implementation.
#
# The per-group frame is named `df_group` so that the notebook-level `df` is
# NOT rebound to the last group once this cell has run; otherwise re-running
# an earlier cell (dtype cast, groupby) would silently operate on a subset.
for vals, df_group in df_grouped:
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    axes = axes.flatten()
    for col, ax in zip(["time_elapsed", "throughput"], axes):
        sns.barplot(x="working_memory", y=col, hue="implementation", data=df_group, ax=ax)
        ax.set_ylabel(y_labels[col])
        ax.set_xlabel("Total working memory (in bytes)")
    # `vals` is the group key tuple, in the same order as the groupby keys.
    title = f"NearestNeighbors@{commit}" + " - Euclidean Distance, dtype=np.float64, 10 trials \n n_samples_train=%s - n_samples_test=%s - n_features=%s - n_neighbors=%s" % vals
    fig.suptitle(title, fontsize=16)
    # The file name is derived from the title and written to the current
    # working directory.
    fig.savefig(title.replace(" - ","_").replace("\n","_").lower() + ".png")