In [None]:
import os
import subprocess
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn import show_versions
# Set the default figure resolution to 100 DPI for every figure created below.
plt.rcParams['figure.dpi'] = 100

In [None]:
# Location of the benchmark results CSV. The path is relative, so it is
# resolved against the kernel's current working directory.
RESULTS_FILE_PATH = "benchmarks/results/results.csv"
# Load every row of the results file into a single DataFrame.
df = pd.read_csv(RESULTS_FILE_PATH)

In [None]:
# Abbreviated (10-character) hash of the checked-out commit, used to label the
# figures. The bytes returned by git are decoded and stripped directly instead
# of converting them with str() and deleting the literal "b'" / "\n'" text of
# the bytes repr.
commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()[:10]

In [None]:
# Benchmark parameter columns, cast to unsigned 32-bit integers.
cols = [
    "n_samples_train",
    "n_samples_test",
    "n_features",
    "n_neighbors",
]
df[cols] = df.loc[:, cols].astype(np.uint32)

In [None]:
# One group per benchmark configuration. The key order matters: the group key
# tuple is later interpolated positionally into the figure title.
df_grouped = df.groupby(
    by=["n_samples_train", "n_samples_test", "n_features", "n_neighbors"]
)

In [None]:
# Y-axis label for each plotted metric, keyed by the results column name.
y_labels = {
    "time_elapsed": "Time elapsed (in s)",
    "throughput": "Throughput (in GB/s)",
}

In [None]:
# Draw one figure per benchmark configuration: elapsed time on the first axis
# and throughput on the second, as bars per `chunk_info` value coloured by
# implementation.
# The per-group frame is bound to `df_group` (not `df`) so the full results
# frame is not replaced by the last group, which would make re-running the
# earlier cells operate on a subset of the data.
for vals, df_group in df_grouped:
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    axes = axes.flatten()
    for col, ax in zip(["time_elapsed", "throughput"], axes):
        sns.barplot(x="chunk_info", y=col, hue="implementation", data=df_group, ax=ax)
        ax.set_ylabel(y_labels[col])
        ax.set_xlabel("Chunk size (number of vectors), Number of X_train chunks")
        ax.tick_params(labelrotation=45)
    title = f"NearestNeighbors@{commit} - Euclidean Distance, dtype=np.float64, "
    # NOTE(review): `trial` is presumably a zero-based index, hence the +1 to
    # obtain a count — confirm against the benchmark script.
    title += f"{df_group.trial.max() + 1} trials, OMP_NUM_THREADS={os.getenv('OMP_NUM_THREADS')}, OPENBLAS_NUM_THREADS={os.getenv('OPENBLAS_NUM_THREADS')}, MKL_NUM_THREADS={os.getenv('MKL_NUM_THREADS')}\n"
    # `vals` is the group key tuple, in the same order as the groupby keys.
    title += "n_samples_train=%s - n_samples_test=%s - n_features=%s - n_neighbors=%s" % vals
    fig.suptitle(title, fontsize=16)

### Machine specifications

In [None]:
# Kernel version string (Linux-only: reads the /proc pseudo-filesystem).
! cat /proc/version

In [None]:
# CPU architecture information as reported by lscpu.
! lscpu

In [None]:
# Version and build configuration of the gcc found on PATH.
! gcc -v

In [None]:
# Environment variables of the shell spawned by the kernel.
# NOTE(review): this prints every variable, which can include credentials or
# tokens — check the rendered output before sharing or committing the notebook.
! env

### Environment specifications

In [None]:
# Version report provided by scikit-learn (`show_versions` is imported from sklearn).
show_versions()

In [None]:
# Packages installed in the active conda environment (requires conda on PATH).
! conda list

---

In [None]:
# Timestamp of this notebook run.
! date