In [None]:
from sklearn_benchmarks.reporting.hpo import HPOReporting
import pandas as pd
# Passing None as the value removes pandas' column/row display limits, so
# frames shown in this notebook are rendered without truncation.
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, None)

# Gradient boosting: randomized HPO benchmarks

In [None]:
# Build the HPO report from the benchmark configuration file and run it.
# Later cells iterate over `reporting_hpo.data`; presumably run() is what
# makes the results available — confirm against HPOReporting.
HPO_CONFIG_PATH = "config.yml"
reporting_hpo = HPOReporting(config=HPO_CONFIG_PATH)
reporting_hpo.run()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn_benchmarks.utils.misc import find_nearest

In [None]:
# Per-result fit time taken at the index of the best score; the plotting
# cell below adds one entry for each item of reporting_hpo.data.
fit_times_for_max_scores = []

In [None]:
# Smoothed curves
#
# One curve per HPO result: grid scores plotted against mean grid times.
# The x-axis is clipped on the right at the smallest of the per-result times
# at which the best score is reached.

# Start from an empty list inside this cell so that re-running it never keeps
# entries accumulated by a previous execution (which would feed stale values
# into the min() used for the x-limit).
fit_times_for_max_scores = []

fig, ax = plt.subplots(figsize=(15, 10))

for hpo_result in reporting_hpo.data:
    # Index of the best score, then the time recorded at that same index.
    idx_max_score = np.argmax(hpo_result.grid_scores, axis=0)
    fit_time_for_max_score = hpo_result.mean_grid_times[idx_max_score]
    fit_times_for_max_scores.append(fit_time_for_max_score)
    ax.plot(
        hpo_result.mean_grid_times,
        hpo_result.grid_scores,
        c=f"tab:{hpo_result.color}",
        label=hpo_result.legend,
    )

# Earliest time at which any result reaches its own best score.
min_fit_time_all_constant = min(fit_times_for_max_scores)
ax.set_xlim(right=min_fit_time_all_constant)
ax.set_xlabel("Cumulated fit times in s")
ax.set_ylabel("Validation scores")
ax.legend()
plt.show()

In [None]:
# Speedup barplots
#
# One subplot per score threshold. For every HPO result, find_nearest picks
# the index of the score closest to the threshold; the fit time at that index
# is compared with the scikit-learn one as base_time / lib_time.

thresholds = [0.74, 0.76, 0.78]
# squeeze=False always returns a 2-D array of Axes (even for one threshold);
# ravel() turns it back into the flat sequence the loop below expects.
_, axes = plt.subplots(len(thresholds), figsize=(12, 20), squeeze=False)
axes = axes.ravel()

# Baseline: the first result whose `lib` is 'sklearn'.
base_hpo_result = next(
    (result for result in reporting_hpo.data if result.lib == 'sklearn'),
    None,
)
if base_hpo_result is None:
    raise ValueError(
        "No result with lib == 'sklearn' in reporting_hpo.data; "
        "cannot compute speedups without a baseline."
    )

# These do not depend on the threshold, so read them once.
base_scores = base_hpo_result.scores
base_fit_times = base_hpo_result.fit_times

for ax, threshold in zip(axes, thresholds):
    base_idx_closest, _ = find_nearest(base_scores, threshold)
    base_time = base_fit_times.iloc[base_idx_closest]

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and row-by-row growth is quadratic anyway.
    rows = []
    for hpo_result in reporting_hpo.data:
        idx_closest, _ = find_nearest(hpo_result.scores, threshold)
        lib_time = hpo_result.fit_times.iloc[idx_closest]
        rows.append(
            dict(
                speedup=base_time / lib_time,
                legend=hpo_result.legend,
                color=hpo_result.color,
            )
        )
    df_threshold = pd.DataFrame(rows, columns=["speedup", "legend", "color"])

    ax.bar(
        x=df_threshold["legend"],
        height=df_threshold["speedup"],
        width=0.3,
        # Same "tab:<name>" palette as the curves figure, so each library
        # keeps one colour across both plots.
        color=[f"tab:{color}" for color in df_threshold["color"]],
    )
    ax.set_xlabel("Library")
    ax.set_ylabel("Speedup")
    ax.set_title(f"At score {threshold}")

plt.tight_layout()
plt.show()