In [40]:
from asboostreg import SparseAdditiveBoostingRegressor
from pmlb import fetch_data
from sklearn.model_selection import KFold
import numpy as np

from utils import median_score

In [49]:
from math import fsum

In [41]:
# Hyper-parameters passed unchanged to SparseAdditiveBoostingRegressor(**params)
# in the evaluation loop. `n_estimators` is kept as its own module-level name.
n_estimators = 320
params = dict(
    n_estimators=n_estimators,
    learning_rate=0.3,
    row_subsample=0.7,
    max_bins=512,
    l2_regularization=0.6,
    min_samples_leaf=8,
    max_leaves=32,
    redundancy_exponent=1.0,
    random_state=0,  # fixed seed
)

In [54]:
# Arithmetic mean of five hand-entered values, displayed as the cell result.
# NOTE(review): the origin of these numbers is not shown in this notebook —
# record where they come from.
lst = [
    0.72,
    1.0,
    0.16,
    1.08,
    1.0,
]
fsum(lst) / len(lst)

0.792

In [42]:
# Dataset identifiers handed to pmlb.fetch_data; the order matters because
# the evaluation loop pairs them positionally with `best_scores`.
datos_column = [
    "562_cpu_small",
    "197_cpu_act",
    "227_cpu_small",
    "564_fried",
    "201_pol",
]

In [43]:
# Reference score per dataset, in the same order as `datos_column`
# (the evaluation loop zips the two lists together).
# NOTE(review): presumably the best tuned score for each dataset — confirm source.
best_scores = [
    0.75,  # 562_cpu_small
    0.78,  # 197_cpu_act
    0.75,  # 227_cpu_small
    0.66,  # 564_fried
    0.4,   # 201_pol
]

In [44]:
# Cross-validate the configured regressor on every dataset and report the mean
# fold score next to the dataset's reference score.

# The two lists are paired positionally; zip would silently drop the tail if
# they ever got out of sync.
assert len(datos_column) == len(best_scores), "datos_column / best_scores length mismatch"

# n_splits=5 is the KFold() default, spelled out so the fold count is visible
# and the score buffer below can be sized from it.
cv = KFold(n_splits=5)

for dataset, base_score in zip(datos_column, best_scores):
    print(f"{dataset}: {base_score:.2f}")
    X, y = fetch_data(dataset, return_X_y=True)
    # Drop constant (zero standard deviation) feature columns.
    X = X[:, X.std(axis=0) > 0]
    model = SparseAdditiveBoostingRegressor(**params)

    # Fresh buffer per dataset, sized from the splitter: no stale fold scores
    # from a previous dataset and no IndexError if n_splits is changed.
    scores = np.empty(cv.get_n_splits(), dtype=np.float64)
    for j, (train_index, test_index) in enumerate(cv.split(X)):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # NOTE(review): the held-out fold is also passed as validation_set. If
        # the regressor uses it for early stopping / model selection, the score
        # below is optimistic — confirm against the asboostreg documentation.
        model.fit(X_train, y_train, validation_set=(X_test, y_test))
        y_pred = model.predict(X_test)
        scores[j] = median_score(y_test, y_pred)

    score = np.mean(scores)
    print(f"Score: {score:.2f}")
    print(f"Ratio of default to best: {score/base_score:.2f}")
    print()

562_cpu_small: 0.75
Score: 0.74
Ratio of default to best: 0.98

197_cpu_act: 0.78
Score: 0.76
Ratio of default to best: 0.98

227_cpu_small: 0.75
Score: 0.74
Ratio of default to best: 0.98

564_fried: 0.66
Score: 0.61
Ratio of default to best: 0.92

201_pol: 0.40
Score: 0.38
Ratio of default to best: 0.95

