# OpenML Regression Benchmark 2025 – **Complete Notebook**

This notebook benchmarks a variety of regression models on the `New_OpenML_Suite_2025_regression` collection, logs experiments to **Weights & Biases**, runs hyper‑parameter sweeps, and tracks full data/model lineage.


In [None]:
# ---- Setup ------------------------------------------------------------------
%pip install --upgrade -openml wandb scikit-learn optuna-integration[sklearn] pandas nbformat numpy joblib setuptools

import warnings, uuid, joblib
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import openml
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import cross_validate, KFold, train_test_split
from sklearn.metrics import (
    make_scorer,
    r2_score,
    mean_absolute_error,
    mean_squared_error,
)
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.inspection import permutation_importance

from IPython.display import display
from joblib import Memory
import wandb

# Authenticate W&B (expects env var or local API key)
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mspicecat[0m ([33mspicecat-club[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
# ---- Configuration ----------------------------------------------------------
SUITE_NAME = "8f0ea660163b436bbd4abd49665c7b1d"  # OpenML-CTR23 - A curated tabular regression benchmarking suite
WANDB_PROJECT = "openml_regression_benchmark_2025"
CV_FOLDS = 5
RANDOM_STATE = 42

scoring = {
    "r2": make_scorer(r2_score),
    "mae": make_scorer(mean_absolute_error, greater_is_better=False),
    "mse": make_scorer(mean_squared_error, greater_is_better=False),
}

In [3]:
# ---- Helper • preprocessing --------------------------------------------------
def make_preprocessing_pipeline(X: pd.DataFrame):
    """Return ColumnTransformer that imputes numeric / categorical and outputs dense arrays."""
    numeric_features = X.select_dtypes(include=["number"]).columns.tolist()
    categorical_features = X.select_dtypes(exclude=["number"]).columns.tolist()

    numeric_transformer = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )

    categorical_transformer = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="most_frequent")),
            (
                "encoder",
                OneHotEncoder(handle_unknown="ignore", sparse_output=False, max_categories=20),
            ),
        ]
    )

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ],
        sparse_threshold=0.0,  # guarantee dense output
    )
    return preprocessor

In [4]:
# ---- Helper • reproducibility & lineage -------------------------------------
def log_dataset_artifact(run, dataset):
    art = wandb.Artifact(dataset.name.replace(" ", "_"), type="dataset")
    # add reference so artifact links back to OpenML
    if getattr(dataset, "url", None):
        art.add_reference(dataset.url)
    run.use_artifact(art)


def save_model_artifact(run, pipe_fitted, dataset_name, model_name):
    path = (
        f"/tmp/{dataset_name.replace(' ', '_')}_{model_name}_{uuid.uuid4().hex}.joblib"
    )
    joblib.dump(pipe_fitted, path)
    art = wandb.Artifact(f"{dataset_name}-{model_name}", type="model")
    art.add_file(path)
    run.log_artifact(art)

In [5]:
# ---- Helper • W&B plotting ---------------------------------------------------
def log_wandb_regressor_plots(
    pipe_fitted, X_train, X_test, y_train, y_test, model_name
):
    try:
        pre = pipe_fitted.named_steps.get("pre")
        if pre is not None:
            X_train_enc = pre.transform(X_train)
            X_test_enc = pre.transform(X_test)
            if hasattr(X_train_enc, "toarray"):
                X_train_enc = X_train_enc.toarray()
                X_test_enc = X_test_enc.toarray()
            reg = pipe_fitted.named_steps["model"]
        else:
            X_train_enc, X_test_enc = X_train, X_test
            reg = pipe_fitted

        wandb.sklearn.plot_regressor(
            reg, X_train_enc, X_test_enc, y_train, y_test, model_name=model_name
        )
        wandb.sklearn.plot_learning_curve(
            pipe_fitted, X_train_enc, y_train, cv=5, model_name=model_name
        )
    except Exception as exc:
        print(f"[W&B plots skipped] {exc}")

In [6]:
# ---- Helper • permutation importance ----------------------------------------
def log_permutation_importance(pipe_fitted, X_val, y_val, model_name):
    try:
        pre = pipe_fitted.named_steps.get("pre")
        X_val_enc = pre.transform(X_val) if pre else X_val
        if hasattr(X_val_enc, "toarray"):
            X_val_enc = X_val_enc.toarray()
        res = permutation_importance(
            pipe_fitted,
            X_val_enc,
            y_val,
            n_repeats=5,
            random_state=RANDOM_STATE,
            n_jobs=-1,
        )
        wandb.log({f"perm_imp_{model_name}": res.importances_mean})
    except Exception as exc:
        print(f"[perm importance skipped] {exc}")

In [None]:
# ---- Helper • evaluation & W&B logging --------------------------------------
import copy


def evaluate_model(model_name: str, model, X, y):
    memory = Memory(location="/tmp/sk_cache", verbose=0)
    pre = make_preprocessing_pipeline(X)
    pipe = Pipeline(memory=memory, steps=[("pre", pre), ("model", model)])

    cv = KFold(n_splits=CV_FOLDS, shuffle=True, random_state=RANDOM_STATE)
    cv_results = cross_validate(
        pipe, X, y, cv=cv, scoring=scoring, return_train_score=False, n_jobs=-1
    )

    metrics = {
        "r2": np.mean(cv_results["test_r2"]),
        "mae": -np.mean(cv_results["test_mae"]),
        "mse": -np.mean(cv_results["test_mse"]),
    }

    # Hold‑out split for plots & importance
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM_STATE
    )
    pipe_fit = Pipeline(
        memory=memory, steps=[("pre", pre), ("model", copy.deepcopy(model))]
    )
    pipe_fit.fit(X_train, y_train)

    # log_wandb_regressor_plots(pipe_fit, X_train, X_test, y_train, y_test, model_name)
    # log_permutation_importance(pipe_fit, X_test, y_test, model_name)

    return metrics, pipe_fit

In [8]:
# ---- Helper • sweep model builder -------------------------------------------
def make_model_from_sweep(cfg):
    if cfg.model == "RandomForest":
        return RandomForestRegressor(
            n_estimators=cfg.rf_n_estimators,
            max_depth=cfg.rf_max_depth,
            random_state=RANDOM_STATE,
        )
    if cfg.model == "MLP":
        return MLPRegressor(
            hidden_layer_sizes=tuple(cfg.mlp_layers),
            learning_rate_init=cfg.mlp_lr,
            max_iter=cfg.mlp_max_iter,
            random_state=RANDOM_STATE,
        )
    raise ValueError(cfg.model)

In [9]:
# ---- Benchmark loop – per‑dataset evaluation --------------------------------
results = []
suite = openml.study.get_suite(SUITE_NAME)
print(f"Evaluating {len(suite.tasks)} tasks from suite '{SUITE_NAME}'")

# Baseline models
baseline_models = {
    "LinearRegression": LinearRegression(),
    "RandomForest": RandomForestRegressor(n_estimators=200, random_state=RANDOM_STATE),
}

for task_id in suite.tasks[:2]:
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, *_ = dataset.get_data(
        target=dataset.default_target_attribute, dataset_format="dataframe"
    )

    print(f"\nDataset: {dataset.name} (task_id={task_id}) | shape={X.shape}")
    for name, model in baseline_models.items():
        run = wandb.init(
            project=WANDB_PROJECT,
            name=f"{dataset.name}-{name}",
            config={"dataset": dataset.name, "task_id": task_id, "model": name},
            reinit="finish_previous",
        )
        # log_dataset_artifact(run, dataset)

        try:
            metrics, pipe_fit = evaluate_model(name, model, X, y)
            wandb.log(metrics)
            # save_model_artifact(run, pipe_fit, dataset.name, name)
            results.append({"dataset": dataset.name, "model": name, **metrics})
        except Exception as exc:
            print(f"⚠️ {name} on {dataset.name} failed: {exc}")
            wandb.log({"error": str(exc)})
        finally:
            run.finish()

Evaluating 35 tasks from suite '8f0ea660163b436bbd4abd49665c7b1d'

Dataset: abalone (task_id=361234) | shape=(4177, 8)


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1.58555
mape,0.1598
mse,4.90657
r2,0.52571


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1.5517
mape,0.15493
mse,4.79935
r2,0.53579



Dataset: airfoil_self_noise (task_id=361235) | shape=(1503, 5)


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,3.7542
mape,0.03016
mse,23.36428
r2,0.50514


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1.29732
mape,0.01047
mse,3.30809
r2,0.93015


In [10]:
# ---- Results summary ---------------------------------------------------------
df_results = pd.DataFrame(results)
display(df_results.head())

Unnamed: 0,dataset,model,r2,mae,mse,mape
0,abalone,LinearRegression,0.525713,1.58555,4.906571,0.1598
1,abalone,RandomForest,0.535787,1.551699,4.799352,0.154928
2,airfoil_self_noise,LinearRegression,0.505137,3.754198,23.364282,0.030157
3,airfoil_self_noise,RandomForest,0.930148,1.297316,3.308089,0.01047


## W&B Sweeps

Optimize hyper‑parameters across **datasets** and **models** with a Bayesian sweep.


In [11]:
# ---- Define and run a W&B sweep ---------------------------------------------
sweep_config = {
    "method": "bayes",
    "metric": {"name": "rmse", "goal": "minimize"},
    "parameters": {
        "dataset_idx": {"values": list(range(len(suite.tasks)))},
        "model": {"values": ["RandomForest", "MLP"]},
        "rf_n_estimators": {"values": [100, 300, 500]},
        "rf_max_depth": {"values": [None, 10, 20]},
        "gb_n_estimators": {"values": [100, 300]},
        "gb_lr": {"values": [0.05, 0.1, 0.2]},
        "mlp_layers": {"values": [(64,), (128, 64)]},
        "mlp_lr": {"values": [0.001, 0.01]},
        "mlp_max_iter": {"values": [200, 400]},
    },
}

sweep_id = wandb.sweep(sweep_config, project=WANDB_PROJECT)


def sweep_train():
    with wandb.init() as run:
        cfg = wandb.config
        task = openml.tasks.get_task(suite.tasks[cfg.dataset_idx])
        ds = task.get_dataset()
        X_sw, y_sw, *_ = ds.get_data(
            target=ds.default_target_attribute, dataset_format="dataframe"
        )
        model = make_model_from_sweep(cfg)
        metrics, _ = evaluate_model(cfg.model, model, X_sw, y_sw)
        wandb.log(metrics)


# ➡️ Uncomment to launch 20 trials locally
wandb.agent(sweep_id, function=sweep_train, count=20)

Create sweep with ID: d1ru4g8r
Sweep URL: https://wandb.ai/spicecat-club/openml_regression_benchmark_2025/sweeps/d1ru4g8r


[34m[1mwandb[0m: Agent Starting Run: f0bxpfuv with config:
[34m[1mwandb[0m: 	dataset_idx: 27
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 300




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,6510.55463
mape,2.38699
mse,176348532.11954
r2,0.53471


[34m[1mwandb[0m: Agent Starting Run: o3yf9pae with config:
[34m[1mwandb[0m: 	dataset_idx: 21
[34m[1mwandb[0m: 	gb_lr: 0.1
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: 20
[34m[1mwandb[0m: 	rf_n_estimators: 100


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,224.73346
mape,0.541
mse,146811.34733
r2,0.29409


[34m[1mwandb[0m: Agent Starting Run: uj6ndq1l with config:
[34m[1mwandb[0m: 	dataset_idx: 6
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 100




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,12.54087
mape,1.09043
mse,276.5612
r2,0.1796


[34m[1mwandb[0m: Agent Starting Run: hih7usw1 with config:
[34m[1mwandb[0m: 	dataset_idx: 16
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 500


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1.98055
mape,356542061653898.8
mse,7.4747
r2,0.97771


[34m[1mwandb[0m: Agent Starting Run: feumdrbz with config:
[34m[1mwandb[0m: 	dataset_idx: 22
[34m[1mwandb[0m: 	gb_lr: 0.2
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: 20
[34m[1mwandb[0m: 	rf_n_estimators: 500




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,8.83217
mape,2921369361258947.0
mse,416.34922
r2,0.76814


[34m[1mwandb[0m: Agent Starting Run: 01sk3oad with config:
[34m[1mwandb[0m: 	dataset_idx: 22
[34m[1mwandb[0m: 	gb_lr: 0.1
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: 10
[34m[1mwandb[0m: 	rf_n_estimators: 100




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,5.72077
mape,1480457647596168.2
mse,318.79148
r2,0.82716


[34m[1mwandb[0m: Agent Starting Run: mg5ejl7v with config:
[34m[1mwandb[0m: 	dataset_idx: 19
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 500


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,0.04525
mape,14.63226
mse,0.00324
r2,-1.50484


[34m[1mwandb[0m: Agent Starting Run: brd0hffe with config:
[34m[1mwandb[0m: 	dataset_idx: 33
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: 20
[34m[1mwandb[0m: 	rf_n_estimators: 300


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1563.7427
mape,0.07395
mse,5180635.6179
r2,0.94473


[34m[1mwandb[0m: Agent Starting Run: c9eb1n0d with config:
[34m[1mwandb[0m: 	dataset_idx: 33
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [128, 64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: 10
[34m[1mwandb[0m: 	rf_n_estimators: 100


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1542.77881
mape,0.0728
mse,4941433.3304
r2,0.94741


[34m[1mwandb[0m: Agent Starting Run: d6o9380m with config:
[34m[1mwandb[0m: 	dataset_idx: 32
[34m[1mwandb[0m: 	gb_lr: 0.2
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [128, 64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: 10
[34m[1mwandb[0m: 	rf_n_estimators: 300




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,0.64795
mape,0.25326
mse,0.83466
r2,0.5978


[34m[1mwandb[0m: Agent Starting Run: r4tc83w6 with config:
[34m[1mwandb[0m: 	dataset_idx: 4
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [128, 64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 300


Exception ignored in: <function ResourceTracker.__del__ at 0x7f95809ab740>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7f4fd48ab740>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7ff8f6f93740>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/reso

0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,2.36975
mape,169034227266562.72
mse,12.31048
r2,0.67099


[34m[1mwandb[0m: Agent Starting Run: bkr4g360 with config:
[34m[1mwandb[0m: 	dataset_idx: 30
[34m[1mwandb[0m: 	gb_lr: 0.2
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [128, 64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 100


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,23.45767
mape,3.848348641012656e+16
mse,5068.00597
r2,-1.36367


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4pf646yh with config:
[34m[1mwandb[0m: 	dataset_idx: 4
[34m[1mwandb[0m: 	gb_lr: 0.1
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: 20
[34m[1mwandb[0m: 	rf_n_estimators: 100


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,2.41145
mape,167780036131076.78
mse,12.57428
r2,0.66394


[34m[1mwandb[0m: Agent Starting Run: bqephm6j with config:
[34m[1mwandb[0m: 	dataset_idx: 5
[34m[1mwandb[0m: 	gb_lr: 0.1
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [128, 64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: 10
[34m[1mwandb[0m: 	rf_n_estimators: 300




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,6.83381
mape,9.05535
mse,125.45064
r2,0.89301


[34m[1mwandb[0m: Agent Starting Run: crottbwl with config:
[34m[1mwandb[0m: 	dataset_idx: 33
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: 10
[34m[1mwandb[0m: 	rf_n_estimators: 100


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1542.77881
mape,0.0728
mse,4941433.3304
r2,0.94741


[34m[1mwandb[0m: Agent Starting Run: k0937s40 with config:
[34m[1mwandb[0m: 	dataset_idx: 12
[34m[1mwandb[0m: 	gb_lr: 0.1
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: 10
[34m[1mwandb[0m: 	rf_n_estimators: 100


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1.1837
mape,0.13335
mse,7.75663
r2,0.97008


[34m[1mwandb[0m: Agent Starting Run: xtppi6pb with config:
[34m[1mwandb[0m: 	dataset_idx: 0
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 500


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,1.50358
mape,0.15064
mse,4.47108
r2,0.56773


[34m[1mwandb[0m: Agent Starting Run: 3idcof7y with config:
[34m[1mwandb[0m: 	dataset_idx: 32
[34m[1mwandb[0m: 	gb_lr: 0.1
[34m[1mwandb[0m: 	gb_n_estimators: 100
[34m[1mwandb[0m: 	mlp_layers: [128, 64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 200
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: 20
[34m[1mwandb[0m: 	rf_n_estimators: 100




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,0.64795
mape,0.25326
mse,0.83466
r2,0.5978


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i5z0hzcn with config:
[34m[1mwandb[0m: 	dataset_idx: 15
[34m[1mwandb[0m: 	gb_lr: 0.2
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [64]
[34m[1mwandb[0m: 	mlp_lr: 0.01
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: MLP
[34m[1mwandb[0m: 	rf_max_depth: None
[34m[1mwandb[0m: 	rf_n_estimators: 300




0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,45579.99009
mape,0.26234
mse,4117247573.88396
r2,0.69078


[34m[1mwandb[0m: Agent Starting Run: 294e383h with config:
[34m[1mwandb[0m: 	dataset_idx: 6
[34m[1mwandb[0m: 	gb_lr: 0.05
[34m[1mwandb[0m: 	gb_n_estimators: 300
[34m[1mwandb[0m: 	mlp_layers: [128, 64]
[34m[1mwandb[0m: 	mlp_lr: 0.001
[34m[1mwandb[0m: 	mlp_max_iter: 400
[34m[1mwandb[0m: 	model: RandomForest
[34m[1mwandb[0m: 	rf_max_depth: 20
[34m[1mwandb[0m: 	rf_n_estimators: 500


0,1
mae,▁
mape,▁
mse,▁
r2,▁

0,1
mae,12.00533
mape,1.06944
mse,245.53834
r2,0.2743


## Appendix

Additional experiments and notes can go here.
