## Imports

In [15]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import warnings
from collections import defaultdict
from itertools import product

import lineartree as lt
import numpy as np
from JOPLEn.st_loss import LogisticLoss, SquaredError
from JOPLEn.st_penalty import (
    SquaredFNorm,
    Group21Norm,
    GroupInf1Norm,
    L1Norm,
    SquaredLaplacian,
    TreeGaussWeight,
    EuclidMultiQuadWeight,
    TreeMultiQuadWeight,
    EuclidGaussWeight,
    LaplacianType,
)
from JOPLEn.singletask import JOPLEn
from linear_operator.utils.warnings import NumericalWarning
from lineartree import (
    LinearBoostClassifier,
    LinearBoostRegressor,
    LinearForestClassifier,
    LinearForestRegressor,
)
from pmlb import classification_dataset_names, regression_dataset_names
from sklearn import datasets
from sklearn.base import ClassifierMixin
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.ensemble import (
    AdaBoostClassifier,
    AdaBoostRegressor,
    ExtraTreesClassifier,
    ExtraTreesRegressor,
    GradientBoostingClassifier,
    GradientBoostingRegressor,
    RandomForestClassifier,
    RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression, Ridge, RidgeClassifier
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier, XGBRegressor

# Hide future warnings because ax uses deprecated functions from pandas
warnings.simplefilter(action="ignore", category=FutureWarning)
# Hide unfixable warning from ax (warns about default behavior but there isn't
# a clear way to turn the warning off)
warnings.simplefilter(action="ignore", category=UserWarning)
# Ax gives warning about non PSD matrix.
# TODO: Should I fix this?
warnings.simplefilter(action="ignore", category=NumericalWarning)
import logging
import sys
import time
from copy import copy, deepcopy
from pathlib import Path
from pprint import pprint

import lightgbm
import matplotlib.pyplot as plt
import yaml
from ax import optimize
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.utils.common.logger import ROOT_STREAM_HANDLER
from catboost import CatBoostClassifier, CatBoostRegressor, Pool
from JOPLEn.competing import FriedmanRefit
from JOPLEn.enums import CellModel
from JOPLEn.partitioner import (
    CBPartition,
    GBPartition,
    LinearBoostPartition,
    LinearForestPartition,
    RFPartition,
    VarMaxForestPartition,
    VPartition,
)
from JOPLEn.st_loss import LogisticLoss, SquaredError
from lightgbm import LGBMClassifier, LGBMRegressor
from nn import NN
from sklearn.base import ClassifierMixin
from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.metrics import log_loss, roc_auc_score, zero_one_loss
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from tqdm.notebook import tqdm, trange

# FASTEL is imported as a package from the parent of the current working
# directory, so that directory has to be on sys.path before the import below.
fastel_path = Path().resolve().parent
sys.path.append(str(fastel_path))

from FASTEL.src.engine import MultiTaskTrees
from sklearn.preprocessing import LabelEncoder

# Only let errors from Ax's root stream handler through.
ROOT_STREAM_HANDLER.setLevel(logging.ERROR)

# Root folder for cached Ax experiments and per-dataset result metadata
# (relative to the working directory); created eagerly.
CACHE_DIR = Path("ax_runs") / "prediction"
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Dataset folders (one sub-folder per task type, then one per dataset).
DS_PATH = (Path("..") / "datasets" / "pmlb" / "processed").resolve()
# Per-task folders of YAML files describing each model's search space.
PARAM_PATH = (Path(".") / "parameters").resolve()
# Output folder for figures.
PLOT_PATH = (Path(".") / "plots").resolve()

In [16]:
# Dataset folder names that the experiment loop skips entirely.
# The first group has too many samples, which causes JOPLEn to crash; the
# remaining entries are grouped under the specific failure they trigger.
# Entries that are commented out are currently NOT excluded.
EXCLUDE = [
    # regression
    "1191_BNG_pbc",
    "215_2dplanes",
    "1201_BNG_breastTumor",
    "1196_BNG_pharynx",
    "1595_poker",
    "1203_BNG_pwLinear",
    "594_fri_c2_100_5",
    "218_house_8L",
    "1193_BNG_lowbwt",
    "537_houses",
    "564_fried",
    "344_mv",
    "574_house_16H",
    "573_cpu_act",
    "562_cpu_small",
    "1199_BNG_echoMonths",
    "294_satellite_image",
    "197_cpu_act",
    "201_pol",
    "227_cpu_small",
    "503_wind",
    # classification
    # "Hill_Valley_with_noise",
    # "Hill_Valley_without_noise",
    # "breast_cancer_wisconsin",
    # "appendicitis",
    # "prnn_synth",
    # "sonar",
    # "phoneme",
    # "twonorm",
    # "magic",
    # "wdbc",
    "adult",
    # crashing for some reason, fix later
    "Hill_Valley_without_noise",
    # crashes for joplen with catboost partitioner with early stopping
    "magic",
    "225_puma8NH",
    # Crashes for joplen with l1 regularization
    "624_fri_c0_100_5",
    "banana",  # laplacian
]

## Parameters

In [17]:
# model_info[task][yaml_stem] -> parsed YAML description of one model
# configuration, where task is "reg" or "class" and the YAML files live in
# PARAM_PATH / task.
model_info = {
    "reg": {},
    "class": {},
}

for t in ["reg", "class"]:
    for model in (PARAM_PATH / t).glob("*.yaml"):
        # Use a context manager so every parameter file is closed right after
        # it has been parsed instead of leaking the handle.
        with open(model, "r") as param_file:
            model_info[t][model.stem] = yaml.safe_load(param_file)

## Training Functions

In [18]:
# Names of the regularizer classes that may be configured through flat
# parameter keys of the form "<ClassName>_<option>".
lst_reg_strings = [
    "SquaredFNorm",
    "Group21Norm",
    "GroupInf1Norm",
    "L1Norm",
    "SquaredLaplacian",
]

# Regularizer options whose values are Python expressions (given as strings)
# that must be evaluated before being passed to the regularizer.
eval_param_names = [
    "laplacian_type",
    "weight_class",
]


def pop_regularizers(params):
    """Split flat parameters into regularizer instances and everything else.

    A key belongs to a regularizer when the text before its first underscore
    is exactly one of ``lst_reg_strings``; the text after that underscore is
    the keyword passed to the regularizer's constructor. The same test decides
    which keys are consumed and which are kept, so a key can never be both
    handed to a regularizer and left in the remaining parameters.

    Args:
        params: Mapping of flat parameter names to values. It is not modified.

    Returns:
        ``(regularizers, remaining)`` where ``regularizers`` is a list of
        instantiated regularizers (ordered as in ``lst_reg_strings``; classes
        with no options present are skipped) and ``remaining`` is a new dict
        holding every non-regularizer entry in its original order.
    """
    grouped = {}
    remaining = {}

    for key, value in params.items():
        prefix, _, option = key.partition("_")

        if prefix not in lst_reg_strings:
            remaining[key] = value
            continue

        # NOTE: eval() runs on strings that come from the parameter files;
        # only use trusted configuration here.
        if option in eval_param_names:
            value = eval(value)

        grouped.setdefault(prefix, {})[option] = value

    regularizers = []
    for reg_name in lst_reg_strings:
        if reg_name in grouped:
            # Resolve the class by name from the notebook's namespace.
            regularizers.append(eval(reg_name)(**grouped[reg_name]))

    return regularizers, remaining


def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    The square root is taken explicitly instead of passing ``squared=False``
    to ``mean_squared_error``: that keyword is deprecated in recent
    scikit-learn releases and later removed, whereas this form works on both
    old and new versions. For the single-output targets used in this notebook
    the two forms give the same value.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


def auc(y_true, y_pred):
    """Return the ROC AUC as a float, or None unless exactly two labels occur.

    ``y_true`` must be an iterable of hashable labels; ``y_pred`` holds the
    scores handed to ``roc_auc_score``.
    """
    n_labels = len(set(y_true))

    if n_labels != 2:
        return None

    return float(roc_auc_score(y_true, y_pred))


def convert_to_ordinals(feature_type, x_train, x_val, x_test):
    """Ordinal-encode the categorical/binary columns of the data splits.

    Args:
        feature_type: Per-column type strings; columns tagged "categorical"
            or "binary" are encoded.
        x_train, x_val: 2-D arrays; copied before being modified.
        x_test: 2-D array or None; copied before being modified when given.

    Returns:
        ``(cat_idxs, x_train, x_val, x_test)`` where ``cat_idxs`` is an array
        of the encoded column indices. When there are no such columns the
        inputs are returned untouched (same objects, no copies).
    """
    cat_idxs = np.array(
        [
            col
            for col, kind in enumerate(feature_type)
            if kind in ("categorical", "binary")
        ]
    )

    if len(cat_idxs) == 0:
        return cat_idxs, x_train, x_val, x_test

    # lgbm doesn't like negative values for categorical values. Technically
    # negative indicates that the value is actually quantized scalar, but
    # PMLB doesn't distinguish these from regular categorical values.
    x_train = x_train.copy()
    x_val = x_val.copy()

    # The encoder is fitted on the training split only.
    encoder = OrdinalEncoder().fit(x_train[:, cat_idxs])

    x_train[:, cat_idxs] = encoder.transform(x_train[:, cat_idxs])
    x_val[:, cat_idxs] = encoder.transform(x_val[:, cat_idxs])

    if x_test is not None:
        x_test = x_test.copy()
        x_test[:, cat_idxs] = encoder.transform(x_test[:, cat_idxs])

    return cat_idxs, x_train, x_val, x_test


def loss(
    y_true: np.ndarray, y_pred: np.ndarray, loss_str: str
) -> tuple[float, dict[str, float]]:
    """Score predictions with the metric selected by ``loss_str``.

    ``loss_str`` may be one of the objective names listed below or a boolean
    "is classification" flag (False selects RMSE, True selects log loss).

    Returns:
        ``(primary, extras)``. For regression the primary value is the RMSE
        and ``extras`` is empty. For classification the primary value is the
        log loss and ``extras`` holds "auc" and "zo_loss" (zero-one loss of
        the predictions thresholded at 0.5).

    Raises:
        ValueError: if ``loss_str`` is not a recognised selector.
    """
    regression_selectors = ("mse", "rmse", "regression", False, "reg:squarederror")
    classification_selectors = ("log_loss", "binary", True, "reg:logistic")

    if loss_str in regression_selectors:
        return float(rmse(y_true, y_pred)), {}

    if loss_str not in classification_selectors:
        raise ValueError(f"Unknown loss function: {loss_str}")

    hard_labels = (y_pred > 0.5).astype(int)

    primary = float(log_loss(y_true, y_pred))
    extras = {
        "auc": float(roc_auc_score(y_true, y_pred)),
        "zo_loss": float(zero_one_loss(y_true, hard_labels)),
    }

    return primary, extras


def timer_decorator(func):
    """Wrap ``func`` so that each call also reports wall-clock timing.

    The wrapped function returns
    ``(result, start_time, end_time, elapsed_time)`` where ``result`` is
    whatever ``func`` returned and the times come from ``time.time()``
    (seconds). ``functools.wraps`` keeps the wrapped function's name and
    docstring so decorated trainers remain identifiable.
    """
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        elapsed_time = end_time - start_time

        return (result, start_time, end_time, elapsed_time)

    return wrapper


@timer_decorator
def train_lgbm(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Fit a LightGBM estimator with early stopping and score it.

    ``params`` must contain "early_stopping_rounds" (removed before the
    estimator is built) and "objective" (used to pick the metric in ``loss``).
    Columns tagged categorical/binary in ``feature_type`` are ordinal-encoded
    and declared to LightGBM as categorical features. The caller's ``params``
    is not modified.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, {"val": ..., "test": ...})``.
    """
    params = deepcopy(params)
    stopping_rounds = params.pop("early_stopping_rounds")

    model = ModelClass(**params)
    is_classifier = issubclass(ModelClass, ClassifierMixin)

    stopper = lightgbm.early_stopping(
        stopping_rounds=stopping_rounds,
        verbose=False,
    )

    cat_idxs, x_train, x_val, x_test = convert_to_ordinals(
        feature_type, x_train, x_val, x_test
    )

    model.fit(
        x_train,
        y_train.flatten(),
        eval_set=[(x_val, y_val.flatten())],
        callbacks=[stopper],
        categorical_feature=cat_idxs,
    )

    def predict_scores(features):
        # Classifiers are scored on the second probability column.
        if is_classifier:
            return model.predict_proba(features)[:, 1]
        return model.predict(features)

    val_error = loss(y_val, predict_scores(x_val), params["objective"])

    if x_test is None or y_test is None:
        return val_error[0], model

    test_error = loss(y_test, predict_scores(x_test), params["objective"])

    return (
        val_error[0],
        test_error[0],
        model,
        {"val": val_error[1], "test": test_error[1]},
    )


@timer_decorator
def train_xgboost(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Fit an XGBoost estimator with early stopping and score it.

    ``params`` must contain "early_stopping_rounds" (removed before the
    estimator is built) and "objective" (used to pick the metric in ``loss``).
    The caller's ``params`` is not modified. ``feature_type`` is accepted for
    signature parity with the other trainers and is not used.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, {"val": ..., "test": ...})``.
    """
    params = deepcopy(params)
    early_stopping_rounds = params.pop("early_stopping_rounds")

    model = ModelClass(**params)

    is_classifier = issubclass(ModelClass, ClassifierMixin)

    # NOTE(review): newer XGBoost releases expect early_stopping_rounds and
    # eval_metric on the constructor rather than on fit() — confirm against
    # the pinned version.
    model.fit(
        x_train,
        # The training targets were previously missing from this call, so the
        # model was never given labels to fit.
        y_train.flatten(),
        eval_set=[(x_val, y_val.flatten())],
        early_stopping_rounds=early_stopping_rounds,
        verbose=False,
        eval_metric="logloss" if is_classifier else "rmse",
    )

    val_pred = (
        model.predict_proba(x_val)[:, 1] if is_classifier else model.predict(x_val)
    )

    val_error = loss(y_val, val_pred, params["objective"])

    if x_test is not None and y_test is not None:
        y_pred = (
            model.predict_proba(x_test)[:, 1]
            if is_classifier
            else model.predict(x_test)
        )

        test_error = loss(y_test, y_pred, params["objective"])

        return (
            val_error[0],
            test_error[0],
            model,
            {"val": val_error[1], "test": test_error[1]},
        )
    else:
        return val_error[0], model


@timer_decorator
def train_catboost(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Fit a CatBoost estimator using the validation split as eval set.

    Columns tagged "categorical" or "binary" in ``feature_type`` are declared
    as categorical features of the CatBoost pools.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, {"val": ..., "test": ...})``.
    """
    cat_columns = [
        col for col, kind in enumerate(feature_type) if kind in ("categorical", "binary")
    ]

    train_pool = Pool(x_train, y_train, cat_features=cat_columns)
    val_pool = Pool(x_val, y_val, cat_features=cat_columns)

    # https://catboost.ai/en/docs/concepts/python-reference_catboostregressor
    model = ModelClass(**params)

    # They don't subclass ClassifierMixin, so we have to check manually
    is_classifier = ModelClass == CatBoostClassifier

    model.fit(train_pool, eval_set=val_pool, verbose=False)

    def predict_scores(pool):
        # Classifiers are scored on the second probability column.
        if is_classifier:
            return model.predict_proba(pool)[:, 1]
        return model.predict(pool)

    val_pred = predict_scores(val_pool)

    if is_classifier:
        loss_fn = "log_loss"
    else:
        loss_fn = "rmse"

    val_error = loss(y_val, val_pred, loss_fn)

    if x_test is None or y_test is None:
        return val_error[0], model

    test_pool = Pool(x_test, cat_features=cat_columns)
    test_error = loss(y_test, predict_scores(test_pool), loss_fn)

    return (
        val_error[0],
        test_error[0],
        model,
        {"val": val_error[1], "test": test_error[1]},
    )


@timer_decorator
def train_sklearn(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    rescale=False,
    feature_type=[],
):
    """Fit a scikit-learn style estimator and score it.

    When ``rescale`` is true the estimator is placed behind a
    ``StandardScaler`` in a pipeline. Subclasses of ``ClassifierMixin`` are
    scored with log loss on the second ``predict_proba`` column, everything
    else with RMSE. ``feature_type`` is accepted for signature parity and is
    not used.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, {"val": ..., "test": ...})``.
    """
    if rescale:
        steps = [("scaler", StandardScaler()), ("model", ModelClass(**params))]
        model = Pipeline(steps)
    else:
        model = ModelClass(**params)

    model.fit(x_train, y_train.flatten())

    is_classification = issubclass(ModelClass, ClassifierMixin)

    def predict_scores(features):
        if is_classification:
            return model.predict_proba(features)[:, 1]
        return model.predict(features)

    val_error = loss(y_val, predict_scores(x_val), is_classification)

    if x_test is None or y_test is None:
        return val_error[0], model

    test_error = loss(y_test, predict_scores(x_test), is_classification)

    return (
        val_error[0],
        test_error[0],
        model,
        {"val": val_error[1], "test": test_error[1]},
    )


def train_gbr(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Train a gradient-boosting estimator via ``train_sklearn``.

    The incoming ``feature_type`` is not forwarded; an empty list is passed.
    """
    return train_sklearn(
        ModelClass,
        params,
        x_train=x_train,
        y_train=y_train,
        x_val=x_val,
        y_val=y_val,
        x_test=x_test,
        y_test=y_test,
        feature_type=[],
    )


def train_rfr(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Train a random-forest estimator via ``train_sklearn``.

    The incoming ``feature_type`` is not forwarded; an empty list is passed.
    """
    return train_sklearn(
        ModelClass,
        params,
        x_train=x_train,
        y_train=y_train,
        x_val=x_val,
        y_val=y_val,
        x_test=x_test,
        y_test=y_test,
        feature_type=[],
    )


def train_etr(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Train an extra-trees estimator via ``train_sklearn``.

    The incoming ``feature_type`` is not forwarded; an empty list is passed.
    """
    return train_sklearn(
        ModelClass,
        params,
        x_train=x_train,
        y_train=y_train,
        x_val=x_val,
        y_val=y_val,
        x_test=x_test,
        y_test=y_test,
        feature_type=[],
    )


# TODO: compare JOPLEn to AdaBoost
def train_abr(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Train an AdaBoost estimator via ``train_sklearn``.

    The incoming ``feature_type`` is not forwarded; an empty list is passed.
    """
    return train_sklearn(
        ModelClass,
        params,
        x_train=x_train,
        y_train=y_train,
        x_val=x_val,
        y_val=y_val,
        x_test=x_test,
        y_test=y_test,
        feature_type=[],
    )


def train_lf(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Train a linear-forest estimator via ``train_sklearn``.

    A fresh ``LinearRegression`` is injected as "base_estimator" (overriding
    any value of that key in ``params``, which itself is left untouched). The
    incoming ``feature_type`` is not forwarded; an empty list is passed.
    """
    forest_params = dict(params)
    forest_params["base_estimator"] = LinearRegression()

    return train_sklearn(
        ModelClass,
        forest_params,
        x_train=x_train,
        y_train=y_train,
        x_val=x_val,
        y_val=y_val,
        x_test=x_test,
        y_test=y_test,
        feature_type=[],
    )


def train_ridge(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Train a ridge model via ``train_sklearn`` with feature standardisation.

    ``rescale=True`` makes ``train_sklearn`` put a ``StandardScaler`` in
    front of the model. The incoming ``feature_type`` is not forwarded; an
    empty list is passed.
    """
    return train_sklearn(
        ModelClass,
        params,
        x_train=x_train,
        y_train=y_train,
        x_val=x_val,
        y_val=y_val,
        x_test=x_test,
        y_test=y_test,
        rescale=True,
        feature_type=[],
    )


@timer_decorator
def train_pen(
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Build, fit and score a JOPLEn model from flat string parameters.

    The "loss_fn", "partitioner" and "cell_model" entries are strings that are
    evaluated in the notebook namespace; "<Regularizer>_<option>" entries are
    turned into regularizer objects by ``pop_regularizers``; "verbose" is
    forwarded to ``fit``. Everything left over goes to the ``JOPLEn``
    constructor. The caller's ``params`` is not modified and ``feature_type``
    is not used.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, info)`` where ``info`` holds
        "n_epochs", "val" and "test".
    """
    params = deepcopy(params)

    # NOTE: eval() runs on strings coming from the parameter files / Ax; only
    # use trusted configuration here.
    loss_cls = eval(params.pop("loss_fn", "SquaredError"))
    is_classification = loss_cls == LogisticLoss

    verbose = params.pop("verbose", False)

    params["partitioner"] = eval(params["partitioner"])
    # NOTE(review): the fallback for cell_model is the string "SquaredError",
    # which is a loss name — confirm this default is intended.
    params["cell_model"] = eval(params.pop("cell_model", "SquaredError"))

    regularizers, params = pop_regularizers(params)

    model = JOPLEn(loss_fn=loss_cls, regularizers=regularizers, **params)

    history = model.fit(
        x_train,
        y_train,
        val_x=x_val,
        val_y=y_val,
        print_epochs=1,
        verbose=verbose,
    )

    val_error = loss(y_val, model.predict(x_val), is_classification)

    if x_test is None or y_test is None:
        return val_error[0], model

    test_error = loss(y_test, model.predict(x_test), is_classification)
    info = {
        "n_epochs": (len(history["train"]["loss"])),
        "val": val_error[1],
        "test": test_error[1],
    }

    return val_error[0], test_error[0], model, info


def train_joplen(
    _,  # ModelClass is not used
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Dispatch JOPLEn training based on the configured partitioner.

    Parameters whose "partitioner" is the string "CBPartition" go to
    ``train_cb_joplen``; everything else goes to ``train_pen``. The incoming
    ``feature_type`` is not forwarded; an empty list is passed.
    """
    if params["partitioner"] == "CBPartition":
        trainer = train_cb_joplen
    else:
        trainer = train_pen

    return trainer(
        params,
        x_train,
        y_train,
        x_val,
        y_val,
        x_test,
        y_test,
        feature_type=[],
    )


@timer_decorator
def train_cb_joplen(
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Build, fit and score a JOPLEn model that uses the CatBoost partitioner.

    Works like ``train_pen`` with three additions: "n_cells" is set to
    ``2 ** max_depth``, the partitioner-specific options are moved into
    "part_kwargs" (together with ``feature_type`` as "cat_features"), and
    ``fit`` is called with ``rescale=False``. The caller's ``params`` is not
    modified.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, info)`` where ``info`` holds
        "n_epochs", "val" and "test".
    """
    params = deepcopy(params)

    # NOTE: eval() runs on strings coming from the parameter files / Ax; only
    # use trusted configuration here.
    loss_cls = eval(params.pop("loss_fn", "SquaredError"))
    is_classification = loss_cls == LogisticLoss

    verbose = params.pop("verbose", False)

    # NOTE(review): the fallback for cell_model is the string "SquaredError",
    # which is a loss name — confirm this default is intended.
    params["cell_model"] = eval(params.pop("cell_model", "SquaredError"))
    params["partitioner"] = eval(params["partitioner"])
    params["n_cells"] = 2 ** params["max_depth"]

    regularizers, params = pop_regularizers(params)

    # Options that belong to the partitioner rather than to JOPLEn itself.
    partitioner_keys = (
        "od_wait",
        "learning_rate",
        "l2_leaf_reg",
        "od_type",
        "subsample",
        "grow_policy",
        "allow_writing_files",
    )

    part_kwargs = {}
    for key in partitioner_keys:
        if key in params:
            part_kwargs[key] = params.pop(key)
    part_kwargs["cat_features"] = feature_type
    params["part_kwargs"] = part_kwargs

    model = JOPLEn(loss_fn=loss_cls, regularizers=regularizers, **params)

    history = model.fit(
        x_train,
        y_train,
        val_x=x_val,
        val_y=y_val,
        rescale=False,
        print_epochs=1,
        verbose=verbose,
    )

    val_error = loss(y_val, model.predict(x_val), is_classification)

    if x_test is None or y_test is None:
        return val_error[0], model

    test_error = loss(y_test, model.predict(x_test), is_classification)
    info = {
        "n_epochs": (len(history["train"]["loss"])),
        "val": val_error[1],
        "test": test_error[1],
    }

    return val_error[0], test_error[0], model, info


@timer_decorator
def train_friedman(
    _,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Fit and score a ``FriedmanRefit`` model from flat string parameters.

    "base_model" and "refit_model" are strings evaluated in the notebook
    namespace. The remaining keys are routed by substring: keys containing
    "base" go to the base model, "refit" to the refit model and "shared" to
    both, each with the text up to and including the first underscore
    removed. The caller's ``params`` is not modified and ``feature_type`` is
    not used.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, {"val": ..., "test": ...})``.
    """
    params = deepcopy(params)

    # NOTE: eval() runs on strings coming from the parameter files / Ax; only
    # use trusted configuration here.
    base_model = eval(params.pop("base_model"))
    refit_model = eval(params.pop("refit_model"))

    is_classification = issubclass(refit_model, ClassifierMixin)

    def select(tag):
        # Keep the entries whose key mentions `tag`, dropping the leading
        # "<prefix>_" from each key.
        return {
            "_".join(key.split("_")[1:]): value
            for key, value in params.items()
            if tag in key
        }

    base_params = select("base")
    refit_params = select("refit")
    shared_params = select("shared")

    model = FriedmanRefit(base_model, refit_model)

    model.fit(
        x_train,
        y_train,
        base_params={**base_params, **shared_params},
        refit_params={**refit_params, **shared_params},
    )

    def predict_scores(features):
        if is_classification:
            return model.predict_proba(features)
        return model.predict(features)

    val_error = loss(y_val, predict_scores(x_val), is_classification)

    if x_test is None or y_test is None:
        return val_error[0], model

    test_error = loss(y_test, predict_scores(x_test), is_classification)

    return (
        val_error[0],
        test_error[0],
        model,
        {"val": val_error[1], "test": test_error[1]},
    )


@timer_decorator
def train_fastel(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Fit a FASTEL ``MultiTaskTrees`` model on standardised data and score it.

    Features and targets are standardised with statistics from the training
    split. Predictions are mapped back to the original target scale and are
    scored against the ORIGINAL (unscaled) validation/test targets, so the
    reported losses are on the original target scale. Previously the scaled
    targets were compared with unscaled predictions. ``ModelClass`` and
    ``feature_type`` are not used.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, {"val": ..., "test": ...})``.

    Raises:
        AssertionError: if ``params["loss_criteria"]`` is "log_loss".
    """
    assert (
        params.get("loss_criteria", "mse") != "log_loss"
    ), "FASTEL does not support the logistic loss"

    loss_criteria = params.get("loss_criteria", "mse")

    xs = StandardScaler().fit(x_train)
    x_train = xs.transform(x_train)
    x_val = xs.transform(x_val)
    x_test = xs.transform(x_test) if x_test is not None else None

    # Scaled targets are used for training only; y_val / y_test keep their
    # original values for evaluation.
    ys = StandardScaler().fit(y_train.reshape(-1, 1))
    y_train_scaled = ys.transform(y_train.reshape(-1, 1))
    y_val_scaled = ys.transform(y_val.reshape(-1, 1))

    model = MultiTaskTrees(
        input_shape=x_train.shape[1:],
        **params,
    )

    # The third and sixth arguments are all-ones arrays with one row per sample.
    model.train(
        x_train,
        y_train_scaled,
        np.ones((y_train_scaled.shape[0], 1)),
        x_val,
        y_val_scaled,
        np.ones((y_val_scaled.shape[0], 1)),
    )

    y_val_pred = ys.inverse_transform(model.predict(x_val)[:, None])
    val_error = loss(y_val, y_val_pred.flatten(), loss_criteria)

    if x_test is not None and y_test is not None:
        y_test_pred = ys.inverse_transform(model.predict(x_test)[:, None])
        test_error = loss(y_test, y_test_pred.flatten(), loss_criteria)
        return (
            val_error[0],
            test_error[0],
            model,
            {
                "val": val_error[1],
                "test": test_error[1],
            },
        )
    else:
        return val_error[0], model


@timer_decorator
def train_nn(
    ModelClass,
    params,
    x_train,
    y_train,
    x_val,
    y_val,
    x_test=None,
    y_test=None,
    feature_type=[],
):
    """Fit the ``NN`` baseline on standardised features and score it.

    "hidden_layer_size", "n_hidden_layers" and "activation" configure the
    network; every other entry (except "loss_criteria") is forwarded to
    ``fit``. All parameter handling happens on a private copy, so the caller's
    ``params`` is left intact. Previously "loss_criteria" was popped from the
    caller's dict. ``ModelClass`` and ``feature_type`` are not used.

    Returns (before timing is added by the decorator):
        ``(val_loss, model)`` when no test split is given, otherwise
        ``(val_loss, test_loss, model, {"val": ..., "test": ...})``.

    Raises:
        AssertionError: if "loss_criteria" is anything other than "mse".
    """
    # TODO: should rescale the y values as well
    xs = StandardScaler().fit(x_train)

    # Copy first so that none of the pops below touch the caller's dict.
    tmp_params = deepcopy(params)

    loss_criteria = tmp_params.pop("loss_criteria", "mse")

    assert loss_criteria == "mse"

    model = NN(
        hidden_layer_size=tmp_params.pop("hidden_layer_size"),
        n_hidden_layers=tmp_params.pop("n_hidden_layers"),
        activation=tmp_params.pop("activation"),
        sel_feat=False,
    )
    model.fit(
        xs.transform(x_train),
        y_train,
        xs.transform(x_val),
        y_val,
        **tmp_params,
    )

    y_val_pred = model.predict(xs.transform(x_val))
    val_error = loss(y_val, y_val_pred.flatten(), loss_criteria)

    if x_test is not None and y_test is not None:
        y_test_pred = model.predict(xs.transform(x_test))
        test_error = loss(y_test, y_test_pred.flatten(), loss_criteria)
        return (
            val_error[0],
            test_error[0],
            model,
            {"val": val_error[1], "test": test_error[1]},
        )
    else:
        return val_error[0], model

In [19]:
def dummy_prediction(
    x_train,
    x_val,
    x_test,
    y_train,
    y_val,
    y_test,
    is_classification,
):
    """Score a trivial baseline on the test split.

    Classification uses the most frequent training label, regression the
    training mean. The validation arrays are accepted but not used.

    Returns:
        Dict with "model_name" (class name of the dummy estimator), "loss"
        (primary value from ``loss``) and "metadata" (its extra metrics).
    """
    baseline = (
        DummyClassifier(strategy="most_frequent")
        if is_classification
        else DummyRegressor(strategy="mean")
    )
    baseline.fit(x_train, y_train)

    primary, extras = loss(y_test, baseline.predict(x_test), is_classification)

    return {
        "model_name": type(baseline).__name__,
        "loss": primary,
        "metadata": extras,
    }

## Run Experiments

In [20]:
# Dispatch table: model class name -> training function.
# optimize_model looks trainers up by the "model" string of a model
# description and calls them as
#   trainer(ModelClass, params, x_train=..., y_train=..., x_val=..., y_val=...,
#           [x_test=..., y_test=...,] feature_type=...)
# Regressor and classifier variants of a library share one trainer.
# Note: train_abr is defined above but has no entry here, so AdaBoost models
# cannot be dispatched through this table.
train_fn = {
    FriedmanRefit.__name__: train_friedman,
    LGBMRegressor.__name__: train_lgbm,
    LGBMClassifier.__name__: train_lgbm,
    CatBoostRegressor.__name__: train_catboost,
    CatBoostClassifier.__name__: train_catboost,
    XGBRegressor.__name__: train_xgboost,
    XGBClassifier.__name__: train_xgboost,
    GradientBoostingRegressor.__name__: train_gbr,
    GradientBoostingClassifier.__name__: train_gbr,
    RandomForestRegressor.__name__: train_rfr,
    RandomForestClassifier.__name__: train_rfr,
    ExtraTreesRegressor.__name__: train_etr,
    ExtraTreesClassifier.__name__: train_etr,
    JOPLEn.__name__: train_joplen,
    LinearForestRegressor.__name__: train_lf,
    LinearForestClassifier.__name__: train_lf,
    Ridge.__name__: train_ridge,
    MultiTaskTrees.__name__: train_fastel,
    NN.__name__: train_nn,
}


def optimize_model(model_info, ds_path, n_trials, skip_categorical, ds_metadata):
    """Tune one model on one dataset with Ax and cache the final scores.

    Results are cached under
    ``CACHE_DIR / ("class" | "regr") / model_info["dir_name"] / <dataset>``:
    ``metadata.yaml`` holds the final result and ``experiment.json`` the Ax
    experiment. If ``metadata.yaml`` exists it is returned without any
    training; if only ``experiment.json`` exists the search is skipped and the
    stored experiment is reused.

    Args:
        model_info: Parsed model description; the keys "parameters" (Ax search
            space), "model" (class name, also the key into ``train_fn``) and
            "dir_name" are read here.
        ds_path: Dataset folder containing x_/y_ train/val/test CSV files.
        n_trials: Number of Ax trials to run when no experiment is cached.
        skip_categorical: Not used by this function.
        ds_metadata: Dataset description; "feature_type" and
            ``["pmlb_metadata"]["target"]["type"]`` are read here.

    Returns:
        The result dict that is written to ``metadata.yaml`` (or the cached
        one), or None when the dataset has no continuous feature.
    """
    ds_name = ds_path.name
    params = model_info["parameters"]

    is_classification = ds_metadata["pmlb_metadata"]["target"]["type"] == "categorical"

    loss_type = "log_loss" if is_classification else "rmse"

    dir_path = (
        CACHE_DIR
        / ("class" if is_classification else "regr")
        / model_info["dir_name"]
        / ds_name
    )
    exp_path = dir_path / "experiment.json"
    metadata_path = dir_path / "metadata.yaml"

    # Fast path: this model/dataset pair has already been completed.
    if metadata_path.exists():
        with open(metadata_path, "r") as f:
            metadata = yaml.load(f, Loader=yaml.FullLoader)

        return metadata

    # Only continuous features are used; every other column is dropped below.
    cont_mask = np.array([t == "continuous" for t in ds_metadata["feature_type"]])
    bl_categorical = np.any(~cont_mask)

    if np.sum(cont_mask) == 0:
        return None

    x_train = np.loadtxt(ds_path / "x_train.csv", delimiter=",")[:, cont_mask]
    x_val = np.loadtxt(ds_path / "x_val.csv", delimiter=",")[:, cont_mask]
    x_test = np.loadtxt(ds_path / "x_test.csv", delimiter=",")[:, cont_mask]
    y_train = np.loadtxt(ds_path / "y_train.csv", delimiter=",")
    y_val = np.loadtxt(ds_path / "y_val.csv", delimiter=",")
    y_test = np.loadtxt(ds_path / "y_test.csv", delimiter=",")

    if is_classification:
        # Label encoding is fitted on the training targets only.
        enc = LabelEncoder()
        y_train = enc.fit_transform(y_train)
        y_val = enc.transform(y_val)
        y_test = enc.transform(y_test)

    # Trivial baseline, stored next to the model's scores for reference.
    dummy_info = dummy_prediction(
        x_train,
        x_val,
        x_test,
        y_train,
        y_val,
        y_test,
        is_classification=is_classification,
    )

    if not exp_path.exists():
        ax_client = AxClient(
            random_seed=0,
            verbose_logging=False,
        )

        ax_client.create_experiment(
            name=f"{model_info['model']}_{ds_name}",
            parameters=params,
            objectives={loss_type: ObjectiveProperties(minimize=True)},
            overwrite_existing_experiment=True,
        )

        for _ in trange(n_trials, leave=False, position=1):
            round_params, trial_index = ax_client.get_next_trial()

            try:
                # Trainers are wrapped by timer_decorator, so element [0] is
                # the trainer's own result; without a test split that is
                # (val_error, model).
                # NOTE: eval() resolves the model class from a string taken
                # from the parameter files; only use trusted configuration.
                val_error, _ = train_fn[model_info["model"]](
                    eval(model_info["model"]),
                    round_params,
                    x_train=x_train,
                    y_train=y_train,
                    x_val=x_val,
                    y_val=y_val,
                    feature_type=[],
                )[0]
                ax_client.complete_trial(
                    trial_index=trial_index, raw_data=float(val_error)
                )
            except ValueError as e:
                # A ValueError during training/scoring abandons this trial
                # instead of aborting the whole search.
                print(e)
                ax_client.abandon_trial(
                    trial_index=trial_index,
                    reason=str(e),
                )

        exp_path.parent.mkdir(parents=True, exist_ok=True)
        ax_client.save_to_json_file(
            filepath=exp_path,
        )
    else:
        ax_client = AxClient.load_from_json_file(filepath=exp_path)

    best_parameters, values = ax_client.get_best_parameters()

    # Retrain once with the best parameters, this time also scoring the test
    # split; the fourth element of the timed result is the elapsed time.
    (val_error, test_error, model, metadata), _, _, train_time = train_fn[
        model_info["model"]
    ](
        eval(model_info["model"]),
        best_parameters,
        x_train=x_train,
        y_train=y_train,
        x_val=x_val,
        y_val=y_val,
        x_test=x_test,
        y_test=y_test,
        feature_type=[],
    )

    metadata = {
        "model_name": model_info["model"],
        "val_score": float(val_error),
        "test_score": float(test_error),
        "train_time": float(train_time),
        "params": best_parameters,
        "dummy_loss": float(dummy_info["loss"]),
        # "postprocessed" marks datasets whose non-continuous columns were
        # dropped above.
        "contains_categorical": "postprocessed" if bl_categorical else False,
        "metadata": metadata,
        "dummy_metadata": dummy_info["metadata"],
    }

    with open(metadata_path, "w") as f:
        yaml.dump(metadata, f)

    return metadata

In [21]:
# Parameter-file stems (keys of model_info[task]) that the experiment loop
# skips. Entries that are commented out are the configurations that WILL run.
ignored_models = [
    "adaboost",
    "joplen_const_linforest_part",
    "lf",
    "et",
    # "joplen_const_rf_part",
    "lgbm",
    "fastel",
    "joplen_const",
    "nn",
    # "gb",  # normal GB
    "joplen_linear_gb_part",
    "rf",
    # "joplen_const_gb_part_l1",
    "joplen_linear_inf",
    "ridge",
    # "joplen_const_gb_part_l2",  # fast joplen loss
    "joplen_linear_linforest_part",
    # "xgboost",  # gradient boosting with penalty term
    "joplen_const_gb_part_sl2",
    "joplen_linear_rf_part",
    "joplen_const_gb_part",
    "joplen_linear",
    "jp_cb",
    "joplen_const_gb_part_l2L2",
    "joplen_const_gb_part_L2",
    "jp_cb_stop",
    "jp_const_gb_part_choose_l2L2",
]

# One folder per dataset, grouped by task type.
reg_datasets = [d for d in (DS_PATH / "reg").iterdir() if d.is_dir()]
class_datasets = [d for d in (DS_PATH / "class").iterdir() if d.is_dir()]

# metadata[task][dataset_name] -> description of that dataset.
metadata = {}

for name in ["reg", "class"]:
    # Context manager so the metadata file is closed once it has been parsed.
    with open(DS_PATH.parent / f"{name}_metadata.yaml", "r") as metadata_file:
        metadata[name] = yaml.safe_load(metadata_file)


# reg_res[model display name][dataset name] -> result dict from optimize_model.
# Despite the name, results of both task types are collected here.
reg_res = defaultdict(dict)

for name, lst in zip(["reg", "class"], [reg_datasets, class_datasets]):
    print(f"Running {name} datasets")

    itr = tqdm(lst, position=0)

    for ds_path in itr:
        if ds_path.name in EXCLUDE:
            continue

        for file_name, info in model_info[name].items():
            if file_name in ignored_models:
                continue

            model_str = f"{file_name} on {ds_path.name}"
            itr.set_description(f"Running {model_str : <50}")
            # 50 Ax trials per model/dataset pair; the fourth argument
            # (skip_categorical) is not used by optimize_model.
            res = optimize_model(
                info,
                ds_path,
                50,
                False,
                metadata[name][ds_path.name],
            )

            # optimize_model returns None for datasets without continuous
            # features; those are left out of the results.
            if res is not None:
                reg_res[info["name"]][ds_path.name] = res

reg_res = dict(reg_res)

Running reg datasets


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Trying again with a new set of initial conditions.


  0%|          | 0/50 [00:00<?, ?it/s]

  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = 