This repository has been archived by the owner on Nov 14, 2023. It is now read-only.

Use tune.with_parameters #237

Merged
merged 3 commits on Mar 16, 2022
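Summary of the change: instead of calling ray.put on the training data and estimators and passing the resulting object IDs through the trial config (X_id, y_id, estimator_ids), the searchers now wrap the trainable with tune.with_parameters, which ships X, y, and the estimator list through the Ray object store and hands them to Trainable.setup directly. A minimal sketch of the pattern with a toy trainable and placeholder data (illustrative only, not code from this repo):

import numpy as np
import ray
from ray import tune
from ray.tune import Trainable

class MyTrainable(Trainable):
    def setup(self, config, X=None, y=None):
        # Large objects arrive via tune.with_parameters; only small
        # per-trial hyperparameters stay in `config`.
        self.X, self.y = X, y
        self.alpha = config["alpha"]

    def step(self):
        # Train one iteration on self.X / self.y and report a metric.
        return {"score": float(self.alpha)}

ray.init()
X_data = np.random.rand(50, 5)
y_data = np.random.randint(0, 2, size=50)

trainable = tune.with_parameters(MyTrainable, X=X_data, y=y_data)
tune.run(
    trainable,
    config={"alpha": tune.grid_search([1e-2, 1e-1])},
    stop={"training_iteration": 1})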
3 changes: 2 additions & 1 deletion .gitignore
@@ -6,4 +6,5 @@ dist
.DS_Store
*.egg-info
MANIFEST

catboost_info
test-result
70 changes: 40 additions & 30 deletions tests/test_trainable.py
@@ -1,8 +1,9 @@
import unittest
import ray
from ray import tune
from tune_sklearn._trainable import _Trainable
from tune_sklearn._detect_booster import (has_xgboost,
has_required_lightgbm_version)
from tune_sklearn._detect_booster import (
has_xgboost, has_required_lightgbm_version, has_catboost)

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression, SGDClassifier
@@ -33,8 +34,6 @@ def create_catboost():


class TrainableTest(unittest.TestCase):
X_id = None
y_id = None
X = None
y = None

@@ -43,7 +42,6 @@ def setUpClass(cls):
ray.init(local_mode=True)
X, y = make_classification(
n_samples=50, n_features=50, n_informative=3, random_state=0)
cls.X_id, cls.y_id = ray.put(X), ray.put(y)
cls.y = y
cls.X = X

@@ -52,15 +50,9 @@ def tearDownClass(cls):
ray.shutdown()

def base_params(self, estimator_list):
config = {
"estimator_ids": [
ray.put(estimator) for estimator in estimator_list
]
}
config = {}
cv = check_cv(
cv=len(estimator_list), y=self.y, classifier=estimator_list[0])
config["X_id"] = self.X_id
config["y_id"] = self.y_id
config["early_stopping"] = False
config["early_stop_type"] = get_early_stop_type(
estimator_list[0], False)
@@ -75,8 +67,11 @@ def base_params(self, estimator_list):
return config

def test_basic_train(self):
config = self.base_params(estimator_list=[SVC(), SVC()])
trainable = _Trainable(config)
estimator_list = [SVC(), SVC()]
config = self.base_params(estimator_list)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
trainable.stop()

@@ -87,7 +82,9 @@ def testXGBoostEarlyStop(self):
config["early_stopping"] = True
config["early_stop_type"] = get_early_stop_type(
estimator_list[0], True)
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert all(trainable.saved_models)
trainable.train()
@@ -96,11 +93,12 @@

@unittest.skipIf(not has_xgboost(), "xgboost not installed")
def testXGBoostNoEarlyStop(self):
config = self.base_params(
estimator_list=[create_xgboost(),
create_xgboost()])
estimator_list = [create_xgboost(), create_xgboost()]
config = self.base_params(estimator_list=estimator_list)
config["early_stopping"] = False
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert not any(trainable.saved_models)
trainable.stop()
@@ -113,7 +111,9 @@ def testLGBMEarlyStop(self):
config["early_stopping"] = True
config["early_stop_type"] = get_early_stop_type(
estimator_list[0], True)
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert all(trainable.saved_models)
trainable.train()
@@ -126,33 +126,37 @@ def testLGBMNoEarlyStop(self):
estimator_list = [create_lightgbm(), create_lightgbm()]
config = self.base_params(estimator_list=estimator_list)
config["early_stopping"] = False
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert not any(trainable.saved_models)
trainable.stop()

# @unittest.skipIf(not has_catboost(), "catboost not installed")
@unittest.skip("Catboost needs to be updated.")
@unittest.skipIf(not has_catboost(), "catboost not installed")
def testCatboostEarlyStop(self):
estimator_list = [create_catboost(), create_catboost()]
config = self.base_params(estimator_list=estimator_list)
config["early_stopping"] = True
config["early_stop_type"] = get_early_stop_type(
estimator_list[0], True)
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert all(trainable.saved_models)
trainable.train()
assert all(trainable.saved_models)
trainable.stop()

# @unittest.skipIf(not has_catboost(), "catboost not installed")
@unittest.skip("Catboost needs to be updated.")
@unittest.skipIf(not has_catboost(), "catboost not installed")
def testCatboostNoEarlyStop(self):
estimator_list = [create_catboost(), create_catboost()]
config = self.base_params(estimator_list=estimator_list)
config["early_stopping"] = False
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert not any(trainable.saved_models)
trainable.stop()
@@ -163,7 +167,9 @@ def testPartialFit(self):
config["early_stopping"] = True
config["early_stop_type"] = get_early_stop_type(
estimator_list[0], True)
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert trainable.estimator_list[0].t_ > 0
previous_t = trainable.estimator_list[0].t_
@@ -175,7 +181,9 @@ def testNoPartialFit(self):
estimator_list = [SGDClassifier(), SGDClassifier()]
config = self.base_params(estimator_list)
config["early_stopping"] = False
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
assert not hasattr(trainable.estimator_list[0], "t_")
trainable.train()
@@ -189,7 +197,9 @@ def testWarmStart(self):
config["early_stopping"] = True
config["early_stop_type"] = get_early_stop_type(
estimator_list[0], True)
trainable = _Trainable(config)
trainable = tune.with_parameters(
_Trainable, X=self.X, y=self.y,
estimator_list=estimator_list)(config)
trainable.train()
trainable.train()
trainable.stop()
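Aside on the test pattern above: tune.with_parameters returns a wrapped Trainable class, so the tests instantiate it directly with a config and the wrapped keyword arguments are delivered to setup without going through tune.run. A self-contained sketch of the same idea with a toy trainable (illustrative, not the repo's _Trainable):

import numpy as np
import ray
from ray import tune
from ray.tune import Trainable

class EchoTrainable(Trainable):
    def setup(self, config, X=None, y=None):
        # X and y come from tune.with_parameters, not from config.
        self.X, self.y = X, y

    def step(self):
        return {"n_rows": len(self.X)}

ray.init(local_mode=True)
X, y = np.zeros((10, 3)), np.zeros(10)
wrapped = tune.with_parameters(EchoTrainable, X=X, y=y)
trainable = wrapped(config={})       # setup(config, X=X, y=y) runs here
print(trainable.train()["n_rows"])   # each train() call is one Tune iteration
trainable.stop()
ray.shutdown()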
4 changes: 4 additions & 0 deletions tests/test_utils.py
@@ -168,3 +168,7 @@ def __array__(self, dtype=None):
# input validation in cross-validation does not try to call that
# method.
return self.array

def take(self, key, axis=None, **kwargs):
indexer = self.iloc
return indexer[:, key] if axis else indexer[key]
18 changes: 9 additions & 9 deletions tune_sklearn/_trainable.py
@@ -2,13 +2,13 @@
"""
from sklearn.model_selection import cross_validate
from sklearn.utils.metaestimators import _safe_split
from sklearn.base import clone
import numpy as np
import os
from pickle import PicklingError
import warnings
import inspect

import ray
from ray.tune import Trainable
import ray.cloudpickle as cpickle
from tune_sklearn.utils import (EarlyStopping, _aggregate_score_dicts)
@@ -27,8 +27,11 @@ class _Trainable(Trainable):
def main_estimator(self):
return self.estimator_list[0]

def setup(self, config):
def setup(self, config, X=None, y=None, estimator_list=None):
# forward-compatibility
self.X = X
self.y = y
self.original_estimator_list = estimator_list
self._setup(config)

def _setup(self, config):
@@ -42,15 +45,8 @@ def _setup(self, config):
stopping if it is set to true.

"""
estimator_ids = list(config.pop("estimator_ids"))
self.estimator_list = ray.get(estimator_ids)
self.early_stopping = config.pop("early_stopping")
self.early_stop_type = config.pop("early_stop_type")
X_id = config.pop("X_id")
self.X = ray.get(X_id)

y_id = config.pop("y_id")
self.y = ray.get(y_id)
self.groups = config.pop("groups")
self.fit_params = config.pop("fit_params")
self.scoring = config.pop("scoring")
Expand All @@ -64,6 +60,10 @@ def _setup(self, config):
self.test_accuracy = None
self.saved_models = [] # XGBoost specific

self.estimator_list = [
clone(est) for est in self.original_estimator_list
]

if self.early_stopping:
n_splits = self._setup_early_stopping()

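A note on the clone() calls introduced in setup above: tune.with_parameters hands every trial the same estimator objects from the object store, so each trial clones them before fitting to keep per-trial state isolated. A small illustrative sketch of that idea (names are placeholders, not this repo's classes):

from sklearn.base import clone
from sklearn.linear_model import SGDClassifier

# One shared, unfitted estimator list delivered to all trials.
shared_estimators = [SGDClassifier(alpha=1e-3), SGDClassifier(alpha=1e-3)]

def estimators_for_trial(shared):
    # Fresh, unfitted copies with the same hyperparameters for each trial.
    return [clone(est) for est in shared]

trial_a = estimators_for_trial(shared_estimators)
trial_b = estimators_for_trial(shared_estimators)
assert trial_a[0] is not shared_estimators[0]
assert trial_a[0] is not trial_b[0]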
20 changes: 10 additions & 10 deletions tune_sklearn/tune_basesearch.py
@@ -518,14 +518,9 @@ def _fit(self, X, y=None, groups=None, tune_params=None, **fit_params):
gpu_fraction = int(np.ceil(gpu_fraction))
resources_per_trial = {"cpu": cpu_fraction, "gpu": gpu_fraction}

X_id = ray.put(X)
y_id = ray.put(y)

config = {}
config["early_stopping"] = bool(self.early_stopping_)
config["early_stop_type"] = self.early_stop_type
config["X_id"] = X_id
config["y_id"] = y_id
config["groups"] = groups
config["cv"] = cv
config["fit_params"] = fit_params
Expand All @@ -536,7 +531,7 @@ def _fit(self, X, y=None, groups=None, tune_params=None, **fit_params):
config["metric_name"] = self._metric_name

self._fill_config_hyperparam(config)
self.analysis_ = self._tune_run(config, resources_per_trial,
self.analysis_ = self._tune_run(X, y, config, resources_per_trial,
tune_params)

self.cv_results_ = self._format_results(self.n_splits, self.analysis_)
@@ -687,10 +682,16 @@ def _fill_config_hyperparam(self, config):
"""
raise NotImplementedError("Define in child class")

def _tune_run(self, config, resources_per_trial, tune_params=None):
def _tune_run(self, X, y, config, resources_per_trial, tune_params=None):
"""Wrapper to call ``tune.run``. Implement this in a child class.

Args:
X (:obj:`array-like` (shape = [n_samples, n_features])):
Training vector, where n_samples is the number of samples and
n_features is the number of features.
y (:obj:`array-like`): Shape of array expected to be [n_samples]
or [n_samples, n_output]). Target relative to X for
classification or regression; None for unsupervised learning.
config (:obj:`dict`): dictionary to be passed in as the
configuration for `tune.run`.
resources_per_trial (:obj:`dict` of int): dictionary specifying the
@@ -734,9 +735,8 @@ def _clean_config_dict(self, config):
and the values are the numeric values set to those variables.
"""
for key in [
"estimator_ids", "early_stopping", "X_id", "y_id", "groups",
"cv", "fit_params", "scoring", "max_iters",
"return_train_score", "n_jobs", "metric_name",
"early_stopping", "groups", "cv", "fit_params", "scoring",
"max_iters", "return_train_score", "n_jobs", "metric_name",
"early_stop_type"
]:
config.pop(key, None)
23 changes: 17 additions & 6 deletions tune_sklearn/tune_gridsearch.py
@@ -3,10 +3,12 @@
"""
import warnings

import ray
from ray.tune.stopper import CombinedStopper
from sklearn.model_selection import ParameterGrid
from sklearn.base import clone

from ray import tune
from ray.tune.stopper import CombinedStopper

from tune_sklearn.list_searcher import ListSearcher
from tune_sklearn.utils import (_check_param_grid_tune_grid_search,
check_is_pipeline, check_error_warm_start,
@@ -223,12 +225,18 @@ def _list_grid_num_samples(self):
"""
return len(list(ParameterGrid(self.param_grid)))

def _tune_run(self, config, resources_per_trial, tune_params=None):
def _tune_run(self, X, y, config, resources_per_trial, tune_params=None):
"""Wrapper to call ``tune.run``. Multiple estimators are generated when
early stopping is possible, whereas a single estimator is
generated when early stopping is not possible.

Args:
X (:obj:`array-like` (shape = [n_samples, n_features])):
Training vector, where n_samples is the number of samples and
n_features is the number of features.
y (:obj:`array-like`): Shape of array expected to be [n_samples]
or [n_samples, n_output]). Target relative to X for
classification or regression; None for unsupervised learning.
config (dict): Configurations such as hyperparameters to run
``tune.run`` on.
resources_per_trial (dict): Resources to use per trial within Ray.
Expand All @@ -249,11 +257,11 @@ def _tune_run(self, config, resources_per_trial, tune_params=None):
trainable = _PipelineTrainable

if self.early_stopping_ is not None:
config["estimator_ids"] = [
ray.put(self.estimator) for _ in range(self.n_splits)
estimator_list = [
clone(self.estimator) for _ in range(self.n_splits)
]
else:
config["estimator_ids"] = [ray.put(self.estimator)]
estimator_list = [clone(self.estimator)]

stopper = MaximumIterationStopper(max_iter=self.max_iters)
if self.stopper:
Expand Down Expand Up @@ -283,6 +291,9 @@ def _tune_run(self, config, resources_per_trial, tune_params=None):
run_args = self._override_run_args_with_tune_params(
run_args, tune_params)

trainable = tune.with_parameters(
trainable, X=X, y=y, estimator_list=estimator_list)

with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", message="fail_fast='raise' "
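From the user's side the public API is unchanged; the tune.with_parameters plumbing stays internal to the searchers. A typical call still looks like the sketch below (assuming the existing TuneGridSearchCV interface; data and parameter values are illustrative):

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from tune_sklearn import TuneGridSearchCV

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
param_grid = {"alpha": [1e-4, 1e-3, 1e-2]}

search = TuneGridSearchCV(
    SGDClassifier(), param_grid, early_stopping=True, max_iters=10)
search.fit(X, y)  # X and y now reach each trial via tune.with_parameters
print(search.best_params_)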