
[MRG] extending BaseSearchCV with a custom search strategy #9599

Merged: 32 commits, Aug 5, 2018
Changes shown are from 30 commits.

Commits
d0d0436
ENH Implementers of BaseSearchCV can now provide candidates through a…
jnothman Aug 21, 2017
d4dbb5d
Pull out _format_results helper
jnothman Aug 22, 2017
8e36e37
Cannot del all_results in Py2
jnothman Aug 22, 2017
0d20d0b
Test
jnothman Aug 22, 2017
4264fff
More test, and a fixme
jnothman Aug 22, 2017
6905a47
Fix rank and masking cases
jnothman Aug 22, 2017
9c46788
Improve words
jnothman Aug 22, 2017
ab56a97
Spelling
jnothman Aug 22, 2017
dcaeb14
Provide cumulative results to coroutine
jnothman Sep 11, 2017
4f9f84e
See if we can make private methods appear
jnothman Jun 14, 2018
249bcbb
Merge branch 'master' into search-coroutine
jnothman Jun 14, 2018
05c3c3b
Improve documentation
jnothman Jun 14, 2018
1dad444
FIX parameter sampler generation
jnothman Jun 15, 2018
579a43e
Public AdaptiveSearchCV instead of abstract interface
jnothman Jun 16, 2018
90aaa06
BaseSearchCV is not in sklearn.model_selection.__init__
jnothman Jun 16, 2018
d6ef8da
AdaptiveSearchCV is also OTHER
jnothman Jun 16, 2018
1c19d9f
Fix super call
jnothman Jun 16, 2018
709b89a
Change coroutine structure to callback
jnothman Jun 18, 2018
b77eec4
Allow the user to pass search directly
jnothman Jun 18, 2018
4211b2b
Adapt tests to new interface
jnothman Jun 18, 2018
c23e378
Clarify
jnothman Jun 18, 2018
6c7c231
Merge branch 'master' into search-coroutine
jnothman Jun 26, 2018
4ef1f1b
Fix merge error
jnothman Jul 16, 2018
e430dc4
Fixes given merge
jnothman Jul 16, 2018
ecffb3e
PEP8 fix
jnothman Jul 17, 2018
718ded1
Get rid of AdaptiveSearchCV
jnothman Jul 21, 2018
df3e4c7
Specify cv to a divisor of n_samples
jnothman Jul 23, 2018
f14c8ce
Avoid return_train_score FutureWarning
jnothman Jul 24, 2018
7d91d18
Nitpicks
jnothman Jul 26, 2018
2a3eecf
What's new
jnothman Jul 26, 2018
5c135d0
Olivier's comments
jnothman Aug 5, 2018
bd88b64
Merge branch 'master' into search-coroutine
jnothman Aug 5, 2018
7 changes: 7 additions & 0 deletions doc/whats_new/v0.20.rst
@@ -305,6 +305,13 @@ Model evaluation and meta-estimators
hyperparameter optimization and refitting the best model on the whole
dataset. :issue:`11310` by :user:`Matthias Feurer <mfeurer>`.

- `BaseSearchCV` now has an experimental, private interface to support
customized parameter search strategies, through its ``_run_search``
method. See the implementations in :class:`model_selection.GridSearchCV`
and :class:`model_selection.RandomizedSearchCV` and please provide feedback
if you use this. Note that we do not assure the stability of this API
beyond version 0.20. :issue:`9599` by `Joel Nothman`_
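
As a rough illustration of the interface described in this note (the class name below is hypothetical and not part of scikit-learn), a subclass only has to implement ``_run_search`` and hand lists of parameter settings to the ``evaluate_candidates`` callback:

    from sklearn.model_selection._search import BaseSearchCV

    class FixedCandidatesSearchCV(BaseSearchCV):
        """Minimal sketch: evaluate a user-supplied list of parameter dicts."""

        def __init__(self, estimator, candidates, **kwargs):
            super(FixedCandidatesSearchCV, self).__init__(estimator, **kwargs)
            self.candidates = candidates

        def _run_search(self, evaluate_candidates):
            # GridSearchCV and RandomizedSearchCV likewise make a single call.
            evaluate_candidates(self.candidates)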

Decomposition and manifold learning

- Speed improvements for both 'exact' and 'barnes_hut' methods in
169 changes: 112 additions & 57 deletions sklearn/model_selection/_search.py
@@ -406,7 +406,8 @@ def __repr__(self):

class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator,
MetaEstimatorMixin)):
"""Base class for hyper parameter search with cross-validation."""
"""Abstract base class for hyper parameter search with cross-validation.
"""

@abstractmethod
def __init__(self, estimator, scoring=None,
@@ -577,6 +578,30 @@ def classes_(self):
self._check_is_fitted("classes_")
return self.best_estimator_.classes_

@abstractmethod
def _run_search(self, evaluate_candidates):
"""Repeatedly calls `evaluate_candidates` to conduct a search.

ogrisel (Member) commented on Aug 3, 2018:

Please add some motivation for the intent of this abstract method. For instance:

This method, implemented in sub-classes, makes it possible to customize the
scheduling of evaluations: GridSearchCV and RandomizedSearchCV schedule
evaluations for their whole parameter search space at once, but other, more
sequential approaches are also possible: for instance, it is possible to
iteratively schedule evaluations for new regions of the parameter search
space based on previously collected evaluation results. This makes it
possible to implement Bayesian optimization, or more generally sequential
model-based optimization, by deriving from the BaseSearchCV abstract base
class.

jnothman (Member, Author) replied:
Very nice text!
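
To make that motivation concrete, here is a rough, purely illustrative sketch (not part of this PR; the class name and the zooming rule are invented) of a sequential strategy that schedules new evaluations based on results collected so far:

    import numpy as np
    from sklearn.model_selection._search import BaseSearchCV

    class ZoomSearchCV(BaseSearchCV):
        """Sketch: repeatedly narrow a log-spaced grid around the best C."""

        def __init__(self, estimator, n_rounds=3, **kwargs):
            super(ZoomSearchCV, self).__init__(estimator, **kwargs)
            self.n_rounds = n_rounds

        def _run_search(self, evaluate_candidates):
            low, high = 1e-3, 1e3
            for _ in range(self.n_rounds):
                grid = np.logspace(np.log10(low), np.log10(high), num=5)
                results = evaluate_candidates([{'C': c} for c in grid])
                # evaluate_candidates returns *cumulative* results, so pick
                # the best parameters seen so far and zoom in around them.
                best_idx = np.argmax(results['mean_test_score'])
                best_c = results['params'][best_idx]['C']
                low, high = best_c / 10., best_c * 10.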

Parameters
----------
evaluate_candidates : callable
This callback accepts a list of candidates, where each candidate is
a dict of parameter settings. It returns a dict of all results so
far, formatted like ``cv_results_``.

A contributor commented:

Could we refactor this function? I think a better implementation would be

def _generate_candidates(self):
    params = results = None
    while True:
        params = self._candidates(params, results)
        if params is None:
            break
        results = yield params

where self._candidates should be overridden (and raise a NotImplementedError in this class).

A contributor added:

This would allow the documentation of _candidates to be simple: takes in params and results, and returns more params. Plus, the iteration logic is separated from the choosing logic.
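
For reference, a self-contained sketch of how the proposed generator protocol would behave (this design was not adopted in the PR; ``_candidates`` and the driver loop below are stand-ins written as plain functions rather than BaseSearchCV methods):

    def _candidates(params, results):
        # One-shot strategy: schedule a fixed batch on the first call, then stop.
        if params is None:
            return [{'C': 1}, {'C': 10}]
        return None

    def _generate_candidates():
        params = results = None
        while True:
            params = _candidates(params, results)
            if params is None:
                break
            results = yield params

    # How fit() would drive the generator, feeding results back in with send():
    gen = _generate_candidates()
    try:
        batch = next(gen)
        while True:
            results = {'mean_test_score': [0.8] * len(batch)}  # fake evaluation
            batch = gen.send(results)
    except StopIteration:
        pass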

Examples
--------

::

def _run_search(self):

A member commented inline:
typo:

def _run_search(self, evaluate_candidates):
   ...

    'Try C=0.1 only if C=1 is better than C=10'
    all_results = evaluate_candidates([{'C': 1}, {'C': 10}])
    score = all_results['mean_test_score']
    if score[0] < score[1]:
        evaluate_candidates([{'C': 0.1}])
"""

def fit(self, X, y=None, groups=None, **fit_params):
"""Run fit with all sets of parameters.

@@ -636,29 +661,86 @@ def fit(self, X, y=None, groups=None, **fit_params):

X, y, groups = indexable(X, y, groups)
n_splits = cv.get_n_splits(X, y, groups)
# Regenerate parameter iterable for each fit
candidate_params = list(self._get_param_iterator())
n_candidates = len(candidate_params)
if self.verbose > 0:
print("Fitting {0} folds for each of {1} candidates, totalling"
" {2} fits".format(n_splits, n_candidates,
n_candidates * n_splits))

base_estimator = clone(self.estimator)
pre_dispatch = self.pre_dispatch

out = Parallel(
n_jobs=self.n_jobs, verbose=self.verbose,
pre_dispatch=pre_dispatch
)(delayed(_fit_and_score)(clone(base_estimator), X, y, scorers, train,
test, self.verbose, parameters,
fit_params=fit_params,
return_train_score=self.return_train_score,
return_n_test_samples=True,
return_times=True, return_parameters=False,
error_score=self.error_score)
for parameters, (train, test) in product(candidate_params,
cv.split(X, y, groups)))

parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
pre_dispatch=self.pre_dispatch)

fit_and_score_kwargs = dict(scorer=scorers,
fit_params=fit_params,
return_train_score=self.return_train_score,
return_n_test_samples=True,
return_times=True,
return_parameters=False,
error_score=self.error_score,
verbose=self.verbose)
results_container = [{}]
with parallel:
all_candidate_params = []
all_out = []

def evaluate_candidates(candidate_params):
candidate_params = list(candidate_params)
n_candidates = len(candidate_params)

if self.verbose > 0:
print("Fitting {0} folds for each of {1} candidates,"
" totalling {2} fits".format(
n_splits, n_candidates, n_candidates * n_splits))

out = parallel(delayed(_fit_and_score)(clone(base_estimator),
X, y,
train=train, test=test,
parameters=parameters,
**fit_and_score_kwargs)
for parameters, (train, test)
in product(candidate_params,
cv.split(X, y, groups)))

all_candidate_params.extend(candidate_params)
all_out.extend(out)

# XXX: When we drop Python 2 support, we can use nonlocal
# instead of results_container
results_container[0] = self._format_results(
all_candidate_params, scorers, n_splits, all_out)
return results_container[0]

self._run_search(evaluate_candidates)

results = results_container[0]

# For multi-metric evaluation, store the best_index_, best_params_ and
# best_score_ iff refit is one of the scorer names
# In single metric evaluation, refit_metric is "score"
if self.refit or not self.multimetric_:
self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
self.best_params_ = results["params"][self.best_index_]
self.best_score_ = results["mean_test_%s" % refit_metric][
self.best_index_]

if self.refit:
self.best_estimator_ = clone(base_estimator).set_params(
**self.best_params_)
refit_start_time = time.time()
if y is not None:
self.best_estimator_.fit(X, y, **fit_params)
else:
self.best_estimator_.fit(X, **fit_params)
refit_end_time = time.time()
self.refit_time_ = refit_end_time - refit_start_time

# Store the only scorer not as a dict for single metric evaluation
self.scorer_ = scorers if self.multimetric_ else scorers['score']

self.cv_results_ = results
self.n_splits_ = n_splits

return self
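
The ``results_container`` single-element list above stands in for ``nonlocal`` on Python 2, and ``evaluate_candidates`` deliberately returns cumulative results across calls. A stand-alone toy illustration of both points (not PR code; the fake ``format_results`` just counts candidates):

    def make_search_loop(format_results):
        results_container = [{}]     # a list so the closure can rebind item 0
        all_candidate_params = []

        def evaluate_candidates(candidate_params):
            all_candidate_params.extend(candidate_params)
            # Rebinding results_container[0] works on Python 2 and 3 alike;
            # with Python 3 only, `nonlocal` would replace the container.
            results_container[0] = format_results(all_candidate_params)
            return results_container[0]

        return evaluate_candidates, results_container

    evaluate, container = make_search_loop(lambda seen: {'n_candidates': len(seen)})
    evaluate([{'C': 1}, {'C': 10}])
    evaluate([{'C': 0.1}])
    print(container[0])    # {'n_candidates': 3} -- cumulative across both calls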

def _format_results(self, candidate_params, scorers, n_splits, out):
n_candidates = len(candidate_params)

# if one chooses to see train scores, "out" will contain train score info
if self.return_train_score:
@@ -744,7 +826,6 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
prev_keys = set(results.keys())
_store('train_%s' % scorer_name, train_scores[scorer_name],
splits=True)

if self.return_train_score == 'warn':
for key in set(results.keys()) - prev_keys:
message = (
@@ -755,33 +836,7 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
# warn on key access
results.add_warning(key, message, FutureWarning)

# For multi-metric evaluation, store the best_index_, best_params_ and
# best_score_ iff refit is one of the scorer names
# In single metric evaluation, refit_metric is "score"
if self.refit or not self.multimetric_:
self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
self.best_params_ = candidate_params[self.best_index_]
self.best_score_ = results["mean_test_%s" % refit_metric][
self.best_index_]

if self.refit:
self.best_estimator_ = clone(base_estimator).set_params(
**self.best_params_)
refit_start_time = time.time()
if y is not None:
self.best_estimator_.fit(X, y, **fit_params)
else:
self.best_estimator_.fit(X, **fit_params)
refit_end_time = time.time()
self.refit_time_ = refit_end_time - refit_start_time

# Store the only scorer not as a dict for single metric evaluation
self.scorer_ = scorers if self.multimetric_ else scorers['score']

self.cv_results_ = results
self.n_splits_ = n_splits

return self
return results


class GridSearchCV(BaseSearchCV):
@@ -1100,9 +1155,9 @@ def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
self.param_grid = param_grid
_check_param_grid(param_grid)

def _get_param_iterator(self):
"""Return ParameterGrid instance for the given param_grid"""
return ParameterGrid(self.param_grid)
def _run_search(self, evaluate_candidates):
"""Search all candidates in param_grid"""
evaluate_candidates(ParameterGrid(self.param_grid))


class RandomizedSearchCV(BaseSearchCV):
@@ -1414,8 +1469,8 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
pre_dispatch=pre_dispatch, error_score=error_score,
return_train_score=return_train_score)

def _get_param_iterator(self):
"""Return ParameterSampler instance for the given distributions"""
return ParameterSampler(
def _run_search(self, evaluate_candidates):
"""Search n_iter candidates from param_distributions"""
evaluate_candidates(ParameterSampler(
self.param_distributions, self.n_iter,
random_state=self.random_state)
random_state=self.random_state))
55 changes: 53 additions & 2 deletions sklearn/model_selection/tests/test_search.py
@@ -25,6 +25,7 @@
from sklearn.utils.testing import assert_false, assert_true
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_allclose
from sklearn.utils.testing import assert_almost_equal
from sklearn.utils.testing import assert_greater_equal
from sklearn.utils.testing import ignore_warnings
@@ -53,6 +54,7 @@
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import ParameterSampler
from sklearn.model_selection._search import BaseSearchCV

from sklearn.model_selection._validation import FitFailedWarning

@@ -881,8 +883,8 @@ def test_random_search_cv_results():
check_cv_results_array_types(search, param_keys, score_keys)
check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
# For random_search, all the param array vals should be unmasked
assert_false(any(cv_results['param_C'].mask) or
any(cv_results['param_gamma'].mask))
assert_false(any(np.ma.getmaskarray(cv_results['param_C'])) or
any(np.ma.getmaskarray(cv_results['param_gamma'])))


@ignore_warnings(category=DeprecationWarning)
@@ -1539,6 +1541,55 @@ def test_transform_inverse_transform_round_trip():
assert_array_equal(X, X_round_trip)


def test_custom_run_search():
def check_results(results, gscv):
exp_results = gscv.cv_results_
assert sorted(results.keys()) == sorted(exp_results)
for k in results:
if not k.endswith('_time'):
# XXX: results['params'] is a list :|
results[k] = np.asanyarray(results[k])
if results[k].dtype.kind == 'O':
assert_array_equal(exp_results[k], results[k],
err_msg='Checking ' + k)
else:
assert_allclose(exp_results[k], results[k],
err_msg='Checking ' + k)

def fit_grid(param_grid):
return GridSearchCV(clf, param_grid, cv=5,
return_train_score=True).fit(X, y)

class CustomSearchCV(BaseSearchCV):
def __init__(self, estimator, **kwargs):
super(CustomSearchCV, self).__init__(estimator, **kwargs)

def _run_search(self, evaluate):
results = evaluate([{'max_depth': 1}, {'max_depth': 2}])
check_results(results, fit_grid({'max_depth': [1, 2]}))
results = evaluate([{'min_samples_split': 5},
{'min_samples_split': 10}])
check_results(results, fit_grid([{'max_depth': [1, 2]},
{'min_samples_split': [5, 10]}]))

# Using regressor to make sure each score differs
clf = DecisionTreeRegressor(random_state=0)
X, y = make_classification(n_samples=100, n_informative=4,
random_state=0)
mycv = CustomSearchCV(clf, cv=5, return_train_score=True).fit(X, y)
gscv = fit_grid([{'max_depth': [1, 2]},
{'min_samples_split': [5, 10]}])

results = mycv.cv_results_
check_results(results, gscv)
for attr in dir(gscv):
if attr[0].islower() and attr[-1:] == '_' and \
attr not in {'cv_results_', 'best_estimator_',
'refit_time_'}:
assert getattr(gscv, attr) == getattr(mycv, attr), \
"Attribute %s not equal" % attr


def test_deprecated_grid_search_iid():
depr_message = ("The default of the `iid` parameter will change from True "
"to False in version 0.22")
3 changes: 2 additions & 1 deletion sklearn/utils/testing.py
@@ -541,7 +541,8 @@ def uninstall_mldata_mock():
"RFE", "RFECV", "BaseEnsemble", "ClassifierChain",
"RegressorChain"]
# estimators that there is no way to default-construct sensibly
OTHER = ["Pipeline", "FeatureUnion", "GridSearchCV", "RandomizedSearchCV",
OTHER = ["Pipeline", "FeatureUnion",
"GridSearchCV", "RandomizedSearchCV",
"SelectFromModel", "ColumnTransformer"]

# some strange ones