update fixes.py and grid_search.py to improve coverage

tgsmith61591 · Oct 12, 2016 · a1f5216 · a1f5216
1 parent 8b4a049
commit a1f5216
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 42 deletions.
diff --git a/skutil/grid_search.py b/skutil/grid_search.py
@@ -182,8 +182,7 @@ def score(self, X, y=None):
 
             score : float
 
-            Notes
-            -----
+            **Notes**
 
              * The long-standing behavior of this method changed in version 0.16.
              * It no longer uses the metric provided by ``estimator.score`` if the
@@ -537,8 +536,7 @@ class GridSearchCV(BaseSearchCV):
             Scorer function used on the held out data to choose the best
             parameters for the model.
 
-        Notes
-        -----
+        **Notes**
 
         The parameters selected are those that maximize the score of the left out
         data, unless an explicit score is passed in which case it is used instead.
@@ -550,8 +548,7 @@ class GridSearchCV(BaseSearchCV):
         `pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *
         n_jobs`.
 
-        See Also
-        --------
+        **See Also**
 
         :class:`ParameterGrid`:
             generates all the combinations of a hyperparameter grid.
@@ -718,8 +715,7 @@ class RandomizedSearchCV(BaseSearchCV):
         best_params_ : dict
             Parameter setting that gave the best results on the hold out data.
 
-        Notes
-        -----
+        **Notes**
 
         The parameters selected are those that maximize the score of the held-out
         data, according to the scoring parameter.
@@ -731,8 +727,7 @@ class RandomizedSearchCV(BaseSearchCV):
         `pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *
         n_jobs`.
 
-        See Also
-        --------
+        **See Also**
 
         :class:`GridSearchCV`:
             Does exhaustive search over a grid of parameters.

diff --git a/skutil/utils/fixes.py b/skutil/utils/fixes.py
@@ -21,25 +21,24 @@
     from sklearn.model_selection import ParameterSampler, ParameterGrid
     from sklearn.utils.validation import indexable
 
-
-    def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
-               X, y, scorer, parameter_iterable, fit_params,
+    def do_fit(n_jobs, verbose, pre_dispatch, base_estimator, 
+               X, y, scorer, parameter_iterable, fit_params, 
                error_score, cv, **kwargs):
         groups = kwargs.pop('groups')
 
         # test_score, n_samples, parameters
         out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
             delayed(_fit_and_score)(
                 clone(base_estimator), X, y, scorer,
-                train, test, verbose, parameters,
-                fit_params=fit_params,
-                return_train_score=False,
-                return_n_test_samples=True,
-                return_times=False,
-                return_parameters=True,
-                error_score=error_score)
-            for parameters in parameter_iterable
-            for train, test in cv.split(X, y, groups))
+                    train, test, verbose, parameters,
+                    fit_params=fit_params,
+                    return_train_score=False,
+                    return_n_test_samples=True,
+                    return_times=False, 
+                    return_parameters=True,
+                    error_score=error_score)
+                for parameters in parameter_iterable
+                for train, test in cv.split(X, y, groups))
 
         # test_score, n_samples, _, parameters
         return [(mod[0], mod[1], None, mod[2]) for mod in out]
@@ -54,19 +53,18 @@ def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
         from sklearn.cross_validation import _fit_and_score
         from sklearn.grid_search import ParameterSampler, ParameterGrid
 
-
-    def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
-               X, y, scorer, parameter_iterable, fit_params,
+    def do_fit(n_jobs, verbose, pre_dispatch, base_estimator, 
+               X, y, scorer, parameter_iterable, fit_params, 
                error_score, cv, **kwargs):
         # test_score, n_samples, score_time, parameters
         return Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
             delayed(_fit_and_score)(
                 clone(base_estimator), X, y, scorer,
-                train, test, verbose, parameters,
-                fit_params, return_parameters=True,
-                error_score=error_score)
-            for parameters in parameter_iterable
-            for train, test in cv)
+                    train, test, verbose, parameters,
+                    fit_params, return_parameters=True,
+                    error_score=error_score)
+                for parameters in parameter_iterable
+                for train, test in cv)
 
 
 def cv_len(cv, X, y):
@@ -80,7 +78,6 @@ def set_cv(cv, X, y, classifier):
 def get_groups(X, y):
     return (X, y, None) if not SK18 else indexable(X, y, None)
 
-
 __all__ = [
     '_as_numpy',
     '_validate_X',
@@ -112,7 +109,7 @@ def _validate_X(X):
 
 def _validate_y(y):
     """Returns y if y isn't a series, otherwise the array"""
-    if y is None:  # unsupervised
+    if y is None: # unsupervised
         return y
 
     # if it's a series
@@ -129,7 +126,6 @@ def _validate_y(y):
     # bail and let the sklearn function handle validation
     return y
 
-
 def _check_param_grid(param_grid):
     if hasattr(param_grid, 'items'):
         param_grid = [param_grid]
@@ -162,7 +158,6 @@ class _CVScoreTuple(namedtuple('_CVScoreTuple', ('parameters', 'mean_validation_
     dynamic attributes. Furthermore we don't need any additional slot in the
     subclass so we set __slots__ to the empty tuple. """
     __slots__ = tuple()
-
     def __repr__(self):
         """Simple custom repr to summarize the main info"""
         return "mean: {0:.5f}, std: {1:.5f}, params: {2}".format(
@@ -172,7 +167,7 @@ def __repr__(self):
 
 
 class _SK17BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator,
-                                           MetaEstimatorMixin)):
+                                      MetaEstimatorMixin)):
     """Base class for hyper parameter search with cross-validation.
     scikit-utils must redefine this class, because sklearn's version
     internally treats all Xs and ys as lists or np.ndarrays. We redefine
@@ -339,8 +334,8 @@ def inverse_transform(self, Xt):
 
     def _fit(self, X, y, parameter_iterable):
         """Actual fitting,  performing the search over parameters."""
-        X = _validate_X(X)  # if it's a frame, will be turned into a matrix
-        y = _validate_y(y)  # if it's a series, make it into a list
+        X = _validate_X(X) # if it's a frame, will be turned into a matrix
+        y = _validate_y(y) # if it's a series, make it into a list
 
         # for debugging
         assert not isinstance(X, pd.DataFrame)
@@ -373,12 +368,12 @@ def _fit(self, X, y, parameter_iterable):
 
         # get groups, add it to kwargs
         X, y, groups = get_groups(X, y)
-        kwargs = {'groups': groups}
+        kwargs = {'groups':groups}
 
         # test_score, n_samples, _, parameters
-        out = do_fit(self.n_jobs, self.verbose, pre_dispatch,
-                     base_estimator, X, y, self.scorer_, parameter_iterable,
-                     self.fit_params, self.error_score, cv, **kwargs)
+        out = do_fit(self.n_jobs, self.verbose, pre_dispatch, 
+            base_estimator, X, y, self.scorer_, parameter_iterable, 
+            self.fit_params, self.error_score, cv, **kwargs)
 
         # Out is a list of triplet: score, estimator, n_test_samples
         n_fits = len(out)
@@ -592,6 +587,7 @@ class _SK17GridSearchCV(_SK17BaseSearchCV):
     def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
                  n_jobs=1, iid=True, refit=True, cv=None, verbose=0,
                  pre_dispatch='2*n_jobs', error_score='raise'):
+
         super(_SK17GridSearchCV, self).__init__(
             estimator, scoring, fit_params, n_jobs, iid,
             refit, cv, verbose, pre_dispatch, error_score)
@@ -764,6 +760,7 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
                  fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
                  verbose=0, pre_dispatch='2*n_jobs', random_state=None,
                  error_score='raise'):
+
         self.param_distributions = param_distributions
         self.n_iter = n_iter
         self.random_state = random_state
@@ -790,4 +787,4 @@ def fit(self, X, y=None):
                                           random_state=self.random_state)
 
         # the super class will handle the X, y validation
-        return self._fit(X, y, sampled_params)
+        return self._fit(X, y, sampled_params)