Skip to content

Commit

Permalink
update fixes.py and grid_search.py to improve coverage
Browse files (browse the repository at this point in the history)
  • Loading branch information
charlesdrotar committed Oct 12, 2016
1 parent 8b4a049 commit a1f5216
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 42 deletions.
15 changes: 5 additions & 10 deletions skutil/grid_search.py
Expand Up @@ -182,8 +182,7 @@ def score(self, X, y=None):
score : float
Notes
-----
**Notes**
* The long-standing behavior of this method changed in version 0.16.
* It no longer uses the metric provided by ``estimator.score`` if the
Expand Down Expand Up @@ -537,8 +536,7 @@ class GridSearchCV(BaseSearchCV):
Scorer function used on the held out data to choose the best
parameters for the model.
Notes
-----
**Notes**
The parameters selected are those that maximize the score of the left out
data, unless an explicit score is passed in which case it is used instead.
Expand All @@ -550,8 +548,7 @@ class GridSearchCV(BaseSearchCV):
`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *
n_jobs`.
See Also
--------
**See Also**
:class:`ParameterGrid`:
generates all the combinations of a hyperparameter grid.
Expand Down Expand Up @@ -718,8 +715,7 @@ class RandomizedSearchCV(BaseSearchCV):
best_params_ : dict
Parameter setting that gave the best results on the hold out data.
Notes
-----
**Notes**
The parameters selected are those that maximize the score of the held-out
data, according to the scoring parameter.
Expand All @@ -731,8 +727,7 @@ class RandomizedSearchCV(BaseSearchCV):
`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *
n_jobs`.
See Also
--------
**See Also**
:class:`GridSearchCV`:
Does exhaustive search over a grid of parameters.
Expand Down
61 changes: 29 additions & 32 deletions skutil/utils/fixes.py
Expand Up @@ -21,25 +21,24 @@
from sklearn.model_selection import ParameterSampler, ParameterGrid
from sklearn.utils.validation import indexable


def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
X, y, scorer, parameter_iterable, fit_params,
def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
X, y, scorer, parameter_iterable, fit_params,
error_score, cv, **kwargs):
groups = kwargs.pop('groups')

# test_score, n_samples, parameters
out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
delayed(_fit_and_score)(
clone(base_estimator), X, y, scorer,
train, test, verbose, parameters,
fit_params=fit_params,
return_train_score=False,
return_n_test_samples=True,
return_times=False,
return_parameters=True,
error_score=error_score)
for parameters in parameter_iterable
for train, test in cv.split(X, y, groups))
train, test, verbose, parameters,
fit_params=fit_params,
return_train_score=False,
return_n_test_samples=True,
return_times=False,
return_parameters=True,
error_score=error_score)
for parameters in parameter_iterable
for train, test in cv.split(X, y, groups))

# test_score, n_samples, _, parameters
return [(mod[0], mod[1], None, mod[2]) for mod in out]
Expand All @@ -54,19 +53,18 @@ def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
from sklearn.cross_validation import _fit_and_score
from sklearn.grid_search import ParameterSampler, ParameterGrid


def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
X, y, scorer, parameter_iterable, fit_params,
def do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
X, y, scorer, parameter_iterable, fit_params,
error_score, cv, **kwargs):
# test_score, n_samples, score_time, parameters
return Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
delayed(_fit_and_score)(
clone(base_estimator), X, y, scorer,
train, test, verbose, parameters,
fit_params, return_parameters=True,
error_score=error_score)
for parameters in parameter_iterable
for train, test in cv)
train, test, verbose, parameters,
fit_params, return_parameters=True,
error_score=error_score)
for parameters in parameter_iterable
for train, test in cv)


def cv_len(cv, X, y):
Expand All @@ -80,7 +78,6 @@ def set_cv(cv, X, y, classifier):
def get_groups(X, y):
    """Return an ``(X, y, groups)`` triple in which ``groups`` is ``None``.

    When the module-level ``SK18`` flag is truthy the triple is routed
    through ``indexable``; otherwise the inputs are handed back untouched
    in a plain tuple.
    NOTE(review): ``SK18`` presumably marks scikit-learn >= 0.18 -- confirm
    against where the flag is defined.
    """
    if SK18:
        return indexable(X, y, None)
    return (X, y, None)


__all__ = [
'_as_numpy',
'_validate_X',
Expand Down Expand Up @@ -112,7 +109,7 @@ def _validate_X(X):

def _validate_y(y):
"""Returns y if y isn't a series, otherwise the array"""
if y is None: # unsupervised
if y is None: # unsupervised
return y

# if it's a series
Expand All @@ -129,7 +126,6 @@ def _validate_y(y):
# bail and let the sklearn function handle validation
return y


def _check_param_grid(param_grid):
if hasattr(param_grid, 'items'):
param_grid = [param_grid]
Expand Down Expand Up @@ -162,7 +158,6 @@ class _CVScoreTuple(namedtuple('_CVScoreTuple', ('parameters', 'mean_validation_
dynamic attributes. Furthermore we don't need any additional slot in the
subclass so we set __slots__ to the empty tuple. """
__slots__ = tuple()

def __repr__(self):
"""Simple custom repr to summarize the main info"""
return "mean: {0:.5f}, std: {1:.5f}, params: {2}".format(
Expand All @@ -172,7 +167,7 @@ def __repr__(self):


class _SK17BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator,
MetaEstimatorMixin)):
MetaEstimatorMixin)):
"""Base class for hyper parameter search with cross-validation.
scikit-utils must redefine this class, because sklearn's version
internally treats all Xs and ys as lists or np.ndarrays. We redefine
Expand Down Expand Up @@ -339,8 +334,8 @@ def inverse_transform(self, Xt):

def _fit(self, X, y, parameter_iterable):
"""Actual fitting, performing the search over parameters."""
X = _validate_X(X) # if it's a frame, will be turned into a matrix
y = _validate_y(y) # if it's a series, make it into a list
X = _validate_X(X) # if it's a frame, will be turned into a matrix
y = _validate_y(y) # if it's a series, make it into a list

# for debugging
assert not isinstance(X, pd.DataFrame)
Expand Down Expand Up @@ -373,12 +368,12 @@ def _fit(self, X, y, parameter_iterable):

# get groups, add it to kwargs
X, y, groups = get_groups(X, y)
kwargs = {'groups': groups}
kwargs = {'groups':groups}

# test_score, n_samples, _, parameters
out = do_fit(self.n_jobs, self.verbose, pre_dispatch,
base_estimator, X, y, self.scorer_, parameter_iterable,
self.fit_params, self.error_score, cv, **kwargs)
out = do_fit(self.n_jobs, self.verbose, pre_dispatch,
base_estimator, X, y, self.scorer_, parameter_iterable,
self.fit_params, self.error_score, cv, **kwargs)

# out is a list of 4-tuples: (test_score, n_test_samples, _, parameters)
n_fits = len(out)
Expand Down Expand Up @@ -592,6 +587,7 @@ class _SK17GridSearchCV(_SK17BaseSearchCV):
def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
n_jobs=1, iid=True, refit=True, cv=None, verbose=0,
pre_dispatch='2*n_jobs', error_score='raise'):

super(_SK17GridSearchCV, self).__init__(
estimator, scoring, fit_params, n_jobs, iid,
refit, cv, verbose, pre_dispatch, error_score)
Expand Down Expand Up @@ -764,6 +760,7 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
verbose=0, pre_dispatch='2*n_jobs', random_state=None,
error_score='raise'):

self.param_distributions = param_distributions
self.n_iter = n_iter
self.random_state = random_state
Expand All @@ -790,4 +787,4 @@ def fit(self, X, y=None):
random_state=self.random_state)

# the super class will handle the X, y validation
return self._fit(X, y, sampled_params)
return self._fit(X, y, sampled_params)

0 comments on commit a1f5216

Please sign in to comment.