Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] OOB-aware grid search. #3720

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 38 additions & 0 deletions sklearn/grid_search.py
Expand Up @@ -758,3 +758,41 @@ def fit(self, X, y=None):
self.n_iter,
random_state=self.random_state)
return self._fit(X, y, sampled_params)


class GridSearchOOB(BaseEstimator):
    """Exhaustive parameter search scored on out-of-bag predictions.

    Unlike ``GridSearchCV``, no cross-validation loop is required: each
    candidate estimator is fit once on the full data and evaluated on the
    out-of-bag predictions it exposes as ``oob_prediction_`` (e.g. bagging
    ensembles fit with ``oob_score=True``).

    Parameters
    ----------
    estimator : estimator object
        Must expose an ``oob_prediction_`` attribute after ``fit``.

    param_grid : dict or list of dicts
        Grid of parameter settings to try, as accepted by ``ParameterGrid``.

    scoring : string or callable
        Scoring strategy, as accepted by ``check_scoring``.

    Attributes
    ----------
    best_estimator_ : estimator
        The fitted estimator that achieved the highest OOB score.

    best_score_ : float
        OOB score of ``best_estimator_``.

    best_params_ : dict
        Parameter setting that yielded ``best_estimator_``.
    """

    def __init__(self, estimator, param_grid, scoring):
        self.estimator = estimator
        self.param_grid = param_grid
        self.scoring = scoring

    def _score(self, y, y_pred):
        """Apply the scorer's metric directly to precomputed predictions."""
        s = self.scorer_
        # Need this hack because the current scorer API recomputes the
        # predictions: scoring(estimator, X, y)...
        return s._score_func(y, y_pred, **s._kwargs) * s._sign

    def fit(self, X, y):
        """Fit one clone per grid point and keep the best by OOB score.

        Sets ``best_estimator_``, ``best_score_`` and ``best_params_``.
        Returns ``self``.
        """
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        best_score = None
        best_estimator = None
        best_params = None

        for params in ParameterGrid(self.param_grid):
            estimator = clone(self.estimator)
            estimator.set_params(**params)
            estimator.fit(X, y)

            # Fail with a clear message instead of an opaque AttributeError
            # when the estimator was not fit with OOB scoring enabled.
            if not hasattr(estimator, "oob_prediction_"):
                raise ValueError(
                    "The estimator must expose an oob_prediction_ attribute "
                    "after fit (e.g. be fit with oob_score=True).")

            score = self._score(y, estimator.oob_prediction_)

            if best_score is None or score > best_score:
                best_estimator = estimator
                best_score = score
                best_params = params

        self.best_estimator_ = best_estimator
        self.best_score_ = best_score
        self.best_params_ = best_params

        return self

    def predict(self, X):
        """Predict with the best estimator found during ``fit``."""
        return self.best_estimator_.predict(X)
2 changes: 1 addition & 1 deletion sklearn/metrics/scorer.py
Expand Up @@ -86,7 +86,7 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
else:
return self._sign * self._score_func(y_true, y_pred,
**self._kwargs)


class _ProbaScorer(_BaseScorer):
def __call__(self, clf, X, y, sample_weight=None):
Expand Down
30 changes: 26 additions & 4 deletions sklearn/tests/test_grid_search.py
Expand Up @@ -34,10 +34,17 @@
from sklearn.datasets import make_classification
from sklearn.datasets import make_blobs
from sklearn.datasets import make_multilabel_classification
from sklearn.grid_search import (GridSearchCV, RandomizedSearchCV,
ParameterGrid, ParameterSampler,
ChangedBehaviorWarning)
from sklearn.svm import LinearSVC, SVC
from sklearn.datasets import load_diabetes

from sklearn.grid_search import GridSearchCV
from sklearn.grid_search import GridSearchOOB
from sklearn.grid_search import RandomizedSearchCV
from sklearn.grid_search import ParameterGrid
from sklearn.grid_search import ParameterSampler
from sklearn.grid_search import ChangedBehaviorWarning
from sklearn.grid_search import ParameterSampler

from sklearn.svm import LinearSVC, SVC, SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans, SpectralClustering
Expand All @@ -47,6 +54,7 @@
from sklearn.cross_validation import KFold, StratifiedKFold
from sklearn.preprocessing import Imputer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaggingRegressor


# Neither of the following two estimators inherit from BaseEstimator,
Expand Down Expand Up @@ -674,3 +682,17 @@ def test_grid_search_allows_nans():
('classifier', MockClassifier()),
])
GridSearchCV(p, {'classifier__foo_param': [1, 2, 3]}, cv=2).fit(X, y)


def test_grid_search_oob():
    """GridSearchOOB picks the (C, gamma) pair with the best OOB R^2."""
    diabetes = load_diabetes()
    grid = {"base_estimator__C": [0.1, 1, 10],
            "base_estimator__gamma": [0.1, 1, 10]}
    bagging = BaggingRegressor(SVR(kernel="rbf"), n_estimators=50,
                               oob_score=True, random_state=0)

    search = GridSearchOOB(bagging, grid, scoring="r2")
    search.fit(diabetes.data, diabetes.target)

    chosen = search.best_estimator_.estimators_[0]
    assert_equal(chosen.C, 10)
    assert_equal(chosen.gamma, 10)