Adds fit_params support for stacking classifiers #255
base: master
Changes from all commits: 61a027d, 24473ac, 2b3668b, 7a67ce5, ab389b1
The first file adds `**fit_params` to the stacking classifier's `fit` and routes each parameter to its base classifier or to the meta-classifier by estimator name (the capture does not preserve file paths):

```diff
@@ -77,8 +77,8 @@ def __init__(self, classifiers, meta_classifier,
         self.verbose = verbose
         self.use_features_in_secondary = use_features_in_secondary
 
-    def fit(self, X, y):
-        """ Fit ensemble classifers and the meta-classifier.
+    def fit(self, X, y, **fit_params):
+        """Fit ensemble classifers and the meta-classifier.
 
         Parameters
         ----------
@@ -87,18 +87,26 @@ def fit(self, X, y):
             n_features is the number of features.
         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
             Target values.
+        fit_params : dict, optional
+            Parameters to pass to the fit methods of `classifiers` and
+            `meta_classifier`.
 
         Returns
         -------
         self : object
 
         """
         self.clfs_ = [clone(clf) for clf in self.classifiers]
+        self.named_clfs_ = {key: value for key, value in
+                            _name_estimators(self.clfs_)}
         self.meta_clf_ = clone(self.meta_classifier)
+        self.named_meta_clf_ = {'meta-%s' % key: value for key, value in
+                                _name_estimators([self.meta_clf_])}
 
         if self.verbose > 0:
             print("Fitting %d classifiers..." % (len(self.classifiers)))
 
-        for clf in self.clfs_:
+        for name, clf in six.iteritems(self.named_clfs_):
 
             if self.verbose > 0:
                 i = self.clfs_.index(clf) + 1
@@ -112,14 +120,27 @@ def fit(self, X, y):
             if self.verbose > 1:
                 print(_name_estimators((clf,))[0][1])
 
-            clf.fit(X, y)
+            # Extract fit_params for clf
+            clf_fit_params = {}
+            for key, value in six.iteritems(fit_params):
+                if name in key and 'meta-' not in key:
+                    clf_fit_params[key.replace(name+'__', '')] = value
+
+            clf.fit(X, y, **clf_fit_params)
 
         meta_features = self._predict_meta_features(X)
+        # Extract fit_params for meta_clf_
+        meta_fit_params = {}
+        meta_clf_name = list(self.named_meta_clf_.keys())[0]
+        for key, value in six.iteritems(fit_params):
+            if meta_clf_name in key and 'meta-' in meta_clf_name:
+                meta_fit_params[key.replace(meta_clf_name+'__', '')] = value
 
         if not self.use_features_in_secondary:
-            self.meta_clf_.fit(meta_features, y)
+            self.meta_clf_.fit(meta_features, y, **meta_fit_params)
         else:
-            self.meta_clf_.fit(np.hstack((X, meta_features)), y)
+            self.meta_clf_.fit(np.hstack((X, meta_features)), y,
+                               **meta_fit_params)
 
         return self
```

(An inline review thread on the `named_clfs_` comprehension is collected at the end of this page.)
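For reference, a minimal usage sketch of the new keyword routing, following the `'<name>__<param>'` convention used in the test below. The estimators chosen here are illustrative, not part of the PR; the names are the lowercased class names that `_name_estimators` generates:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier

X = np.random.randn(100, 4)
y = np.random.randint(0, 2, size=100)

sclf = StackingClassifier(
    classifiers=[LogisticRegression(), RandomForestClassifier()],
    meta_classifier=LogisticRegression())

# Keys are '<name>__<param>' for base classifiers and
# 'meta-<name>__<param>' for the meta-classifier.
fit_params = {
    'randomforestclassifier__sample_weight': np.ones(X.shape[0]),
    'meta-logisticregression__sample_weight': np.ones(X.shape[0]),
}
sclf.fit(X, y, **fit_params)
```

Note that the `'meta-' not in key` guard is what keeps `'meta-logisticregression__sample_weight'` from also being routed to the base `LogisticRegression`.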
The second file applies the same change to the cross-validation variant, whose `fit` also takes `groups`:

```diff
@@ -111,29 +111,36 @@ def __init__(self, classifiers, meta_classifier,
         self.stratify = stratify
         self.shuffle = shuffle
 
-    def fit(self, X, y, groups=None):
+    def fit(self, X, y, groups=None, **fit_params):
         """ Fit ensemble classifers and the meta-classifier.
 
         Parameters
         ----------
         X : numpy array, shape = [n_samples, n_features]
             Training vectors, where n_samples is the number of samples and
             n_features is the number of features.
 
         y : numpy array, shape = [n_samples]
             Target values.
 
         groups : numpy array/None, shape = [n_samples]
             The group that each sample belongs to. This is used by specific
             folding strategies such as GroupKFold()
+        fit_params : dict, optional
+            Parameters to pass to the fit methods of `classifiers` and
+            `meta_classifier`. Note that only fit parameters for `classifiers`
+            that are the same for each cross-validation split are supported
+            (e.g. `sample_weight` is not currently supported).
 
         Returns
         -------
         self : object
 
         """
         self.clfs_ = [clone(clf) for clf in self.classifiers]
+        self.named_clfs_ = {key: value for key, value in
+                            _name_estimators(self.clfs_)}
         self.meta_clf_ = clone(self.meta_classifier)
+        self.named_meta_clf_ = {'meta-%s' % key: value for key, value in
+                                _name_estimators([self.meta_clf_])}
         if self.verbose > 0:
             print("Fitting %d classifiers..." % (len(self.classifiers)))
 
@@ -144,8 +151,23 @@ def fit(self, X, y, groups=None):
             final_cv.shuffle = self.shuffle
         skf = list(final_cv.split(X, y, groups))
 
+        # Get fit_params for each classifier in self.named_clfs_
+        named_clfs_fit_params = {}
+        for name, clf in six.iteritems(self.named_clfs_):
+            clf_fit_params = {}
+            for key, value in six.iteritems(fit_params):
+                if name in key and 'meta-' not in key:
+                    clf_fit_params[key.replace(name+'__', '')] = value
+            named_clfs_fit_params[name] = clf_fit_params
+        # Get fit_params for self.named_meta_clf_
+        meta_fit_params = {}
+        meta_clf_name = list(self.named_meta_clf_.keys())[0]
+        for key, value in six.iteritems(fit_params):
+            if meta_clf_name in key and 'meta-' in meta_clf_name:
+                meta_fit_params[key.replace(meta_clf_name+'__', '')] = value
 
         all_model_predictions = np.array([]).reshape(len(y), 0)
-        for model in self.clfs_:
+        for name, model in six.iteritems(self.named_clfs_):
 
             if self.verbose > 0:
                 i = self.clfs_.index(model) + 1
@@ -172,7 +194,8 @@ def fit(self, X, y, groups=None):
                               ((num + 1), final_cv.get_n_splits()))
 
                 try:
-                    model.fit(X[train_index], y[train_index])
+                    model.fit(X[train_index], y[train_index],
+                              **named_clfs_fit_params[name])
                 except TypeError as e:
                     raise TypeError(str(e) + '\nPlease check that X and y'
                                     'are NumPy arrays. If X and y are lists'
@@ -215,16 +238,17 @@ def fit(self, X, y, groups=None):
                                                      X[test_index]))
 
         # Fit the base models correctly this time using ALL the training set
-        for model in self.clfs_:
-            model.fit(X, y)
+        for name, model in six.iteritems(self.named_clfs_):
+            model.fit(X, y, **named_clfs_fit_params[name])
 
         # Fit the secondary model
         if not self.use_features_in_secondary:
-            self.meta_clf_.fit(all_model_predictions, reordered_labels)
+            self.meta_clf_.fit(all_model_predictions, reordered_labels,
+                               **meta_fit_params)
         else:
             self.meta_clf_.fit(np.hstack((reordered_features,
                                           all_model_predictions)),
-                               reordered_labels)
+                               reordered_labels, **meta_fit_params)
 
         return self
```
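The docstring caveat about per-split parameters follows directly from the implementation: the extracted `named_clfs_fit_params` are computed once and passed unchanged to every fold's `fit` call, so an array-valued parameter sized to the full training set will not match a fold's subset. A small illustration with plain scikit-learn (not PR code, just the failure mode it describes):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

X = np.random.randn(100, 4)
y = np.random.randint(0, 2, size=100)
sample_weight = np.ones(len(y))  # sized for all 100 samples

clf = LogisticRegression()
train_index, _ = next(KFold(n_splits=5).split(X))
try:
    # 80 training rows vs. 100 weights -> inconsistent lengths
    clf.fit(X[train_index], y[train_index], sample_weight=sample_weight)
except ValueError as err:
    print(err)
```

Supporting `sample_weight` here would require slicing each array with `train_index` per fold, which this PR explicitly leaves out of scope.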
The third file adds a test that exercises the new `fit_params` routing end to end (via `StackingRegressor` and `cross_val_score`):

```diff
@@ -11,7 +11,7 @@
 import numpy as np
 from numpy.testing import assert_almost_equal
 from nose.tools import raises
-from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import GridSearchCV, cross_val_score
 
 # Generating a sample dataset
 np.random.seed(1)
@@ -108,6 +108,23 @@ def test_gridsearch_numerate_regr():
     assert best == got
 
 
+def test_StackingRegressor_fit_params():
+    lr = LinearRegression()
+    svr_lin = SVR(kernel='linear')
+    ridge = Ridge(random_state=1)
+    svr_rbf = SVR(kernel='rbf')
+    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
+                               meta_regressor=svr_rbf)
+
+    fit_params = {'ridge__sample_weight': np.ones(X1.shape[0]),
+                  'svr__sample_weight': np.ones(X1.shape[0]),
+                  'meta-svr__sample_weight': np.ones(X1.shape[0])}
+
+    scores = cross_val_score(stregr, X1, y, cv=5, fit_params=fit_params)
+    scores_mean = (round(scores.mean(), 1))
+    assert scores_mean == 0.1
+
+
 def test_get_coeff():
     lr = LinearRegression()
     svr_lin = SVR(kernel='linear')
```

A review note attached to the bare `assert` in the new test, which the linter flagged:

> Just set up codacy today ... these msgs are annoying. Will see if I can disable those (at least for the asserts)
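The key prefixes in the test's `fit_params` dict come from `_name_estimators`, which names each estimator after its lowercased class (numbering duplicates). A sketch of where `'svr'`, `'ridge'`, and `'meta-svr'` come from; the import path for the vendored helper is my assumption and may differ by mlxtend version:

```python
from mlxtend.externals.name_estimators import _name_estimators  # assumed path
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR

# Same estimators as in test_StackingRegressor_fit_params
base = _name_estimators([SVR(kernel='linear'), LinearRegression(),
                         Ridge(random_state=1)])
print([name for name, _ in base])
# expected: ['svr', 'linearregression', 'ridge']

# The meta-estimator gets a 'meta-' prefix, matching 'meta-%s' in the diff
meta = _name_estimators([SVR(kernel='rbf')])
print(['meta-%s' % name for name, _ in meta])
# expected: ['meta-svr']
```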
Review discussion on the `named_clfs_` dict comprehension (the first comment is truncated in this capture):

> `dict(_name_estimators(self.clfs_))` might be more efficient since the […]

> I suppose `dict(_name_estimators(self.clfs_))` might work, but no guarantee.

> hm yeah, I think it should be equivalent indeed
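The equivalence the thread settles on is easy to check; a quick sketch (same assumed import path as above, estimators illustrative):

```python
from mlxtend.externals.name_estimators import _name_estimators  # assumed path
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

clfs = [LogisticRegression(), SVC()]

# _name_estimators already yields (name, estimator) pairs, so the diff's
# comprehension and a plain dict() call build the same mapping.
assert {k: v for k, v in _name_estimators(clfs)} == dict(_name_estimators(clfs))
```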