-
Notifications
You must be signed in to change notification settings - Fork 857
/
ensemble_vote.py
177 lines (143 loc) · 6.55 KB
/
ensemble_vote.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# Ensemble Voting Regressor
from ..externals.estimator_checks import check_is_fitted
from ..externals.name_estimators import _name_estimators
from sklearn.base import BaseEstimator
import numpy as np
from sklearn.base import RegressorMixin
from sklearn.base import TransformerMixin
from sklearn.base import clone
from ..externals import six
class EnsembleVotingRegressor (BaseEstimator, RegressorMixin, TransformerMixin):
"""A Ensemble voting regressor for scikit-learn estimators for regression.
Parameters
----------
regressors : array-like, shape = [n_regressors]
A list of regressors.
Invoking the `fit` method on the `EnsembleVotingRegressor` will fit clones
of those original regressors that will
be stored in the class attribute
`self.regr_`.
weights : array-like, shape = [n_classifiers], optional (default=`None`)
Sequence of weights (`float` or `int`) to weight the occurances of
predicted class labels (`hard` voting) or class probabilities
before averaging (`soft` voting). Uses uniform weights if `None`.
verbose : int, optional (default=0)
Controls the verbosity of the building process.
- `verbose=0` (default): Prints nothing
- `verbose=1`: Prints the number & name of the regressor being fitted
- `verbose=2`: Prints info about the parameters of the
regressor being fitted
- `verbose>2`: Changes `verbose` param of the underlying regressor to
self.verbose - 2
Attributes
----------
regressors : array-like, shape = [n_predictions]
The unmodified input regressors
regr_ : list, shape=[n_regressors]
Fitted regressors (clones of the original regressors)
refit : bool (default: True)
Clones the regressors for stacking regression if True (default)
or else uses the original ones, which will be refitted on the dataset
upon calling the `fit` method. Setting refit=False is
recommended if you are working with estimators that are supporting
the scikit-learn fit/predict API interface but are not compatible
to scikit-learn's `clone` function.
"""
def __init__(self, regressors, weights=None, verbose=0, refit=True):
self.regressors = regressors
self.weights = weights
self.verbose = verbose
self.refit = refit
self.named_clfs = {key: value for key, value in _name_estimators(regressors)}
def fit(self, X, y, sample_weight=None):
"""Learn weight coefficients from training data for each classifier.
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
sample_weight : array-like, shape = [n_samples], optional
Sample weights passed as sample_weights to each regressor
in the regressors list .
Raises error if some regressor does not support
sample_weight in the fit() method.
Returns
-------
self : object
"""
if self.weights and len(self.weights) != len(self.regressors):
raise ValueError('Number of regressors and weights must be equal'
'; got %d weights, %d regressors'
% (len(self.weights), len(self.regressors)))
if not self.refit:
self.regr_ = [clf for clf in self.regressors]
else:
self.regr_ = [clone(clf) for clf in self.regressors]
if self.verbose > 0:
print("Fitting %d regressors..." % (len(self.regressors)))
for reg in self.regr_:
if self.verbose > 0:
i = self.regr_.index(reg) + 1
print("Fitting clf%d: %s (%d/%d)" %
(i, _name_estimators((reg,))[0][0], i,
len(self.regr_)))
if self.verbose > 2:
if hasattr(reg, 'verbose'):
reg.set_params(verbose=self.verbose - 2)
if self.verbose > 1:
print(_name_estimators((reg,))[0][1])
if sample_weight is None:
reg.fit(X, y)
else:
reg.fit(X, y, sample_weight=sample_weight)
return self
def predict(self, X):
""" Predict class labels for X.
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
Returns
----------
maj : array-like, shape = [n_samples]
Predicted class labels.
"""
check_is_fitted(self, 'regr_')
res = np.average(self._predict(X), axis=1,
weights=self.weights)
return res
def transform(self, X):
""" Return class labels or probabilities for X for each estimator.
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
Returns
-------
If `voting='soft'` : array-like = [n_classifiers, n_samples, n_classes]
Class probabilties calculated by each classifier.
If `voting='hard'` : array-like = [n_classifiers, n_samples]
Class labels predicted by each classifier.
"""
check_is_fitted(self, 'regr_')
return self._predict(X)
def get_params(self, deep=True):
"""Return estimator parameter names for GridSearch support."""
if not deep:
return super(EnsembleVotingRegressor, self).get_params(deep=False)
else:
out = self.named_clfs.copy()
for name, step in six.iteritems(self.named_clfs):
for key, value in six.iteritems(step.get_params(deep=True)):
out['%s__%s' % (name, key)] = value
for key, value in six.iteritems(super(EnsembleVotingRegressor,
self).get_params(deep=False)):
out['%s' % key] = value
return out
def _predict(self, X):
"""Collect results from clf.predict calls."""
return np.asarray([clf.predict(X) for clf in self.regr_]).T