-
Notifications
You must be signed in to change notification settings - Fork 182
/
multi_output_learner.py
243 lines (194 loc) · 8.92 KB
/
multi_output_learner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import numpy as np
import copy as cp
import warnings
from inspect import signature
from sklearn.linear_model import SGDClassifier
from skmultiflow.core import BaseSKMObject, ClassifierMixin, MetaEstimatorMixin, MultiOutputMixin
# Recognized values of the sklearn-style ``_estimator_type`` attribute; used by
# ``MultiOutputLearner._check_estimator_type`` to decide whether the wrapped
# base estimator performs classification or regression.
_CLASSIFIER_TYPE = "classifier"
_REGRESSOR_TYPE = "regressor"
class MultiOutputLearner(BaseSKMObject, ClassifierMixin, MetaEstimatorMixin, MultiOutputMixin):
    """ Multi-Output Learner for multi-target classification or regression.

    Parameters
    ----------
    base_estimator: skmultiflow.core.BaseSKMObject or sklearn.BaseEstimator
        (default=SGDClassifier(max_iter=100)) Each member of the ensemble is
        an instance of the base estimator.

    Notes
    -----
    Use this meta learner to make single output predictors capable of learning
    a multi output problem, by applying them individually to each output. In
    the classification context, this is the "binary relevance" estimator.

    A Multi-Output Learner model learns to predict multiple outputs for each
    instance. The outputs may either be discrete (i.e., classification),
    or continuous (i.e., regression). This estimator takes any base learner
    (which by default is SGDClassifier) and builds a separate model
    for each output, and will distribute each instance to each model
    for individual learning and prediction.

    Examples
    --------
    >>> from skmultiflow.meta.multi_output_learner import MultiOutputLearner
    >>> from skmultiflow.data.file_stream import FileStream
    >>> from sklearn.linear_model import Perceptron
    >>> # Setup the file stream
    >>> stream = FileStream("https://raw.githubusercontent.com/scikit-multiflow/"
    ...                     "streaming-datasets/master/moving_squares.csv", 0, 6)
    >>> # Setup the MultiOutputLearner using sklearn Perceptron
    >>> classifier = MultiOutputLearner(base_estimator=Perceptron())
    >>> # Setup the pipeline
    >>> # Pre training the classifier with 150 samples
    >>> X, y = stream.next_sample(150)
    >>> classifier.partial_fit(X, y, classes=stream.target_values)
    >>> # Keeping track of sample count, true labels and predictions to later
    >>> # compute the classifier's hamming score
    >>> count = 0
    >>> true_labels = []
    >>> predicts = []
    >>> while stream.has_more_samples():
    ...     X, y = stream.next_sample()
    ...     p = classifier.predict(X)
    ...     classifier.partial_fit(X, y)
    ...     predicts.extend(p)
    ...     true_labels.extend(y)
    ...     count += 1
    >>>
    >>> perf = hamming_score(true_labels, predicts)
    >>> print('Total samples analyzed: ' + str(count))
    >>> print("The classifier's static Hamming score : " + str(perf))
    """

    def __init__(self, base_estimator=SGDClassifier(max_iter=100)):
        super().__init__()
        self.base_estimator = base_estimator
        self._check_estimator_type()
        # One deep copy of ``base_estimator`` per target; created lazily in
        # ``__configure`` once the number of targets is known.
        self.ensemble = None
        self.n_targets = None

    def _check_estimator_type(self):
        """ Set ``self._estimator_type`` from the base estimator's type.

        Defaults to classification (with a RuntimeWarning) when the base
        estimator's type is missing or unrecognized.
        """
        # Inspect the *base estimator*, not ``self``: the ClassifierMixin base
        # class already defines ``_estimator_type`` on ``self``, so the
        # original ``hasattr(self, ...)`` guard was always True and a base
        # estimator lacking the attribute crashed with AttributeError instead
        # of falling back to the warning below.
        if hasattr(self.base_estimator, "_estimator_type"):
            if self.base_estimator._estimator_type == _CLASSIFIER_TYPE:
                self._estimator_type = _CLASSIFIER_TYPE
                return
            elif self.base_estimator._estimator_type == _REGRESSOR_TYPE:
                self._estimator_type = _REGRESSOR_TYPE
                return
        warnings.warn("Unknown base-estimator type, default is classification.", RuntimeWarning)
        self._estimator_type = _CLASSIFIER_TYPE

    def __configure(self):
        # Build one independent copy of the base estimator per target.
        self.ensemble = [cp.deepcopy(self.base_estimator) for _ in range(self.n_targets)]

    @staticmethod
    def _supported_kwargs(method, classes, sample_weight):
        """ Return the kwargs among classes/sample_weight that `method` accepts.

        Inspects the method's signature so estimators that do not take
        ``classes`` (e.g. regressors) or ``sample_weight`` are called without
        them. The original code used ``'sample_weight' and 'classes' in params``
        which — because a non-empty string literal is truthy — only checked
        for ``'classes'`` and could pass an unsupported ``sample_weight``.
        """
        params = signature(method).parameters
        kwargs = {}
        if 'classes' in params:
            kwargs['classes'] = classes
        if 'sample_weight' in params:
            kwargs['sample_weight'] = sample_weight
        return kwargs

    def fit(self, X, y, classes=None, sample_weight=None):
        """ Fit the model.

        Fit n-estimators, one for each learning task.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The features to train the model.

        y: numpy.ndarray of shape (n_samples, n_targets)
            An array-like with the target values of all samples in X.

        classes: numpy.ndarray, optional (default=None)
            Array with all possible/known class labels. Usage varies depending
            on the base estimator. Not used for regression.

        sample_weight: numpy.ndarray of shape (n_samples), optional (default=None)
            Samples weight. If not provided, uniform weights are assumed. Usage varies depending
            on the base estimator.

        Returns
        -------
        MultiOutputLearner
            self
        """
        N, L = y.shape
        self.n_targets = L
        self.__configure()
        for j in range(self.n_targets):
            member = self.ensemble[j]
            kwargs = self._supported_kwargs(member.fit, classes, sample_weight)
            member.fit(X, y[:, j], **kwargs)
        return self

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Partially fit each of the estimators on the X matrix and the
        corresponding y matrix.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The features to train the model.

        y: numpy.ndarray of shape (n_samples, n_targets)
            An array-like with the target values of all samples in X.

        classes: numpy.ndarray, optional (default=None)
            Array with all possible/known class labels. This is an optional parameter, except
            for the first partial_fit call where it is compulsory. Not used for regression.

        sample_weight: numpy.ndarray of shape (n_samples), optional (default=None)
            Samples weight. If not provided, uniform weights are assumed. Usage varies depending
            on the base estimator.

        Returns
        -------
        MultiOutputLearner
            self
        """
        if self.n_targets is None:
            # This is the first time that the model is fit
            self.fit(X=X, y=y, classes=classes, sample_weight=sample_weight)
            return self
        N, self.n_targets = y.shape
        if self.ensemble is None:
            self.__configure()
        for j in range(self.n_targets):
            member = self.ensemble[j]
            kwargs = self._supported_kwargs(member.partial_fit, classes, sample_weight)
            member.partial_fit(X, y[:, j], **kwargs)
        return self

    def predict(self, X):
        """ Predict target values for the passed data.

        Iterates over all the estimators, predicting with each one, to obtain
        the multi output prediction.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The set of data samples to predict the target values for.

        Returns
        -------
        numpy.ndarray
            numpy.ndarray of shape (n_samples, n_targets)
            All the predictions for the samples in X.
        """
        N, D = X.shape
        predictions = np.zeros((N, self.n_targets))
        for j in range(self.n_targets):
            predictions[:, j] = self.ensemble[j].predict(X)
        return predictions

    def predict_proba(self, X):
        """ Estimate the probability of the positive class for each target.

        Calls each member estimator's ``predict_proba`` and keeps column 1
        (the positive class), so each target task is assumed to be binary.

        Not applicable for regression tasks.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The set of data samples to predict the class labels for.

        Returns
        -------
        numpy.ndarray
            An array of shape (n_samples, n_targets) where entry (i, j) is the
            estimated probability that sample i belongs to the positive class
            of target j.

        Raises
        ------
        AttributeError
            If the base estimator does not implement ``predict_proba``.
        """
        N, D = X.shape
        proba = np.zeros((N, self.n_targets))
        for j in range(self.n_targets):
            try:
                proba[:, j] = self.ensemble[j].predict_proba(X)[:, 1]
            # The original `except NotImplementedError or AttributeError:`
            # evaluated to `except NotImplementedError:` only, so estimators
            # without a predict_proba attribute bypassed this handler. A tuple
            # catches both.
            except (NotImplementedError, AttributeError) as exc:
                raise AttributeError("Estimator {} has no predict_proba method".format(
                    type(self.base_estimator))) from exc
        return proba

    def reset(self):
        """ Reset the model to its post-construction state. """
        self.ensemble = None
        self.n_targets = None

    def _more_tags(self):
        return {'multioutput': True,
                'multioutput_only': True}