/
_perceptron.py
229 lines (181 loc) · 7.53 KB
/
_perceptron.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# Author: Mathieu Blondel
# License: BSD 3 clause
from numbers import Real
from ..utils._param_validation import Interval, StrOptions
from ._stochastic_gradient import BaseSGDClassifier
class Perceptron(BaseSGDClassifier):
"""Linear perceptron classifier.
The implementation is a wrapper around :class:`~sklearn.linear_model.SGDClassifier`
by fixing the `loss` and `learning_rate` parameters as::
SGDClassifier(loss="perceptron", learning_rate="constant")
Other available parameters are described below and are forwarded to
:class:`~sklearn.linear_model.SGDClassifier`.
Read more in the :ref:`User Guide <perceptron>`.
Parameters
----------
penalty : {'l2','l1','elasticnet'}, default=None
The penalty (aka regularization term) to be used.
alpha : float, default=0.0001
Constant that multiplies the regularization term if regularization is
used.
l1_ratio : float, default=0.15
The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.
`l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.
Only used if `penalty='elasticnet'`.
.. versionadded:: 0.24
fit_intercept : bool, default=True
Whether the intercept should be estimated or not. If False, the
data is assumed to be already centered.
max_iter : int, default=1000
The maximum number of passes over the training data (aka epochs).
It only impacts the behavior in the ``fit`` method, and not the
:meth:`partial_fit` method.
.. versionadded:: 0.19
tol : float or None, default=1e-3
The stopping criterion. If it is not None, the iterations will stop
when (loss > previous_loss - tol).
.. versionadded:: 0.19
shuffle : bool, default=True
Whether or not the training data should be shuffled after each epoch.
verbose : int, default=0
The verbosity level.
eta0 : float, default=1
Constant by which the updates are multiplied.
n_jobs : int, default=None
The number of CPUs to use to do the OVA (One Versus All, for
multi-class problems) computation.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
random_state : int, RandomState instance or None, default=0
Used to shuffle the training data, when ``shuffle`` is set to
``True``. Pass an int for reproducible output across multiple
function calls.
See :term:`Glossary <random_state>`.
early_stopping : bool, default=False
Whether to use early stopping to terminate training when validation
score is not improving. If set to True, it will automatically set aside
a stratified fraction of training data as validation and terminate
training when validation score is not improving by at least `tol` for
`n_iter_no_change` consecutive epochs.
.. versionadded:: 0.20
validation_fraction : float, default=0.1
The proportion of training data to set aside as validation set for
early stopping. Must be between 0 and 1.
Only used if early_stopping is True.
.. versionadded:: 0.20
n_iter_no_change : int, default=5
Number of iterations with no improvement to wait before early stopping.
.. versionadded:: 0.20
class_weight : dict, {class_label: weight} or "balanced", default=None
Preset for the class_weight fit parameter.
Weights associated with classes. If not given, all classes
are supposed to have weight one.
The "balanced" mode uses the values of y to automatically adjust
weights inversely proportional to class frequencies in the input data
as ``n_samples / (n_classes * np.bincount(y))``.
warm_start : bool, default=False
When set to True, reuse the solution of the previous call to fit as
initialization, otherwise, just erase the previous solution. See
:term:`the Glossary <warm_start>`.
Attributes
----------
classes_ : ndarray of shape (n_classes,)
The unique classes labels.
coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \
(n_classes, n_features)
Weights assigned to the features.
intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)
Constants in decision function.
loss_function_ : concrete LossFunction
The function that determines the loss, or difference between the
output of the algorithm and the target values.
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
.. versionadded:: 1.0
n_iter_ : int
The actual number of iterations to reach the stopping criterion.
For multiclass fits, it is the maximum over every binary fit.
t_ : int
Number of weight updates performed during training.
Same as ``(n_iter_ * n_samples + 1)``.
See Also
--------
sklearn.linear_model.SGDClassifier : Linear classifiers
(SVM, logistic regression, etc.) with SGD training.
Notes
-----
``Perceptron`` is a classification algorithm which shares the same
underlying implementation with ``SGDClassifier``. In fact,
``Perceptron()`` is equivalent to `SGDClassifier(loss="perceptron",
eta0=1, learning_rate="constant", penalty=None)`.
References
----------
https://en.wikipedia.org/wiki/Perceptron and references therein.
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.linear_model import Perceptron
>>> X, y = load_digits(return_X_y=True)
>>> clf = Perceptron(tol=1e-3, random_state=0)
>>> clf.fit(X, y)
Perceptron()
>>> clf.score(X, y)
0.939...
"""
_parameter_constraints: dict = {**BaseSGDClassifier._parameter_constraints}
_parameter_constraints.pop("loss")
_parameter_constraints.pop("average")
_parameter_constraints.update(
{
"penalty": [StrOptions({"l2", "l1", "elasticnet"}), None],
"alpha": [Interval(Real, 0, None, closed="left")],
"l1_ratio": [Interval(Real, 0, 1, closed="both")],
"eta0": [Interval(Real, 0, None, closed="left")],
}
)
def __init__(
self,
*,
penalty=None,
alpha=0.0001,
l1_ratio=0.15,
fit_intercept=True,
max_iter=1000,
tol=1e-3,
shuffle=True,
verbose=0,
eta0=1.0,
n_jobs=None,
random_state=0,
early_stopping=False,
validation_fraction=0.1,
n_iter_no_change=5,
class_weight=None,
warm_start=False,
):
super().__init__(
loss="perceptron",
penalty=penalty,
alpha=alpha,
l1_ratio=l1_ratio,
fit_intercept=fit_intercept,
max_iter=max_iter,
tol=tol,
shuffle=shuffle,
verbose=verbose,
random_state=random_state,
learning_rate="constant",
eta0=eta0,
early_stopping=early_stopping,
validation_fraction=validation_fraction,
n_iter_no_change=n_iter_no_change,
power_t=0.5,
warm_start=warm_start,
class_weight=class_weight,
n_jobs=n_jobs,
)