
[MRG] Adding max_fun parameter to MLP for use in lbfgs optimization #9274

Merged
merged 28 commits on Jul 2, 2019
Changes from 23 commits
Commits
28 commits
71e6d30
Setting both maxiter and maxfun in call to lbfgs.
daniel-perry Jul 3, 2017
f46d716
Merge branch 'master' of github.com:scikit-learn/scikit-learn into ML…
daniel-perry Sep 7, 2017
646a7fd
[MRG] adding max_fun parameter to MLP
daniel-perry Sep 7, 2017
9e30730
better comments, increasing default max_fun to 15000
daniel-perry Sep 8, 2017
71daad8
better comments, fixing some PEP8 warnings
daniel-perry Oct 18, 2017
27a212a
Merge branch 'master' of github.com:scikit-learn/scikit-learn into ML…
daniel-perry Oct 18, 2017
63dadf1
Merge branch 'master' into MLP-maxiters
agramfort Feb 25, 2019
97268c1
test exception
agramfort Feb 25, 2019
3b721b8
update what's new
agramfort Feb 25, 2019
77e9074
doctests
agramfort Feb 25, 2019
0182849
Merge branch 'master' into MLP-maxiters
agramfort Feb 26, 2019
06d1312
lint
agramfort Feb 26, 2019
8a27683
clarifying comments about function call
daniel-perry May 12, 2019
7864227
fixing PEP8 errors
daniel-perry May 12, 2019
508a42d
Merge branch 'master' into MLP-maxiters
daniel-perry May 14, 2019
60e5b07
moving datasets to @pytest.mark.parameterize
daniel-perry May 15, 2019
13b1e11
Merge branch 'master' into MLP-maxiters
daniel-perry May 15, 2019
e056c9c
fixing PEP8 errors.
daniel-perry May 15, 2019
8084f40
Update sklearn/neural_network/multilayer_perceptron.py
daniel-perry Jun 27, 2019
772a828
Update sklearn/neural_network/multilayer_perceptron.py
daniel-perry Jun 27, 2019
211dbb1
Update sklearn/neural_network/multilayer_perceptron.py
daniel-perry Jun 27, 2019
f65e291
moving feature announcement to v0.22.rst
daniel-perry Jun 27, 2019
b60859a
Merge branch 'master' into MLP-maxiters
daniel-perry Jun 27, 2019
28e73c9
addressing review comments.
daniel-perry Jun 28, 2019
2af8196
Merge branch 'master' into MLP-maxiters
daniel-perry Jun 28, 2019
e11ca94
adding convergence warnings
daniel-perry Jun 28, 2019
11b9fab
Merge branch 'master' into MLP-maxiters
rth Jul 2, 2019
72e38d1
Lint
rth Jul 2, 2019
14 changes: 14 additions & 0 deletions doc/whats_new/v0.22.rst
@@ -19,6 +19,8 @@ random sampling procedures.

- :class:`decomposition.SparsePCA` where `normalize_components` has no effect
due to deprecation.
..
TO FILL IN AS WE GO

Details are listed in the changelog below.

@@ -122,6 +124,18 @@ Changelog
rather than variance in this case.
:pr:`13704` by `Roddy MacSween <rlms>`.


:mod:`sklearn.neural_network`
.............................

- |Feature| Add `max_fun` parameter in
:class:`neural_network.BaseMultilayerPerceptron`,
:class:`neural_network.MLPRegressor`, and
:class:`neural_network.MLPClassifier` to give control over the maximum
number of loss function calls when training with the ``lbfgs`` solver.
:issue:`9274` by :user:`Daniel Perry <daniel-perry>`.
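
As an editorial aside (not part of the diff), a minimal sketch of how the new
parameter reads from the public API; the dataset and values below are
illustrative only:

    from sklearn.datasets import load_iris
    from sklearn.neural_network import MLPClassifier

    X, y = load_iris(return_X_y=True)
    # max_fun caps the number of loss function calls; it only has an
    # effect when solver='lbfgs'.
    clf = MLPClassifier(solver='lbfgs', max_iter=150, max_fun=15000,
                        random_state=1)
    clf.fit(X, y)
    print(clf.n_iter_)  # outer lbfgs iterations actually performed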


Miscellaneous
.............

38 changes: 29 additions & 9 deletions sklearn/neural_network/multilayer_perceptron.py
@@ -51,7 +51,7 @@ def __init__(self, hidden_layer_sizes, activation, solver,
max_iter, loss, shuffle, random_state, tol, verbose,
warm_start, momentum, nesterovs_momentum, early_stopping,
validation_fraction, beta_1, beta_2, epsilon,
n_iter_no_change):
n_iter_no_change, max_fun):
self.activation = activation
self.solver = solver
self.alpha = alpha
@@ -75,6 +75,7 @@ def __init__(self, hidden_layer_sizes, activation, solver,
self.beta_2 = beta_2
self.epsilon = epsilon
self.n_iter_no_change = n_iter_no_change
self.max_fun = max_fun

def _unpack(self, packed_parameters):
"""Extract the coefficients and intercepts from packed_parameters."""
@@ -172,7 +173,6 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
self._unpack(packed_coef_inter)
loss, coef_grads, intercept_grads = self._backprop(
X, y, activations, deltas, coef_grads, intercept_grads)
self.n_iter_ += 1
grad = _pack(coef_grads, intercept_grads)
return loss, grad

@@ -381,6 +381,8 @@ def _validate_hyperparameters(self):
self.shuffle)
if self.max_iter <= 0:
raise ValueError("max_iter must be > 0, got %s." % self.max_iter)
if self.max_fun <= 0:
raise ValueError("max_fun must be > 0, got %s." % self.max_fun)
if self.alpha < 0.0:
raise ValueError("alpha must be >= 0, got %s." % self.alpha)
if (self.learning_rate in ["constant", "invscaling", "adaptive"] and
@@ -459,10 +461,12 @@ def _fit_lbfgs(self, X, y, activations, deltas, coef_grads,
optimal_parameters, self.loss_, d = fmin_l_bfgs_b(
x0=packed_coef_inter,
func=self._loss_grad_lbfgs,
maxfun=self.max_iter,
maxfun=self.max_fun,
maxiter=self.max_iter,
iprint=iprint,
pgtol=self.tol,
args=(X, y, activations, deltas, coef_grads, intercept_grads))
self.n_iter_ = d['nit']

self._unpack(optimal_parameters)
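
For context on the maxfun/maxiter split above, a small standalone sketch
(editorial, not part of the diff) of how scipy's fmin_l_bfgs_b treats the two
limits separately and reports both counters in its info dict; max_iter and
max_fun now map onto maxiter and maxfun respectively, and n_iter_ is read
from d['nit']:

    import numpy as np
    from scipy.optimize import fmin_l_bfgs_b

    def rosen_and_grad(w):
        # Rosenbrock loss and gradient, standing in for the packed MLP loss.
        loss = 100.0 * (w[1] - w[0] ** 2) ** 2 + (1.0 - w[0]) ** 2
        grad = np.array([
            -400.0 * w[0] * (w[1] - w[0] ** 2) - 2.0 * (1.0 - w[0]),
            200.0 * (w[1] - w[0] ** 2),
        ])
        return loss, grad

    x_opt, loss_opt, info = fmin_l_bfgs_b(
        func=rosen_and_grad,
        x0=np.array([-1.2, 1.0]),
        maxfun=15,    # like max_fun: caps loss/gradient evaluations
        maxiter=150,  # like max_iter: caps outer iterations
        pgtol=1e-5)
    # Line searches may evaluate the loss several times per iteration,
    # so info['funcalls'] >= info['nit'].
    print(info['nit'], info['funcalls'], info['warnflag'])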

Expand Down Expand Up @@ -833,6 +837,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):

.. versionadded:: 0.20

max_fun : int, optional, default 15000
Only used when solver='lbfgs'. Maximum number of loss function calls.
The solver iterates until convergence (determined by 'tol'), until the
number of iterations reaches max_iter, or until this number of loss
function calls is reached. Note that the number of loss function calls
will be greater than or equal to the number of iterations for the
`MLPClassifier`.

.. versionadded:: 0.22
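
As an editorial illustration of the parameter described above (and assuming
the ConvergenceWarning behaviour added later in this PR), a deliberately
small max_fun exhausts the loss-call budget before 'tol'-level improvement,
so training stops early and warns:

    import warnings
    from sklearn.datasets import load_digits
    from sklearn.exceptions import ConvergenceWarning
    from sklearn.neural_network import MLPClassifier

    X, y = load_digits(return_X_y=True)
    clf = MLPClassifier(solver='lbfgs', max_iter=150, max_fun=10,
                        random_state=1)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        clf.fit(X, y)
    # Each lbfgs iteration makes at least one loss call, so the iteration
    # count cannot exceed the loss-call budget.
    assert clf.n_iter_ <= 10
    print(any(issubclass(w.category, ConvergenceWarning) for w in caught))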

Attributes
----------
classes_ : array or list of array of shape (n_classes,)
@@ -898,8 +911,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
verbose=False, warm_start=False, momentum=0.9,
nesterovs_momentum=True, early_stopping=False,
validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
epsilon=1e-8, n_iter_no_change=10):

epsilon=1e-8, n_iter_no_change=10, max_fun=15000):
super().__init__(
hidden_layer_sizes=hidden_layer_sizes,
activation=activation, solver=solver, alpha=alpha,
@@ -912,7 +924,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
early_stopping=early_stopping,
validation_fraction=validation_fraction,
beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,
n_iter_no_change=n_iter_no_change)
n_iter_no_change=n_iter_no_change, max_fun=max_fun)

def _validate_input(self, X, y, incremental):
X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
@@ -1216,6 +1228,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):

.. versionadded:: 0.20

max_fun : int, optional, default 15000
Only used when solver='lbfgs'. Maximum number of function calls.
The solver iterates until convergence (determined by 'tol'), until the
number of iterations reaches max_iter, or until this number of function
calls is reached. Note that the number of function calls will be greater
than or equal to the number of iterations for the MLPRegressor.

.. versionadded:: 0.22
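
A matching regression-side sketch (editorial, synthetic data), mirroring the
note above that iterations can never exceed loss function calls:

    from sklearn.datasets import make_regression
    from sklearn.neural_network import MLPRegressor

    X, y = make_regression(n_samples=200, n_features=10, random_state=0)
    reg = MLPRegressor(solver='lbfgs', max_iter=150, max_fun=20,
                       random_state=1)
    reg.fit(X, y)
    # With such a small max_fun, the loss-call budget, not max_iter,
    # ends training, and n_iter_ stays at or below that budget.
    print(reg.n_iter_ <= 20)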

Attributes
----------
loss_ : float
@@ -1279,8 +1300,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
verbose=False, warm_start=False, momentum=0.9,
nesterovs_momentum=True, early_stopping=False,
validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
epsilon=1e-8, n_iter_no_change=10):

epsilon=1e-8, n_iter_no_change=10, max_fun=15000):
super().__init__(
hidden_layer_sizes=hidden_layer_sizes,
activation=activation, solver=solver, alpha=alpha,
@@ -1293,7 +1313,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
early_stopping=early_stopping,
validation_fraction=validation_fraction,
beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,
n_iter_no_change=n_iter_no_change)
n_iter_no_change=n_iter_no_change, max_fun=max_fun)

def predict(self, X):
"""Predict using the multi-layer perceptron model.
72 changes: 52 additions & 20 deletions sklearn/neural_network/tests/test_mlp.py
@@ -49,6 +49,8 @@
Xboston = StandardScaler().fit_transform(boston.data)[: 200]
yboston = boston.target[:200]

regression_datasets = [(Xboston, yboston)]

iris = load_iris()

X_iris = iris.data
@@ -229,32 +231,31 @@ def loss_grad_fun(t):
assert_almost_equal(numgrad, grad)


def test_lbfgs_classification():
@pytest.mark.parametrize('X,y', classification_datasets)
def test_lbfgs_classification(X, y):
# Test lbfgs on classification.
# It should achieve a score higher than 0.95 for the binary and multi-class
# versions of the digits dataset.
for X, y in classification_datasets:
X_train = X[:150]
y_train = y[:150]
X_test = X[150:]

expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X_train, y_train)
y_predict = mlp.predict(X_test)
assert_greater(mlp.score(X_train, y_train), 0.95)
assert_equal((y_predict.shape[0], y_predict.dtype.kind),
expected_shape_dtype)
X_train = X[:150]
y_train = y[:150]
X_test = X[150:]

expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

def test_lbfgs_regression():
for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X_train, y_train)
y_predict = mlp.predict(X_test)
assert_greater(mlp.score(X_train, y_train), 0.95)
assert_equal((y_predict.shape[0], y_predict.dtype.kind),
expected_shape_dtype)


@pytest.mark.parametrize('X,y', regression_datasets)
def test_lbfgs_regression(X, y):
# Test lbfgs on the boston dataset, a regression problem.
X = Xboston
y = yboston
for activation in ACTIVATION_TYPES:
mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
@@ -267,6 +268,37 @@ def test_lbfgs_regression():
assert_greater(mlp.score(X, y), 0.95)


@pytest.mark.parametrize('X,y', classification_datasets)
def test_lbfgs_classification_maxfun(X, y):
# Test lbfgs parameter max_fun.
# It should independently limit the number of iterations for lbfgs.
max_fun = 10
# classification tests
for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
max_iter=150, max_fun=max_fun, shuffle=True,
random_state=1, activation=activation)
mlp.fit(X, y)
assert max_fun >= mlp.n_iter_


@pytest.mark.parametrize('X,y', regression_datasets)
def test_lbfgs_regression_maxfun(X, y):
# Test lbfgs parameter max_fun.
# It should independently limit the number of iterations for lbfgs.
max_fun = 10
# regression tests
for activation in ACTIVATION_TYPES:
mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
max_iter=150, max_fun=max_fun, shuffle=True,
random_state=1, activation=activation)
mlp.fit(X, y)
assert max_fun >= mlp.n_iter_

mlp.max_fun = -1
assert_raises(ValueError, mlp.fit, X, y)


def test_learning_rate_warmstart():
# Tests that warm_start reuse past solutions.
X = [[3, 2], [1, 6], [5, 6], [-2, -4]]