diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 3fe18b630a82b..4db0289bb14e8 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -950,32 +950,78 @@ def test_family_deprecation(est, family): assert est.family.power == family.power -def test_linalg_warning_with_newton_solver(global_random_seed): +@pytest.mark.parametrize("newton_solver", ["newton-cholesky", "newton-qr-cholesky"]) +def test_linalg_warning_with_newton_solver(newton_solver, global_random_seed): rng = np.random.RandomState(global_random_seed) - X_orig = rng.normal(size=(10, 3)) + X_orig = rng.normal(size=(20, 3)) X_collinear = np.hstack([X_orig] * 10) # collinear design - y = rng.normal(size=X_orig.shape[0]) - y[y < 0] = 0.0 + y = rng.poisson( + np.exp(X_orig @ np.ones(X_orig.shape[1])), size=X_orig.shape[0] + ).astype(np.float64) + + # Let's consider the deviance of constant baseline on this problem. + baseline_pred = np.full_like(y, y.astype(np.float64).mean()) + constant_model_deviance = mean_poisson_deviance(y, baseline_pred) + assert constant_model_deviance > 1.0 # No warning raised on well-conditioned design, even without regularization. + tol = 1e-10 + with warnings.catch_warnings(): + warnings.simplefilter("error") + reg = PoissonRegressor(solver=newton_solver, alpha=0.0, tol=tol).fit(X_orig, y) + original_newton_deviance = mean_poisson_deviance(y, reg.predict(X_orig)) + + # On this dataset, we should have enough data points in the original data + # to not make it possible to get a near zero deviance (for the any of the + # admissible random seeds). This will make it easier to interpret meaning + # of rtol in the subsequent assertions: + assert original_newton_deviance > 0.2 + + # We check that the model could successfully fit information in X_orig to + # improve upon the constant baseline by a large margin (when evaluated on + # the traing set). + assert constant_model_deviance - original_newton_deviance > 0.1 + + # LBFGS is robust to a collinear design because its approximation of the + # Hessian is Symmeric Positive Definite by construction. Let's record its + # solution with warnings.catch_warnings(): warnings.simplefilter("error") - reg = PoissonRegressor(solver="newton-cholesky", alpha=0.0).fit(X_orig, y) - reference_deviance = mean_poisson_deviance(y, reg.predict(X_orig)) + reg = PoissonRegressor(solver="lbfgs", alpha=0.0, tol=tol).fit(X_collinear, y) + collinear_lbfgs_deviance = mean_poisson_deviance(y, reg.predict(X_collinear)) + + # The LBFGS solution on the collinear is expected to reach a comparable + # solution to the Newton solution on the original data. + rtol = 1e-6 + assert collinear_lbfgs_deviance == pytest.approx(original_newton_deviance, rel=rtol) - # Fitting on collinear data without regularization should raise an - # informative warning: + # Fitting a Newton solver on the collinear version of the training data + # without regularization should raise an informative warning and fallback + # to the LBFGS solver. msg = ( - "The inner solver of CholeskyNewtonSolver stumbled upon a" + "The inner solver of .*NewtonSolver stumbled upon a" " singular or very ill-conditioned hessian matrix" ) with pytest.warns(scipy.linalg.LinAlgWarning, match=msg): - PoissonRegressor(solver="newton-cholesky", alpha=0.0).fit(X_collinear, y) + reg = PoissonRegressor(solver=newton_solver, alpha=0.0, tol=tol).fit( + X_collinear, y + ) + # As a result we should still automatically converge to a good solution. + collinear_newton_deviance = mean_poisson_deviance(y, reg.predict(X_collinear)) + assert collinear_newton_deviance == pytest.approx( + original_newton_deviance, rel=rtol + ) # Increasing the regularization slightly should make the problem go away: - reg = PoissonRegressor(solver="newton-cholesky", alpha=1e-12).fit(X_collinear, y) + with warnings.catch_warnings(): + warnings.simplefilter("error", scipy.linalg.LinAlgWarning) + reg = PoissonRegressor(solver=newton_solver, alpha=1e-10).fit(X_collinear, y) - # Since we use a small penalty, the deviance of the predictions should still - # be almost the same. - this_deviance = mean_poisson_deviance(y, reg.predict(X_collinear)) - assert this_deviance == pytest.approx(reference_deviance) + # The slightly penalized model on the collinear data should be close enough + # to the unpenalized model on the original data. + penalized_collinear_newton_deviance = mean_poisson_deviance( + y, reg.predict(X_collinear) + ) + assert penalized_collinear_newton_deviance == pytest.approx( + original_newton_deviance, rel=rtol + )