Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: make LinearRegression perfectly consistent across sparse or dense #13279

Changes from all commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.
+42 −3
Diff settings


Just for now

@@ -174,6 +174,11 @@ Support for Python 3.4 and below has been officially dropped.
parameter value ``copy_X=True`` in ``fit``.
:issue:`12972` by :user:`Lucio Fernandez-Arjona <luk-f-a>`

- |Fix| Fixed a bug in :class:`linear_model.LinearRegression` that
was not returning the same coefficients and intercepts with
``fit_intercept=True`` in sparse and dense case.
:issue:`13279` by `Alexandre Gramfort`_


@@ -459,21 +459,34 @@ def fit(self, X, y, sample_weight=None):

X, y, X_offset, y_offset, X_scale = self._preprocess_data(
X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
copy=self.copy_X, sample_weight=sample_weight)
copy=self.copy_X, sample_weight=sample_weight,

if sample_weight is not None:
# Sample weight can be implemented via a simple rescaling.
X, y = _rescale_data(X, y, sample_weight)

if sp.issparse(X):
X_offset_scale = X_offset / X_scale

def matvec(b):
return -

def rmatvec(b):
This conversation was marked as resolved by GaelVaroquaux

This comment has been minimized.

Copy link

jnothman Feb 26, 2019


I think PEP8 wants blank lines before these functions

This comment has been minimized.

Copy link

ogrisel Feb 27, 2019


I think it's only true for top level blocks (0-indented functions /classes).

return - X_offset_scale * np.sum(b)

X_centered = sparse.linalg.LinearOperator(shape=X.shape,

This comment has been minimized.

Copy link

GaelVaroquaux Feb 27, 2019


Very elegant!

if y.ndim < 2:
out = sparse_lsqr(X, y)
out = sparse_lsqr(X_centered, y)
self.coef_ = out[0]
self._residues = out[3]
# sparse_lstsq cannot handle y with shape (M, K)
outs = Parallel(n_jobs=n_jobs_)(
delayed(sparse_lsqr)(X, y[:, j].ravel())
delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
for j in range(y.shape[1]))
self.coef_ = np.vstack([out[0] for out in outs])
self._residues = np.vstack([out[3] for out in outs])
@@ -13,6 +13,7 @@
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_almost_equal
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_allclose

from sklearn.linear_model.base import LinearRegression
from sklearn.linear_model.base import _preprocess_data
@@ -150,6 +151,26 @@ def test_linear_regression_sparse(random_state=0):
assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)

@pytest.mark.parametrize('normalize', [True, False])
@pytest.mark.parametrize('fit_intercept', [True, False])
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
    # Test that linear regression agrees between sparse and dense
    rng = check_random_state(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    # Sparsify the design matrix so the CSR path is actually exercised.
    X[X < 0.1] = 0.
    Xcsr = sparse.csr_matrix(X)
    y = rng.rand(n_samples)
    params = dict(normalize=normalize, fit_intercept=fit_intercept)
    clf_dense = LinearRegression(**params)
    clf_sparse = LinearRegression(**params)
    clf_dense.fit(X, y)
    clf_sparse.fit(Xcsr, y)
    # Both intercept_ and coef_ must agree between the two input formats.
    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
    assert_allclose(clf_dense.coef_, clf_sparse.coef_)

def test_linear_regression_multiple_outcome(random_state=0):
# Test multiple-outcome linear regressions
X, y = make_regression(random_state=random_state)
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.