FIX: make LinearRegression perfectly consistent across sparse or dense #13279

Merged
Changes from 2 commits
+39 −3
@@ -174,6 +174,10 @@ Support for Python 3.4 and below has been officially dropped.
   parameter value ``copy_X=True`` in ``fit``.
   :issue:`12972` by :user:`Lucio Fernandez-Arjona <luk-f-a>`

+- |Fix| Fixed a bug in :class:`linear_model.LinearRegression` that
+  was not returning the same coefficients and intercepts with

jnothman (Member) commented on Feb 26, 2019:

I think this is missing mention of sparse/dense.

+  ``fit_intercept=True``. :issue:`13279` by `Alexandre Gramfort`_

:mod:`sklearn.manifold`
............................
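As a user-level illustration of the |Fix| entry above (a rough sketch with made-up data, mirroring the regression test added later in this PR), a dense fit and a CSR fit should now produce matching coefficients and intercepts:

```python
import numpy as np
from scipy import sparse
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.randn(200, 2)
X[X < 0.1] = 0.          # make the design matrix mostly zeros
y = rng.rand(200)

dense_fit = LinearRegression(fit_intercept=True).fit(X, y)
sparse_fit = LinearRegression(fit_intercept=True).fit(sparse.csr_matrix(X), y)

# With this fix, both estimators should agree.
print(dense_fit.intercept_, sparse_fit.intercept_)
print(dense_fit.coef_, sparse_fit.coef_)
```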

@@ -459,21 +459,32 @@ def fit(self, X, y, sample_weight=None):

         X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
-            copy=self.copy_X, sample_weight=sample_weight)
+            copy=self.copy_X, sample_weight=sample_weight,
+            return_mean=True)

         if sample_weight is not None:
             # Sample weight can be implemented via a simple rescaling.
             X, y = _rescale_data(X, y, sample_weight)

         if sp.issparse(X):
+            X_offset_scale = X_offset / X_scale
+            def matvec(b):
+                return X.dot(b) - b.dot(X_offset_scale)
+            def rmatvec(b):
This conversation was marked as resolved by GaelVaroquaux.

jnothman (Member) commented on Feb 26, 2019:

I think PEP8 wants blank lines before these functions.

ogrisel (Member) commented on Feb 27, 2019:

I think that's only true for top-level blocks (0-indented functions/classes).

+                return X.T.dot(b) - (X_offset_scale) * np.sum(b)

jnothman (Member) commented on Feb 26, 2019:

Redundant parentheses.


+            X_centered = sparse.linalg.LinearOperator(shape=X.shape,
+                                                      matvec=matvec,
+                                                      rmatvec=rmatvec)

GaelVaroquaux (Member) commented on Feb 27, 2019:

Very elegant!
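For readers following along, a small self-contained check (illustration only, with made-up data; not part of this diff) that the operator defined above behaves exactly like an explicitly centered dense copy of X, without ever materializing one:

```python
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import LinearOperator

rng = np.random.RandomState(0)
X = sparse.random(50, 4, density=0.3, format='csr', random_state=rng)
X_offset_scale = np.asarray(X.mean(axis=0)).ravel()  # column means (X_scale == 1)

op = LinearOperator(
    shape=X.shape,
    matvec=lambda b: X.dot(b) - b.dot(X_offset_scale),
    rmatvec=lambda b: X.T.dot(b) - X_offset_scale * np.sum(b))

# The operator acts like the explicitly centered (dense) matrix,
# but X itself is never densified or modified.
X_dense_centered = X.toarray() - X_offset_scale
b, c = rng.randn(4), rng.randn(50)
assert np.allclose(op.matvec(b), X_dense_centered.dot(b))
assert np.allclose(op.rmatvec(c), X_dense_centered.T.dot(c))
```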


             if y.ndim < 2:
-                out = sparse_lsqr(X, y)
+                out = sparse_lsqr(X_centered, y)
                 self.coef_ = out[0]
                 self._residues = out[3]
             else:
                 # sparse_lstsq cannot handle y with shape (M, K)
                 outs = Parallel(n_jobs=n_jobs_)(
-                    delayed(sparse_lsqr)(X, y[:, j].ravel())
+                    delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
                     for j in range(y.shape[1]))
                 self.coef_ = np.vstack([out[0] for out in outs])
                 self._residues = np.vstack([out[3] for out in outs])
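Putting the pieces of this hunk together, here is a standalone sketch of the fit path (my own illustration, not the PR's code: scipy's lsqr stands in for sklearn's sparse_lsqr wrapper, normalize=False is assumed so X_scale is all ones, and the intercept recovery mirrors what _set_intercept does):

```python
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import LinearOperator, lsqr

rng = np.random.RandomState(0)
X = sparse.random(200, 3, density=0.3, format='csr', random_state=rng)
y = rng.rand(200)

# Offsets as _preprocess_data(..., return_mean=True) would report them for
# sparse input; with normalize=False, X_offset_scale == X_offset.
X_offset = np.asarray(X.mean(axis=0)).ravel()
y_offset = y.mean()

X_centered = LinearOperator(
    shape=X.shape,
    matvec=lambda b: X.dot(b) - b.dot(X_offset),
    rmatvec=lambda b: X.T.dot(b) - X_offset * np.sum(b))

coef = lsqr(X_centered, y - y_offset)[0]
intercept = y_offset - X_offset.dot(coef)   # intercept recovered from the offsets

# Reference: dense least squares with an explicit column of ones.
ref = np.linalg.lstsq(np.column_stack([X.toarray(), np.ones(len(y))]), y,
                      rcond=None)[0]
assert np.allclose(coef, ref[:-1])
assert np.isclose(intercept, ref[-1])
```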
@@ -13,6 +13,7 @@
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_equal
+from sklearn.utils.testing import assert_allclose

 from sklearn.linear_model.base import LinearRegression
 from sklearn.linear_model.base import _preprocess_data

@@ -150,6 +151,26 @@ def test_linear_regression_sparse(random_state=0):
     assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)


+@pytest.mark.parametrize('normalize', [True, False])
+@pytest.mark.parametrize('fit_intercept', [True, False])
+def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
+    # Test that linear regression agrees between sparse and dense
+    rng = check_random_state(0)
+    n_samples = 200
+    n_features = 2
+    X = rng.randn(n_samples, n_features)
+    X[X < 0.1] = 0.
+    Xcsr = sparse.csr_matrix(X)
+    y = rng.rand(n_samples)
+    params = dict(normalize=normalize, fit_intercept=fit_intercept)
+    clf_dense = LinearRegression(**params)
+    clf_sparse = LinearRegression(**params)
+    clf_dense.fit(X, y)
+    clf_sparse.fit(Xcsr, y)
+    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
+    assert_allclose(clf_dense.coef_, clf_sparse.coef_)


 def test_linear_regression_multiple_outcome(random_state=0):
     # Test multiple-outcome linear regressions
     X, y = make_regression(random_state=random_state)