[MRG+1] Fix return_norm bug in preprocessing.normalize (scikit-learn#…
luang008 authored and sergeyf committed Feb 28, 2017
1 parent 5762ab3 commit 442699d
Showing 3 changed files with 40 additions and 3 deletions.
5 changes: 5 additions & 0 deletions doc/whats_new.rst
@@ -85,6 +85,11 @@ Enhancements
do not set attributes on the estimator.
:issue:`7533` by :user:`Ekaterina Krivich <kiote>`.

- For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True``
  will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm; with norm 'max'
  the norms returned will be the same as for dense matrices (:issue:`7771`).
  By `Ang Lu <https://github.com/luang008>`_.

Bug fixes
.........

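A minimal sketch of the behaviour described in the entry above (assuming a scikit-learn version that includes this change; not part of the diff):

import numpy as np
from scipy import sparse
from sklearn.preprocessing import normalize

X = sparse.csr_matrix(np.array([[3.0, 0.0, 4.0],
                                [1.0, 0.0, 0.0],
                                [2.0, 3.0, 0.0]]))

# With 'l1' or 'l2' norm, return_norm=True is not supported for sparse input.
try:
    normalize(X, norm='l2', return_norm=True)
except NotImplementedError as exc:
    print(exc)

# With norm='max', the returned norms now match the dense result.
_, norms_sparse = normalize(X, norm='max', return_norm=True)
_, norms_dense = normalize(X.toarray(), norm='max', return_norm=True)
print(norms_sparse)  # [4. 1. 3.]
print(norms_dense)   # [4. 1. 3.]
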
20 changes: 17 additions & 3 deletions sklearn/preprocessing/data.py
@@ -1325,6 +1325,16 @@ def normalize(X, norm='l2', axis=1, copy=True, return_norm=False):
return_norm : boolean, default False
whether to return the computed norms

Returns
-------
X : {array-like, sparse matrix}, shape [n_samples, n_features]
Normalized input X.
norms : array, shape [n_samples] if axis=1 else [n_features]
An array of norms along given axis for X.
When X is sparse, a NotImplementedError will be raised
for norm 'l1' or 'l2'.

See also
--------
Normalizer: Performs normalization using the ``Transformer`` API
@@ -1346,15 +1356,19 @@ def normalize(X, norm='l2', axis=1, copy=True, return_norm=False):
X = X.T

if sparse.issparse(X):
if return_norm and norm in ('l1', 'l2'):
raise NotImplementedError("return_norm=True is not implemented "
"for sparse matrices with norm 'l1' "
"or norm 'l2'")
if norm == 'l1':
inplace_csr_row_normalize_l1(X)
elif norm == 'l2':
inplace_csr_row_normalize_l2(X)
elif norm == 'max':
_, norms = min_max_axis(X, 1)
- norms = norms.repeat(np.diff(X.indptr))
- mask = norms != 0
- X.data[mask] /= norms[mask]
+ norms_elementwise = norms.repeat(np.diff(X.indptr))
+ mask = norms_elementwise != 0
+ X.data[mask] /= norms_elementwise[mask]
else:
if norm == 'l1':
norms = np.abs(X).sum(axis=1)
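As a rough illustration of the ``return_norm`` semantics documented in the hunk above (a sketch outside the diff, assuming a release that includes this change): with ``axis=1`` one norm is returned per sample, with ``axis=0`` one per feature.

import numpy as np
from sklearn.preprocessing import normalize

X = np.array([[3.0, 0.0, 4.0],
              [1.0, 0.0, 0.0],
              [2.0, 3.0, 0.0]])

# axis=1 (default): one norm per row, shape (n_samples,)
_, norms_rows = normalize(X, norm='l2', axis=1, return_norm=True)
print(norms_rows)        # [5. 1. 3.60555128]

# axis=0: one norm per column, shape (n_features,)
_, norms_cols = normalize(X, norm='l2', axis=0, return_norm=True)
print(norms_cols.shape)  # (3,)
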
18 changes: 18 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
@@ -1315,6 +1315,24 @@ def test_normalize():

assert_array_almost_equal(row_sums, ones)

# Test return_norm
X_dense = np.array([[3.0, 0, 4.0], [1.0, 0.0, 0.0], [2.0, 3.0, 0.0]])
for norm in ('l1', 'l2', 'max'):
_, norms = normalize(X_dense, norm=norm, return_norm=True)
if norm == 'l1':
assert_array_almost_equal(norms, np.array([7.0, 1.0, 5.0]))
elif norm == 'l2':
assert_array_almost_equal(norms, np.array([5.0, 1.0, 3.60555127]))
else:
assert_array_almost_equal(norms, np.array([4.0, 1.0, 3.0]))

X_sparse = sparse.csr_matrix(X_dense)
for norm in ('l1', 'l2'):
assert_raises(NotImplementedError, normalize, X_sparse,
norm=norm, return_norm=True)
_, norms = normalize(X_sparse, norm='max', return_norm=True)
assert_array_almost_equal(norms, np.array([4.0, 1.0, 3.0]))


def test_binarizer():
X_ = np.array([[1, 0, 5], [2, 3, -1]])
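For reference, the expected arrays in the new test can be reproduced by hand with NumPy (a quick sanity check, not part of the commit):

import numpy as np

X_dense = np.array([[3.0, 0.0, 4.0], [1.0, 0.0, 0.0], [2.0, 3.0, 0.0]])

print(np.abs(X_dense).sum(axis=1))          # l1 norms:  [7. 1. 5.]
print(np.sqrt((X_dense ** 2).sum(axis=1)))  # l2 norms:  [5. 1. 3.60555128]
print(np.abs(X_dense).max(axis=1))          # max norms: [4. 1. 3.]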
