[MRG+1] MAINT drop SciPy < 0.13 (#8854)
Remove sklearn.utils.fixes functions that are not needed for scipy >= 0.13 and keep deprecated wrappers in other modules.
naoyak authored and lesteve committed Jun 2, 2017
1 parent a150d93 commit bd0fc23
Showing 56 changed files with 168 additions and 3,626 deletions.
48 changes: 0 additions & 48 deletions doc/developers/utilities.rst
@@ -89,11 +89,6 @@ Efficient Linear Algebra & Array Operations
 - :func:`arrayfuncs.min_pos`: (used in ``sklearn.linear_model.least_angle``)
   Find the minimum of the positive values within an array.

-- :func:`extmath.norm`: computes Euclidean (L2) vector norm
-  by directly calling the BLAS
-  ``nrm2`` function. This is more stable than ``scipy.linalg.norm``. See
-  `Fabian's blog post
-  <http://fa.bianp.net/blog/2011/computing-the-vector-norm>`_ for a discussion.

 - :func:`extmath.fast_logdet`: efficiently compute the log of the determinant
   of a matrix.
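
As background for the ``extmath.norm`` removal above: the helper's whole job was to reach the BLAS ``nrm2`` routine, which rescales internally instead of squaring. A minimal sketch of the same idea through ``scipy.linalg.get_blas_funcs`` (the example values are chosen so the naive formula underflows):

import numpy as np
from scipy.linalg import get_blas_funcs

x = np.full(3, 1e-300)            # squaring these values underflows float64
nrm2, = get_blas_funcs(('nrm2',), (x,))

print(np.sqrt(np.sum(x ** 2)))    # 0.0 -- naive sqrt-of-sum-of-squares underflows
print(nrm2(x))                    # ~1.73e-300 -- BLAS rescales before squaring
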
@@ -104,15 +99,6 @@ Efficient Linear Algebra & Array Operations
   ``scipy.sparse`` inputs. If the inputs are dense, it is equivalent to
   ``numpy.dot``.

-- :func:`extmath.logsumexp`: compute the sum of X assuming X is in the log
-  domain. This is equivalent to calling ``np.log(np.sum(np.exp(X)))``, but is
-  robust to overflow/underflow errors. Note that there is similar
-  functionality in ``np.logaddexp.reduce``, but because of the pairwise nature
-  of that routine, it is slower for large arrays. SciPy has a similar routine
-  in ``scipy.misc.logsumexp`` (in scipy versions < 0.10, it was found in
-  ``scipy.maxentropy.logsumexp``), but the scipy version does not accept an
-  ``axis`` keyword.

 - :func:`extmath.weighted_mode`: an extension of ``scipy.stats.mode`` which
   allows each item to have a real-valued weight.
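
The ``extmath.logsumexp`` bullet above describes the standard log-sum-exp trick; a minimal sketch of it (shift by the maximum so ``exp`` cannot overflow, then add the shift back) might look like the following, with SciPy's own ``logsumexp`` as the drop-in replacement on the versions this commit requires:

import numpy as np

def logsumexp(X, axis=None):
    """Stable log(sum(exp(X))): factor out the max before exponentiating."""
    X = np.asarray(X, dtype=float)
    X_max = X.max(axis=axis, keepdims=True)
    out = np.log(np.exp(X - X_max).sum(axis=axis, keepdims=True)) + X_max
    return np.squeeze(out, axis=axis) if axis is not None else out.ravel()[0]

x = np.array([1000., 1000.])
print(np.log(np.sum(np.exp(x))))  # inf -- exp(1000) overflows
print(logsumexp(x))               # ~1000.6931
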

@@ -177,40 +163,6 @@ Graph Routines
   connectivity matrix is a ``scipy.sparse.csr_matrix``.


-Backports
-=========
-
-- :func:`fixes.expit`: Logistic sigmoid function. Replacement for SciPy 0.10's
-  ``scipy.special.expit``.
-
-- :func:`sparsetools.connected_components`
-  (backported from ``scipy.sparse.csgraph.connected_components`` in scipy 0.12).
-  Used in ``sklearn.cluster.hierarchical``, as well as in tests for
-  :mod:`sklearn.feature_extraction`.
-
-
-ARPACK
-------
-
-- :func:`arpack.eigs`
-  (backported from ``scipy.sparse.linalg.eigs`` in scipy 0.10)
-  Sparse non-symmetric eigenvalue decomposition using the Arnoldi
-  method. A limited version of ``eigs`` is available in earlier
-  scipy versions.
-
-- :func:`arpack.eigsh`
-  (backported from ``scipy.sparse.linalg.eigsh`` in scipy 0.10)
-  Sparse symmetric eigenvalue decomposition using the Lanczos
-  method. A limited version of ``eigsh`` is available in earlier
-  scipy versions.
-
-- :func:`arpack.svds`
-  (backported from ``scipy.sparse.linalg.svds`` in scipy 0.10)
-  Sparse truncated singular value decomposition using ARPACK.
-  A limited version of ``svds`` is available in earlier
-  scipy versions.


 Benchmarking
 ------------



16 changes: 5 additions & 11 deletions examples/cluster/plot_face_compress.py
@@ -23,23 +23,17 @@
 import matplotlib.pyplot as plt

 from sklearn import cluster
-from sklearn.utils.testing import SkipTest
-from sklearn.utils.fixes import sp_version
-
-if sp_version < (0, 12):
-    raise SkipTest("Skipping because SciPy version earlier than 0.12.0 and "
-                   "thus does not include the scipy.misc.face() image.")
-
-try:
+try:  # SciPy >= 0.16 has face in misc
+    from scipy.misc import face
+    face = face(gray=True)
+except ImportError:
     face = sp.face(gray=True)
-except AttributeError:
-    # Newer versions of scipy have face in misc
-    from scipy import misc
-    face = misc.face(gray=True)

 n_clusters = 5
 np.random.seed(0)

 X = face.reshape((-1, 1))  # We need an (n_sample, n_feature) array
 k_means = cluster.KMeans(n_clusters=n_clusters, n_init=4)
 k_means.fit(X)
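
The change above (repeated in the next three examples) also swaps the exception type: importing ``face`` from ``scipy.misc`` fails with ``ImportError`` when it is absent, which is cheaper and more precise than calling an attribute and catching ``AttributeError``. A classic, self-contained instance of the same try-the-new-location-first pattern, here with the stdlib ``json`` as the fallback:

try:
    import simplejson as json    # faster third-party drop-in, if installed
except ImportError:
    import json                  # stdlib fallback with the same API

print(json.dumps({"scipy": ">= 0.13"}))
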
15 changes: 4 additions & 11 deletions examples/cluster/plot_face_segmentation.py
@@ -30,21 +30,14 @@
 from sklearn.feature_extraction import image
 from sklearn.cluster import spectral_clustering
-from sklearn.utils.testing import SkipTest
-from sklearn.utils.fixes import sp_version
-
-if sp_version < (0, 12):
-    raise SkipTest("Skipping because SciPy version earlier than 0.12.0 and "
-                   "thus does not include the scipy.misc.face() image.")
-

 # load the raccoon face as a numpy array
-try:
+try:  # SciPy >= 0.16 has face in misc
+    from scipy.misc import face
+    face = face(gray=True)
+except ImportError:
     face = sp.face(gray=True)
-except AttributeError:
-    # Newer versions of scipy have face in misc
-    from scipy import misc
-    face = misc.face(gray=True)

 # Resize it to 10% of the original size to speed up the processing
 face = sp.misc.imresize(face, 0.10) / 255.
15 changes: 4 additions & 11 deletions examples/cluster/plot_face_ward_segmentation.py
@@ -23,22 +23,15 @@
 from sklearn.feature_extraction.image import grid_to_graph
 from sklearn.cluster import AgglomerativeClustering
-from sklearn.utils.testing import SkipTest
-from sklearn.utils.fixes import sp_version
-
-if sp_version < (0, 12):
-    raise SkipTest("Skipping because SciPy version earlier than 0.12.0 and "
-                   "thus does not include the scipy.misc.face() image.")
-

 ###############################################################################
 # Generate data
-try:
+try:  # SciPy >= 0.16 has face in misc
+    from scipy.misc import face
+    face = face(gray=True)
+except ImportError:
     face = sp.face(gray=True)
-except AttributeError:
-    # Newer versions of scipy have face in misc
-    from scipy import misc
-    face = misc.face(gray=True)

 # Resize it to 10% of the original size to speed up the processing
 face = sp.misc.imresize(face, 0.10) / 255.
14 changes: 4 additions & 10 deletions examples/decomposition/plot_image_denoising.py
@@ -42,19 +42,13 @@
 from sklearn.decomposition import MiniBatchDictionaryLearning
 from sklearn.feature_extraction.image import extract_patches_2d
 from sklearn.feature_extraction.image import reconstruct_from_patches_2d
-from sklearn.utils.testing import SkipTest
-from sklearn.utils.fixes import sp_version
-
-if sp_version < (0, 12):
-    raise SkipTest("Skipping because SciPy version earlier than 0.12.0 and "
-                   "thus does not include the scipy.misc.face() image.")
-
 ###############################################################################
-try:
-    from scipy import misc
-    face = misc.face(gray=True)
-except AttributeError:
-    # Old versions of scipy have face in the top level package
+try:  # SciPy >= 0.16 has face in misc
+    from scipy.misc import face
+    face = face(gray=True)
+except ImportError:
     face = sp.face(gray=True)

 # Convert from uint8 representation with values between 0 and 255 to
3 changes: 1 addition & 2 deletions examples/linear_model/plot_sparse_recovery.py
@@ -55,15 +55,14 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.metrics import auc, precision_recall_curve
 from sklearn.ensemble import ExtraTreesRegressor
-from sklearn.utils.extmath import pinvh
 from sklearn.exceptions import ConvergenceWarning


 def mutual_incoherence(X_relevant, X_irelevant):
     """Mutual incoherence, as defined by formula (26a) of [Wainwright2006].
     """
     projector = np.dot(np.dot(X_irelevant.T, X_relevant),
-                       pinvh(np.dot(X_relevant.T, X_relevant)))
+                       linalg.pinvh(np.dot(X_relevant.T, X_relevant)))
     return np.max(np.abs(projector).sum(axis=1))


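Here and in the covariance module below, the ``sklearn.utils.extmath.pinvh`` backport gives way to ``scipy.linalg.pinvh``, which exploits the symmetry of its input. A small sketch under the assumption of a symmetric (possibly singular) Gram matrix:

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
A = rng.randn(5, 3)
G = np.dot(A.T, A)          # symmetric positive semi-definite Gram matrix

G_pinv = linalg.pinvh(G)    # pseudo-inverse via a symmetric eigendecomposition
print(np.allclose(np.dot(np.dot(G, G_pinv), G), G))  # Moore-Penrose identity: True
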
1 change: 0 additions & 1 deletion sklearn/cluster/_k_means.pyx
@@ -15,7 +15,6 @@ cimport numpy as np
 cimport cython
 from cython cimport floating

-from ..utils.extmath import norm
 from sklearn.utils.sparsefuncs_fast import assign_rows_csr
 from sklearn.utils.fixes import bincount

12 changes: 6 additions & 6 deletions sklearn/cluster/bicluster.py
@@ -8,16 +8,16 @@

 import numpy as np

-from scipy.sparse import dia_matrix
-from scipy.sparse import issparse
+from scipy.linalg import norm
+from scipy.sparse import dia_matrix, issparse
+from scipy.sparse.linalg import eigsh, svds

 from . import KMeans, MiniBatchKMeans
 from ..base import BaseEstimator, BiclusterMixin
 from ..externals import six
 from ..utils import check_random_state
-from ..utils.arpack import eigsh, svds
-from ..utils.extmath import (make_nonnegative, norm, randomized_svd,
+from ..utils.extmath import (make_nonnegative, randomized_svd,
                              safe_sparse_dot)

 from ..utils.validation import assert_all_finite, check_array
@@ -202,7 +202,7 @@ class SpectralCoclustering(BaseSpectral):
         'randomized' or 'arpack'. If 'randomized', use
         :func:`sklearn.utils.extmath.randomized_svd`, which may be faster
         for large matrices. If 'arpack', use
-        :func:`sklearn.utils.arpack.svds`, which is more accurate, but
+        :func:`scipy.sparse.linalg.svds`, which is more accurate, but
         possibly slower in some cases.

     n_svd_vecs : int, optional, default: None
@@ -334,7 +334,7 @@ class SpectralBiclustering(BaseSpectral):
         'randomized' or 'arpack'. If 'randomized', uses
         `sklearn.utils.extmath.randomized_svd`, which may be faster
         for large matrices. If 'arpack', uses
-        `sklearn.utils.arpack.svds`, which is more accurate, but
+        `scipy.sparse.linalg.svds`, which is more accurate, but
         possibly slower in some cases.

     n_svd_vecs : int, optional, default: None
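
With the ``sklearn.utils.arpack`` backports gone, the biclustering code calls ARPACK through ``scipy.sparse.linalg`` directly. A short sketch (random sparse matrix assumed) relating the two routines used here: the largest eigenvalues of the Gram matrix from ``eigsh`` are the squares of the largest singular values from ``svds``:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import eigsh, svds

rng = np.random.RandomState(0)
dense = rng.rand(40, 25) * (rng.rand(40, 25) > 0.8)   # ~20% non-zero entries
X = csr_matrix(dense)

U, s, Vt = svds(X, k=5)        # five largest singular triplets (ARPACK)
w, v = eigsh(X.T * X, k=5)     # five largest eigenpairs of the Gram matrix
print(np.allclose(np.sort(s ** 2), np.sort(w)))  # True
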
2 changes: 1 addition & 1 deletion sklearn/cluster/hierarchical.py
@@ -12,13 +12,13 @@

 import numpy as np
 from scipy import sparse
+from scipy.sparse.csgraph import connected_components

 from ..base import BaseEstimator, ClusterMixin
 from ..externals.joblib import Memory
 from ..externals import six
 from ..metrics.pairwise import paired_distances, pairwise_distances
 from ..utils import check_array
-from ..utils.sparsetools import connected_components

 from . import _hierarchical
 from ._feature_agglomeration import AgglomerationTransform
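
The ``sklearn.utils.sparsetools`` backport is likewise replaced by ``scipy.sparse.csgraph.connected_components``, present in all SciPy versions this commit supports. A minimal sketch on a four-node graph with two components:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

# Adjacency matrix of an undirected graph with components {0, 1} and {2, 3}
graph = csr_matrix(np.array([[0, 1, 0, 0],
                             [1, 0, 0, 0],
                             [0, 0, 0, 1],
                             [0, 0, 1, 0]]))

n_components, labels = connected_components(graph, directed=False)
print(n_components, labels)    # 2 [0 0 1 1]
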
4 changes: 2 additions & 2 deletions sklearn/cluster/mean_shift_.py
@@ -20,7 +20,7 @@
 from collections import defaultdict
 from ..externals import six
 from ..utils.validation import check_is_fitted
-from ..utils import extmath, check_random_state, gen_batches, check_array
+from ..utils import check_random_state, gen_batches, check_array
 from ..base import BaseEstimator, ClusterMixin
 from ..neighbors import NearestNeighbors
 from ..metrics.pairwise import pairwise_distances_argmin
@@ -96,7 +96,7 @@ def _mean_shift_single_seed(my_mean, X, nbrs, max_iter):
         my_old_mean = my_mean  # save the old mean
         my_mean = np.mean(points_within, axis=0)
         # If converged or at max_iter, adds the cluster
-        if (extmath.norm(my_mean - my_old_mean) < stop_thresh or
+        if (np.linalg.norm(my_mean - my_old_mean) < stop_thresh or
                 completed_iterations == max_iter):
             return tuple(my_mean), len(points_within)
         completed_iterations += 1
3 changes: 1 addition & 2 deletions sklearn/cluster/spectral.py
@@ -12,7 +12,6 @@
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_random_state, as_float_array
 from ..utils.validation import check_array
-from ..utils.extmath import norm
 from ..metrics.pairwise import pairwise_kernels
 from ..neighbors import kneighbors_graph
 from ..manifold import spectral_embedding
@@ -90,7 +89,7 @@ def discretize(vectors, copy=True, max_svd_restarts=30, n_iter_max=20,
     # search easier.
     norm_ones = np.sqrt(n_samples)
     for i in range(vectors.shape[1]):
-        vectors[:, i] = (vectors[:, i] / norm(vectors[:, i])) \
+        vectors[:, i] = (vectors[:, i] / np.linalg.norm(vectors[:, i])) \
             * norm_ones
         if vectors[0, i] != 0:
             vectors[:, i] = -1 * vectors[:, i] * np.sign(vectors[0, i])
6 changes: 3 additions & 3 deletions sklearn/covariance/empirical_covariance_.py
@@ -17,7 +17,7 @@

 from ..base import BaseEstimator
 from ..utils import check_array
-from ..utils.extmath import fast_logdet, pinvh
+from ..utils.extmath import fast_logdet


 def log_likelihood(emp_cov, precision):
Expand Down Expand Up @@ -133,7 +133,7 @@ def _set_covariance(self, covariance):
self.covariance_ = covariance
# set precision
if self.store_precision:
self.precision_ = pinvh(covariance)
self.precision_ = linalg.pinvh(covariance)
else:
self.precision_ = None

@@ -149,7 +149,7 @@ def get_precision(self):
         if self.store_precision:
             precision = self.precision_
         else:
-            precision = pinvh(self.covariance_)
+            precision = linalg.pinvh(self.covariance_)
         return precision

     def fit(self, X, y=None):
3 changes: 1 addition & 2 deletions sklearn/covariance/graph_lasso_.py
@@ -17,7 +17,6 @@
                                     log_likelihood)

 from ..exceptions import ConvergenceWarning
-from ..utils.extmath import pinvh
 from ..utils.validation import check_random_state, check_array
 from ..utils import deprecated
 from ..linear_model import lars_path
@@ -191,7 +190,7 @@ def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
     covariance_ *= 0.95
     diagonal = emp_cov.flat[::n_features + 1]
     covariance_.flat[::n_features + 1] = diagonal
-    precision_ = pinvh(covariance_)
+    precision_ = linalg.pinvh(covariance_)

     indices = np.arange(n_features)
     costs = list()
12 changes: 6 additions & 6 deletions sklearn/covariance/robust_covariance.py
@@ -14,7 +14,7 @@
 from scipy.stats import chi2

 from . import empirical_covariance, EmpiricalCovariance
-from ..utils.extmath import fast_logdet, pinvh
+from ..utils.extmath import fast_logdet
 from ..utils import check_random_state, check_array


@@ -107,7 +107,7 @@ def _c_step(X, n_support, random_state, remaining_iterations=30,
         location = initial_estimates[0]
         covariance = initial_estimates[1]
         # run a special iteration for that case (to get an initial support)
-        precision = pinvh(covariance)
+        precision = linalg.pinvh(covariance)
         X_centered = X - location
         dist = (np.dot(X_centered, precision) * X_centered).sum(1)
         # compute new estimates
@@ -127,7 +127,7 @@ def _c_step(X, n_support, random_state, remaining_iterations=30,
         previous_det = det
         previous_support = support
         # compute a new support from the full data set mahalanobis distances
-        precision = pinvh(covariance)
+        precision = linalg.pinvh(covariance)
         X_centered = X - location
         dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
         # compute new estimates
@@ -393,15 +393,15 @@ def fast_mcd(X, support_fraction=None,
             covariance = np.asarray([[np.var(X[support])]])
             location = np.array([location])
             # get precision matrix in an optimized way
-            precision = pinvh(covariance)
+            precision = linalg.pinvh(covariance)
             dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)
         else:
             support = np.ones(n_samples, dtype=bool)
             covariance = np.asarray([[np.var(X)]])
             location = np.asarray([np.mean(X)])
             X_centered = X - location
             # get precision matrix in an optimized way
-            precision = pinvh(covariance)
+            precision = linalg.pinvh(covariance)
             dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)
     # Starting FastMCD algorithm for p-dimensional case
     if (n_samples > 500) and (n_features > 1):
@@ -629,7 +629,7 @@ def fit(self, X, y=None):
         raw_covariance = self._nonrobust_covariance(X[raw_support],
                                                     assume_centered=True)
         # get precision matrix in an optimized way
-        precision = pinvh(raw_covariance)
+        precision = linalg.pinvh(raw_covariance)
         raw_dist = np.sum(np.dot(X, precision) * X, 1)
         self.raw_location_ = raw_location
         self.raw_covariance_ = raw_covariance
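
Several hunks above follow the same two-step recipe: invert the covariance with ``linalg.pinvh`` and form squared Mahalanobis distances for all rows at once. A self-contained sketch of that vectorized pattern on random data (variable names hypothetical):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.randn(200, 3)

location = X.mean(axis=0)
covariance = np.cov(X, rowvar=False)
precision = linalg.pinvh(covariance)   # safe even for near-singular covariance

# Squared Mahalanobis distance of every row, with no explicit Python loop:
X_centered = X - location
dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
print(dist.shape, dist.mean())         # (200,), mean close to n_features == 3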
