[MRG+1] Drop NumPy < 1.8 (scikit-learn#8874)
naoyak authored and maskani-moh committed Nov 15, 2017
1 parent a28e484 commit 955dd8a
Showing 56 changed files with 186 additions and 710 deletions.
18 changes: 9 additions & 9 deletions .travis.yml
@@ -32,22 +32,22 @@ matrix:
       - libatlas-dev
     # This environment tests the oldest supported anaconda env
     - env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
-           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
+           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.5"
           COVERAGE=true
-    # This environment tests the newest supported Anaconda release (4.3.1)
+    # This environment tests the newest supported Anaconda release (4.4.0)
     # It also runs tests requiring Pandas.
-    - env: DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
-           NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
+    - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true"
+           NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1"
           CYTHON_VERSION="0.25.2" COVERAGE=true
     # This environment use pytest to run the tests. It uses the newest
-    # supported Anaconda release (4.3.1). It also runs tests requiring Pandas.
-    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6"
-    #        INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1"
-    #        PANDAS_VERSION="0.19.2" CYTHON_VERSION="0.25.2"
+    # supported Anaconda release (4.4.0). It also runs tests requiring Pandas.
+    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1"
+    #        INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0"
+    #        PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2"
     # flake8 linting on diff wrt common ancestor with upstream/master
     - env: RUN_FLAKE8="true" SKIP_TESTS="true"
           DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
-          NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
+          NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5"
     # This environment tests scikit-learn against numpy and scipy master
     # installed from their CI wheels in a virtualenv with the Python
     # interpreter provided by travis.
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_nmf.py
@@ -24,7 +24,7 @@
 from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
 from sklearn.externals.joblib import Memory
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils.extmath import fast_dot, safe_sparse_dot, squared_norm
+from sklearn.utils.extmath import safe_sparse_dot, squared_norm
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted, check_non_negative

@@ -99,7 +99,7 @@ def _nls_subproblem(X, W, H, tol, max_iter, alpha=0., l1_ratio=0.,
     http://www.csie.ntu.edu.tw/~cjlin/nmf/
     """
     WtX = safe_sparse_dot(W.T, X)
-    WtW = fast_dot(W.T, W)
+    WtW = np.dot(W.T, W)

     # values justified in the paper (alpha is renamed gamma)
     gamma = 1
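Since NumPy 1.7.2, ``np.dot`` takes the BLAS path even for non-contiguous
inputs, so the ``fast_dot`` wrapper adds nothing here. A minimal sketch of the
replaced call (shapes are illustrative, not taken from the benchmark)::

    import numpy as np

    rng = np.random.RandomState(0)
    W = rng.random_sample((100, 5))

    WtW = np.dot(W.T, W)             # 5 x 5 Gram matrix, BLAS-backed
    assert WtW.shape == (5, 5)
    assert np.allclose(WtW, WtW.T)   # a Gram matrix is symmetric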
33 changes: 0 additions & 33 deletions doc/developers/performance.rst
@@ -84,38 +84,6 @@ C/C++ generated files are embedded in distributed stable packages. The goal is
 to make it possible to install scikit-learn stable version
 on any machine with Python, Numpy, Scipy and C/C++ compiler.

-Fast matrix multiplications
-===========================
-
-Matrix multiplications (matrix-matrix and matrix-vector) are usually handled
-using the NumPy function ``np.dot``, but in versions of NumPy before 1.7.2
-this function is suboptimal when the inputs are not both in the C (row-major)
-layout; in that case, the inputs may be implicitly copied to obtain the right
-layout. This obviously consumes memory and takes time.
-
-The function ``fast_dot`` in ``sklearn.utils.extmath`` offers a fast
-replacement for ``np.dot`` that prevents copies from being made in some cases.
-In all other cases, it dispatches to ``np.dot`` and when the NumPy version is
-new enough, it is in fact an alias for that function, making it a drop-in
-replacement. Example usage of ``fast_dot``::
-
-    >>> import numpy as np
-    >>> from sklearn.utils.extmath import fast_dot
-    >>> X = np.random.random_sample([2, 10])
-    >>> np.allclose(np.dot(X, X.T), fast_dot(X, X.T))
-    True
-
-This function operates optimally on 2-dimensional arrays, both of the same
-dtype, which should be either single or double precision float. If these
-requirements aren't met or the BLAS package is not available, the call is
-silently dispatched to ``numpy.dot``. If you want to be sure when the original
-``numpy.dot`` has been invoked in a situation where it is suboptimal, you can
-activate the related warning::
-
-    >>> import warnings
-    >>> from sklearn.exceptions import NonBLASDotWarning
-    >>> warnings.simplefilter('always', NonBLASDotWarning)  # doctest: +SKIP
-
 .. _profiling-python-code:

 Profiling Python code
@@ -425,4 +393,3 @@ A sample algorithmic trick: warm restarts for cross validation

 TODO: demonstrate the warm restart tricks for cross validation of linear
 regression with Coordinate Descent.
-
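The deleted section documented a workaround for pre-1.7.2 NumPy, where
``np.dot`` silently copied inputs that were not C-contiguous. With the new
NumPy >= 1.8.2 floor that concern is moot; a minimal sketch of the now-default
behavior::

    import numpy as np

    X = np.random.random_sample((2, 10))
    # X.T is an F-ordered view; modern np.dot handles it without copying
    assert not X.T.flags['C_CONTIGUOUS']
    assert np.allclose(np.dot(X, X.T), X.dot(X.T))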
31 changes: 31 additions & 0 deletions doc/whats_new.rst
@@ -389,6 +389,37 @@ API changes summary
   has been renamed to ``n_components`` and will be removed in version 0.21.
   :issue:`8922` by :user:Attractadore

+- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
+  for scikit-learn. The following backported functions in ``sklearn.utils``
+  have been removed or deprecated accordingly.
+  :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai <naoyak>`
+
+  Removed in 0.19:
+
+  - ``utils.fixes.argpartition``
+  - ``utils.fixes.array_equal``
+  - ``utils.fixes.astype``
+  - ``utils.fixes.bincount``
+  - ``utils.fixes.expit``
+  - ``utils.fixes.frombuffer_empty``
+  - ``utils.fixes.in1d``
+  - ``utils.fixes.norm``
+  - ``utils.fixes.rankdata``
+  - ``utils.fixes.safe_copy``
+
+  Deprecated in 0.19, to be removed in 0.21:
+
+  - ``utils.arpack.eigs``
+  - ``utils.arpack.eigsh``
+  - ``utils.arpack.svds``
+  - ``utils.extmath.fast_dot``
+  - ``utils.extmath.logsumexp``
+  - ``utils.extmath.norm``
+  - ``utils.extmath.pinvh``
+  - ``utils.random.choice``
+  - ``utils.sparsetools.connected_components``
+  - ``utils.stats.rankdata``
+
+
 .. _changes_0_18_1:

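For downstream code that imported these backports, the NumPy/SciPy equivalents
below are the natural replacements under the new minimum versions. This
mapping is an assumption inferred from the functions' names and this diff, not
part of the release notes::

    import numpy as np
    from scipy.special import expit         # for utils.fixes.expit
    from scipy.stats import rankdata        # for utils.fixes.rankdata
    from scipy.sparse.linalg import eigsh   # for utils.arpack.eigsh

    x = np.array([2, 1, 2, 0])
    counts = np.bincount(x, minlength=5)    # for utils.fixes.bincount
    order = np.argpartition(x, kth=1)       # for utils.fixes.argpartition
    mask = np.in1d(x, [0, 2])               # for utils.fixes.in1d
    probs = expit(np.linspace(-3., 3., 4))  # logistic sigmoid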
5 changes: 2 additions & 3 deletions sklearn/cluster/_k_means.pyx
@@ -16,7 +16,6 @@ cimport cython
 from cython cimport floating

 from sklearn.utils.sparsefuncs_fast import assign_rows_csr
-from sklearn.utils.fixes import bincount

 ctypedef np.float64_t DOUBLE
 ctypedef np.int32_t INT
@@ -307,7 +306,7 @@ def _centers_dense(np.ndarray[floating, ndim=2] X,
     else:
         centers = np.zeros((n_clusters, n_features), dtype=np.float64)

-    n_samples_in_cluster = bincount(labels, minlength=n_clusters)
+    n_samples_in_cluster = np.bincount(labels, minlength=n_clusters)
     empty_clusters = np.where(n_samples_in_cluster == 0)[0]
     # maybe also relocate small clusters?

@@ -367,7 +366,7 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
     cdef np.ndarray[floating, ndim=2, mode="c"] centers
     cdef np.ndarray[np.npy_intp, ndim=1] far_from_centers
     cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] n_samples_in_cluster = \
-        bincount(labels, minlength=n_clusters)
+        np.bincount(labels, minlength=n_clusters)
     cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \
         np.where(n_samples_in_cluster == 0)[0]
     cdef int n_empty_clusters = empty_clusters.shape[0]
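``np.bincount`` with ``minlength`` is a drop-in replacement for the removed
fix; ``minlength`` is what keeps a slot for empty clusters. A small sketch
with made-up labels::

    import numpy as np

    labels = np.array([0, 0, 1, 2])                   # no point in cluster 3
    n_samples_in_cluster = np.bincount(labels, minlength=4)
    print(n_samples_in_cluster)                       # [2 1 1 0]
    print(np.where(n_samples_in_cluster == 0)[0])     # [3] -> to relocate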
3 changes: 1 addition & 2 deletions sklearn/cluster/_k_means_elkan.pyx
@@ -16,7 +16,6 @@ from libc.math cimport sqrt

 from ..metrics import euclidean_distances
 from ._k_means import _centers_dense
-from ..utils.fixes import partition


 cdef floating euclidian_dist(floating* a, floating* b, int n_features) nogil:
@@ -169,7 +168,7 @@ def k_means_elkan(np.ndarray[floating, ndim=2, mode='c'] X_, int n_clusters,
         print("start iteration")

     cd = np.asarray(center_half_distances)
-    distance_next_center = partition(cd, kth=1, axis=0)[1]
+    distance_next_center = np.partition(cd, kth=1, axis=0)[1]

     if verbose:
         print("done sorting")
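``np.partition`` (added in NumPy 1.8) replaces the backport directly. Here
``cd`` holds inter-center half-distances, so after partitioning along axis 0,
index 1 of each column is the distance to that center's second-closest center
(index 0 being the zero self-distance). A sketch with a toy matrix::

    import numpy as np

    cd = np.array([[0., 4., 1.],
                   [3., 0., 2.],
                   [5., 6., 0.]])
    distance_next_center = np.partition(cd, kth=1, axis=0)[1]
    print(distance_next_center)   # [3. 4. 1.]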
3 changes: 1 addition & 2 deletions sklearn/cluster/dbscan_.py
@@ -14,7 +14,6 @@

 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_array, check_consistent_length
-from ..utils.fixes import astype
 from ..neighbors import NearestNeighbors

 from ._dbscan_inner import dbscan_inner
@@ -123,7 +122,7 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
         neighborhoods = np.empty(X.shape[0], dtype=object)
         X.sum_duplicates()  # XXX: modifies X's internals in-place
         X_mask = X.data <= eps
-        masked_indices = astype(X.indices, np.intp, copy=False)[X_mask]
+        masked_indices = X.indices.astype(np.intp, copy=False)[X_mask]
         masked_indptr = np.concatenate(([0], np.cumsum(X_mask)))[X.indptr[1:]]

         # insert the diagonal: a point is its own neighbor, but 0 distance
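The ``ndarray.astype`` method call replaces the ``utils.fixes.astype`` helper,
which guarded the ``copy`` keyword on older NumPy. A sketch of the semantics
relied on here::

    import numpy as np

    indices = np.arange(5, dtype=np.intp)
    same = indices.astype(np.intp, copy=False)    # dtype matches: no copy
    assert same is indices
    conv = indices.astype(np.int32, copy=False)   # dtype differs: must copy
    assert conv is not indices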
11 changes: 4 additions & 7 deletions sklearn/cluster/k_means_.py
@@ -22,14 +22,12 @@
 from ..utils.extmath import row_norms, squared_norm, stable_cumsum
 from ..utils.sparsefuncs_fast import assign_rows_csr
 from ..utils.sparsefuncs import mean_variance_axis
-from ..utils.fixes import astype
 from ..utils import check_array
 from ..utils import check_random_state
 from ..utils import as_float_array
 from ..utils import gen_batches
 from ..utils.validation import check_is_fitted
 from ..utils.validation import FLOAT_DTYPES
-from ..utils.random import choice
 from ..externals.joblib import Parallel
 from ..externals.joblib import delayed
 from ..externals.six import string_types
@@ -1062,16 +1060,15 @@ def _mini_batch_step(X, x_squared_norms, centers, counts,
         n_reassigns = to_reassign.sum()
         if n_reassigns:
             # Pick new clusters amongst observations with uniform probability
-            new_centers = choice(X.shape[0], replace=False, size=n_reassigns,
-                                 random_state=random_state)
+            new_centers = random_state.choice(X.shape[0], replace=False,
+                                              size=n_reassigns)
             if verbose:
                 print("[MiniBatchKMeans] Reassigning %i cluster centers."
                       % n_reassigns)

             if sp.issparse(X) and not sp.issparse(centers):
-                assign_rows_csr(X,
-                                astype(new_centers, np.intp),
-                                astype(np.where(to_reassign)[0], np.intp),
+                assign_rows_csr(X, new_centers.astype(np.intp),
+                                np.where(to_reassign)[0].astype(np.intp),
                                 centers)
             else:
                 centers[to_reassign] = X[new_centers]
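``RandomState.choice`` has been available since NumPy 1.7, so the
``utils.random.choice`` backport can go. A sketch of the reassignment draw
with illustrative sizes::

    import numpy as np

    random_state = np.random.RandomState(42)
    n_samples, n_reassigns = 100, 3
    new_centers = random_state.choice(n_samples, replace=False,
                                      size=n_reassigns)
    assert len(np.unique(new_centers)) == n_reassigns   # distinct indices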
10 changes: 4 additions & 6 deletions sklearn/cross_validation.py
@@ -26,12 +26,10 @@
 from .utils.validation import (_is_arraylike, _num_samples,
                                column_or_1d)
 from .utils.multiclass import type_of_target
-from .utils.random import choice
 from .externals.joblib import Parallel, delayed, logger
 from .externals.six import with_metaclass
 from .externals.six.moves import zip
 from .metrics.scorer import check_scoring
-from .utils.fixes import bincount
 from .gaussian_process.kernels import Kernel as GPKernel
 from .exceptions import FitFailedWarning

@@ -541,7 +539,7 @@ def __init__(self, y, n_folds=3, shuffle=False,
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_labels, y_inversed = np.unique(y, return_inverse=True)
-        label_counts = bincount(y_inversed)
+        label_counts = np.bincount(y_inversed)
         min_labels = np.min(label_counts)
         if np.all(self.n_folds > label_counts):
             raise ValueError("All the n_labels for individual classes"
@@ -990,7 +988,7 @@ def _approximate_mode(class_counts, n_draws, rng):
             # if we need to add more, we add them all and
             # go to the next value
             add_now = min(len(inds), need_to_add)
-            inds = choice(inds, size=add_now, replace=False, random_state=rng)
+            inds = rng.choice(inds, size=add_now, replace=False)
             floored[inds] += 1
             need_to_add -= add_now
             if need_to_add == 0:
@@ -1072,7 +1070,7 @@ def __init__(self, y, n_iter=10, test_size=0.1, train_size=None,
         self.classes, self.y_indices = np.unique(y, return_inverse=True)
         n_cls = self.classes.shape[0]

-        if np.min(bincount(self.y_indices)) < 2:
+        if np.min(np.bincount(self.y_indices)) < 2:
             raise ValueError("The least populated class in y has only 1"
                              " member, which is too few. The minimum"
                              " number of labels for any class cannot"
@@ -1089,7 +1087,7 @@

     def _iter_indices(self):
         rng = check_random_state(self.random_state)
-        cls_count = bincount(self.y_indices)
+        cls_count = np.bincount(self.y_indices)

         for n in range(self.n_iter):
             # if there are ties in the class-counts, we want
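The pattern used by the stratified splitters -- ``np.unique(...,
return_inverse=True)`` followed by ``np.bincount`` -- yields per-class counts
in one pass; the guard below mirrors the check in ``__init__``. A sketch with
toy labels::

    import numpy as np

    y = np.array(['a', 'b', 'b', 'c', 'c', 'c'])
    classes, y_indices = np.unique(y, return_inverse=True)
    cls_count = np.bincount(y_indices)
    print(classes)     # ['a' 'b' 'c']
    print(cls_count)   # [1 2 3] -> class 'a' is too small to stratify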
7 changes: 3 additions & 4 deletions sklearn/datasets/samples_generator.py
@@ -15,7 +15,6 @@
 from ..preprocessing import MultiLabelBinarizer
 from ..utils import check_array, check_random_state
 from ..utils import shuffle as util_shuffle
-from ..utils.fixes import astype
 from ..utils.random import sample_without_replacement
 from ..externals import six
 map = six.moves.map
@@ -28,9 +27,9 @@ def _generate_hypercube(samples, dimensions, rng):
     if dimensions > 30:
         return np.hstack([_generate_hypercube(samples, dimensions - 30, rng),
                           _generate_hypercube(samples, 30, rng)])
-    out = astype(sample_without_replacement(2 ** dimensions, samples,
-                                            random_state=rng),
-                 dtype='>u4', copy=False)
+    out = sample_without_replacement(2 ** dimensions, samples,
+                                     random_state=rng).astype(dtype='>u4',
+                                                              copy=False)
     out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]
     return out

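The reshuffled call chains ``.astype`` onto ``sample_without_replacement``
directly. The ``'>u4'``/``unpackbits`` trick turns distinct integers below
``2**dimensions`` into distinct hypercube corners; a sketch with hand-picked
integers in place of the random sample::

    import numpy as np

    dimensions = 5
    out = np.array([3, 17, 28], dtype='>u4')   # stand-in for the sampled ints
    bits = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]
    print(bits)
    # [[0 0 0 1 1]    -> 3
    #  [1 0 0 0 1]    -> 17
    #  [1 1 1 0 0]]   -> 28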
9 changes: 4 additions & 5 deletions sklearn/datasets/svmlight_format.py
@@ -28,7 +28,6 @@
 from ..externals.six import u, b
 from ..externals.six.moves import range, zip
 from ..utils import check_array
-from ..utils.fixes import frombuffer_empty


 def load_svmlight_file(f, n_features=None, dtype=np.float64,
@@ -162,11 +161,11 @@ def _open_and_load(f, dtype, multilabel, zero_based, query_id):

     # convert from array.array, give data the right dtype
     if not multilabel:
-        labels = frombuffer_empty(labels, np.float64)
-        data = frombuffer_empty(data, actual_dtype)
-        indices = frombuffer_empty(ind, np.intc)
+        labels = np.frombuffer(labels, np.float64)
+        data = np.frombuffer(data, actual_dtype)
+        indices = np.frombuffer(ind, np.intc)
     indptr = np.frombuffer(indptr, dtype=np.intc)  # never empty
-    query = frombuffer_empty(query, np.int64)
+    query = np.frombuffer(query, np.int64)

     data = np.asarray(data, dtype=dtype)  # no-op for float{32,64}
     return data, indices, indptr, labels, query
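The removed ``frombuffer_empty`` wrapper padded zero-length buffers for older
NumPy; from 1.8 on, ``np.frombuffer`` returns an empty array itself, so the
plain call is safe even for files with no samples. A sketch with
``array.array`` inputs like those the parser produces::

    import array
    import numpy as np

    data = array.array('d', [1.0, 2.5, 3.0])
    values = np.frombuffer(data, np.float64)
    print(values)                                    # [1.  2.5 3. ]
    empty = np.frombuffer(array.array('d'), np.float64)
    print(empty.shape)                               # (0,) -- no error raised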
7 changes: 3 additions & 4 deletions sklearn/decomposition/base.py
@@ -13,7 +13,6 @@

 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
-from ..utils.extmath import fast_dot
 from ..utils.validation import check_is_fitted
 from ..externals import six
 from abc import ABCMeta, abstractmethod
@@ -130,7 +129,7 @@ def transform(self, X, y=None):
         X = check_array(X)
         if self.mean_ is not None:
             X = X - self.mean_
-        X_transformed = fast_dot(X, self.components_.T)
+        X_transformed = np.dot(X, self.components_.T)
         if self.whiten:
             X_transformed /= np.sqrt(self.explained_variance_)
         return X_transformed
@@ -156,7 +155,7 @@ def inverse_transform(self, X, y=None):
         exact inverse operation, which includes reversing whitening.
         """
         if self.whiten:
-            return fast_dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
+            return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
                             self.components_) + self.mean_
         else:
-            return fast_dot(X, self.components_) + self.mean_
+            return np.dot(X, self.components_) + self.mean_
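With ``fast_dot`` gone, the projection and its inverse are plain matrix
products. A self-contained sketch of the round trip, assuming a fitted
PCA-like object with ``mean_`` and ``components_`` (here built via SVD,
``whiten=False``)::

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.random_sample((6, 4))
    mean_ = X.mean(axis=0)
    components_ = np.linalg.svd(X - mean_, full_matrices=False)[2][:2]

    X_transformed = np.dot(X - mean_, components_.T)      # transform
    X_back = np.dot(X_transformed, components_) + mean_   # inverse_transform
    print(X_back.shape)   # (6, 4): rank-2 reconstruction of X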
6 changes: 3 additions & 3 deletions sklearn/decomposition/factor_analysis.py
@@ -28,7 +28,7 @@
 from ..base import BaseEstimator, TransformerMixin
 from ..externals.six.moves import xrange
 from ..utils import check_array, check_random_state
-from ..utils.extmath import fast_logdet, fast_dot, randomized_svd, squared_norm
+from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
 from ..utils.validation import check_is_fitted
 from ..exceptions import ConvergenceWarning

@@ -256,8 +256,8 @@ def transform(self, X):

         Wpsi = self.components_ / self.noise_variance_
         cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))
-        tmp = fast_dot(X_transformed, Wpsi.T)
-        X_transformed = fast_dot(tmp, cov_z)
+        tmp = np.dot(X_transformed, Wpsi.T)
+        X_transformed = np.dot(tmp, cov_z)

         return X_transformed

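The two chained products compute the posterior mean of the latent factors. A
sketch with illustrative shapes (``Wpsi`` is ``n_components x n_features``;
``cov_z`` is the latent posterior covariance, as in ``transform``)::

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    X_centered = rng.random_sample((10, 8))   # 10 samples, 8 features
    components = rng.random_sample((3, 8))
    noise_variance = np.full(8, 0.5)

    Wpsi = components / noise_variance
    cov_z = linalg.inv(np.eye(3) + np.dot(Wpsi, components.T))
    tmp = np.dot(X_centered, Wpsi.T)
    Z = np.dot(tmp, cov_z)                    # (10, 3) posterior factor means
    assert Z.shape == (10, 3)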
