[MRG+1] Drop NumPy < 1.8 (scikit-learn#8874)
naoyak authored and maskani-moh committed Nov 15, 2017
1 parent a28e484 commit 955dd8a
Showing 56 changed files with 186 additions and 710 deletions.
18 changes: 9 additions & 9 deletions .travis.yml
@@ -32,22 +32,22 @@ matrix:
       - libatlas-dev
     # This environment tests the oldest supported anaconda env
     - env: DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
-           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.4"
+           NUMPY_VERSION="1.8.2" SCIPY_VERSION="0.13.3" CYTHON_VERSION="0.23.5"
           COVERAGE=true
-    # This environment tests the newest supported Anaconda release (4.3.1)
+    # This environment tests the newest supported Anaconda release (4.4.0)
     # It also runs tests requiring Pandas.
-    - env: DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true"
-           NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.2"
+    - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true"
+           NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1"
           CYTHON_VERSION="0.25.2" COVERAGE=true
     # This environment use pytest to run the tests. It uses the newest
-    # supported Anaconda release (4.3.1). It also runs tests requiring Pandas.
-    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6"
-    #        INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1"
-    #        PANDAS_VERSION="0.19.2" CYTHON_VERSION="0.25.2"
+    # supported Anaconda release (4.4.0). It also runs tests requiring Pandas.
+    # - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1"
+    #        INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0"
+    #        PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2"
     # flake8 linting on diff wrt common ancestor with upstream/master
     - env: RUN_FLAKE8="true" SKIP_TESTS="true"
           DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
-          NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4"
+          NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.23.5"
     # This environment tests scikit-learn against numpy and scipy master
     # installed from their CI wheels in a virtualenv with the Python
     # interpreter provided by travis.
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_nmf.py
@@ -24,7 +24,7 @@
 from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
 from sklearn.externals.joblib import Memory
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils.extmath import fast_dot, safe_sparse_dot, squared_norm
+from sklearn.utils.extmath import safe_sparse_dot, squared_norm
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted, check_non_negative

@@ -99,7 +99,7 @@ def _nls_subproblem(X, W, H, tol, max_iter, alpha=0., l1_ratio=0.,
     http://www.csie.ntu.edu.tw/~cjlin/nmf/
     """
     WtX = safe_sparse_dot(W.T, X)
-    WtW = fast_dot(W.T, W)
+    WtW = np.dot(W.T, W)

     # values justified in the paper (alpha is renamed gamma)
     gamma = 1
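Since NumPy 1.7.2, ``np.dot`` takes the BLAS path even for non-contiguous
inputs, so the ``fast_dot`` wrapper adds nothing here. A minimal sketch of the
replaced call (shapes are illustrative, not taken from the benchmark)::

    import numpy as np

    rng = np.random.RandomState(0)
    W = rng.random_sample((100, 5))

    WtW = np.dot(W.T, W)             # 5 x 5 Gram matrix, BLAS-backed
    assert WtW.shape == (5, 5)
    assert np.allclose(WtW, WtW.T)   # a Gram matrix is symmetric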
33 changes: 0 additions & 33 deletions doc/developers/performance.rst
@@ -84,38 +84,6 @@ C/C++ generated files are embedded in distributed stable packages. The goal is
 to make it possible to install scikit-learn stable version
 on any machine with Python, Numpy, Scipy and C/C++ compiler.

-Fast matrix multiplications
-===========================
-
-Matrix multiplications (matrix-matrix and matrix-vector) are usually handled
-using the NumPy function ``np.dot``, but in versions of NumPy before 1.7.2
-this function is suboptimal when the inputs are not both in the C (row-major)
-layout; in that case, the inputs may be implicitly copied to obtain the right
-layout. This obviously consumes memory and takes time.
-
-The function ``fast_dot`` in ``sklearn.utils.extmath`` offers a fast
-replacement for ``np.dot`` that prevents copies from being made in some cases.
-In all other cases, it dispatches to ``np.dot`` and when the NumPy version is
-new enough, it is in fact an alias for that function, making it a drop-in
-replacement. Example usage of ``fast_dot``::
-
-    >>> import numpy as np
-    >>> from sklearn.utils.extmath import fast_dot
-    >>> X = np.random.random_sample([2, 10])
-    >>> np.allclose(np.dot(X, X.T), fast_dot(X, X.T))
-    True
-
-This function operates optimally on 2-dimensional arrays, both of the same
-dtype, which should be either single or double precision float. If these
-requirements aren't met or the BLAS package is not available, the call is
-silently dispatched to ``numpy.dot``. If you want to be sure when the original
-``numpy.dot`` has been invoked in a situation where it is suboptimal, you can
-activate the related warning::
-
-    >>> import warnings
-    >>> from sklearn.exceptions import NonBLASDotWarning
-    >>> warnings.simplefilter('always', NonBLASDotWarning)  # doctest: +SKIP
-
 .. _profiling-python-code:

 Profiling Python code
@@ -425,4 +393,3 @@ A sample algorithmic trick: warm restarts for cross validation

 TODO: demonstrate the warm restart tricks for cross validation of linear
 regression with Coordinate Descent.
-
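The deleted section documented a workaround for pre-1.7.2 NumPy, where
``np.dot`` silently copied inputs that were not C-contiguous. With the new
NumPy >= 1.8.2 floor that concern is moot; a minimal sketch of the now-default
behavior::

    import numpy as np

    X = np.random.random_sample((2, 10))
    # X.T is an F-ordered view; modern np.dot handles it without copying
    assert not X.T.flags['C_CONTIGUOUS']
    assert np.allclose(np.dot(X, X.T), X.dot(X.T))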
31 changes: 31 additions & 0 deletions doc/whats_new.rst
@@ -389,6 +389,37 @@ API changes summary
   has been renamed to ``n_components`` and will be removed in version 0.21.
   :issue:`8922` by :user:Attractadore

+- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
+  for scikit-learn. The following backported functions in ``sklearn.utils``
+  have been removed or deprecated accordingly.
+  :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai <naoyak>`
+
+  Removed in 0.19:
+
+  - ``utils.fixes.argpartition``
+  - ``utils.fixes.array_equal``
+  - ``utils.fixes.astype``
+  - ``utils.fixes.bincount``
+  - ``utils.fixes.expit``
+  - ``utils.fixes.frombuffer_empty``
+  - ``utils.fixes.in1d``
+  - ``utils.fixes.norm``
+  - ``utils.fixes.rankdata``
+  - ``utils.fixes.safe_copy``
+
+  Deprecated in 0.19, to be removed in 0.21:
+
+  - ``utils.arpack.eigs``
+  - ``utils.arpack.eigsh``
+  - ``utils.arpack.svds``
+  - ``utils.extmath.fast_dot``
+  - ``utils.extmath.logsumexp``
+  - ``utils.extmath.norm``
+  - ``utils.extmath.pinvh``
+  - ``utils.random.choice``
+  - ``utils.sparsetools.connected_components``
+  - ``utils.stats.rankdata``
+
+
 .. _changes_0_18_1:

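For downstream code that imported these backports, the NumPy/SciPy equivalents
below are the natural replacements under the new minimum versions. This
mapping is an assumption inferred from the functions' names and this diff, not
part of the release notes::

    import numpy as np
    from scipy.special import expit         # for utils.fixes.expit
    from scipy.stats import rankdata        # for utils.fixes.rankdata
    from scipy.sparse.linalg import eigsh   # for utils.arpack.eigsh

    x = np.array([2, 1, 2, 0])
    counts = np.bincount(x, minlength=5)    # for utils.fixes.bincount
    order = np.argpartition(x, kth=1)       # for utils.fixes.argpartition
    mask = np.in1d(x, [0, 2])               # for utils.fixes.in1d
    probs = expit(np.linspace(-3., 3., 4))  # logistic sigmoid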
5 changes: 2 additions & 3 deletions sklearn/cluster/_k_means.pyx
@@ -16,7 +16,6 @@ cimport cython
 from cython cimport floating

 from sklearn.utils.sparsefuncs_fast import assign_rows_csr
-from sklearn.utils.fixes import bincount

 ctypedef np.float64_t DOUBLE
 ctypedef np.int32_t INT
@@ -307,7 +306,7 @@ def _centers_dense(np.ndarray[floating, ndim=2] X,
     else:
         centers = np.zeros((n_clusters, n_features), dtype=np.float64)

-    n_samples_in_cluster = bincount(labels, minlength=n_clusters)
+    n_samples_in_cluster = np.bincount(labels, minlength=n_clusters)
     empty_clusters = np.where(n_samples_in_cluster == 0)[0]
     # maybe also relocate small clusters?

@@ -367,7 +366,7 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
     cdef np.ndarray[floating, ndim=2, mode="c"] centers
     cdef np.ndarray[np.npy_intp, ndim=1] far_from_centers
     cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] n_samples_in_cluster = \
-        bincount(labels, minlength=n_clusters)
+        np.bincount(labels, minlength=n_clusters)
     cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \
         np.where(n_samples_in_cluster == 0)[0]
     cdef int n_empty_clusters = empty_clusters.shape[0]
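``np.bincount`` with ``minlength`` is a drop-in replacement for the removed
fix; ``minlength`` is what keeps a slot for empty clusters. A small sketch
with made-up labels::

    import numpy as np

    labels = np.array([0, 0, 1, 2])                   # no point in cluster 3
    n_samples_in_cluster = np.bincount(labels, minlength=4)
    print(n_samples_in_cluster)                       # [2 1 1 0]
    print(np.where(n_samples_in_cluster == 0)[0])     # [3] -> to relocate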
3 changes: 1 addition & 2 deletions sklearn/cluster/_k_means_elkan.pyx
@@ -16,7 +16,6 @@ from libc.math cimport sqrt

 from ..metrics import euclidean_distances
 from ._k_means import _centers_dense
-from ..utils.fixes import partition


 cdef floating euclidian_dist(floating* a, floating* b, int n_features) nogil:
@@ -169,7 +168,7 @@ def k_means_elkan(np.ndarray[floating, ndim=2, mode='c'] X_, int n_clusters,
         print("start iteration")

     cd = np.asarray(center_half_distances)
-    distance_next_center = partition(cd, kth=1, axis=0)[1]
+    distance_next_center = np.partition(cd, kth=1, axis=0)[1]

     if verbose:
         print("done sorting")
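``np.partition`` (added in NumPy 1.8) replaces the backport directly. Here
``cd`` holds inter-center half-distances, so after partitioning along axis 0,
index 1 of each column is the distance to that center's second-closest center
(index 0 being the zero self-distance). A sketch with a toy matrix::

    import numpy as np

    cd = np.array([[0., 4., 1.],
                   [3., 0., 2.],
                   [5., 6., 0.]])
    distance_next_center = np.partition(cd, kth=1, axis=0)[1]
    print(distance_next_center)   # [3. 4. 1.]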
3 changes: 1 addition & 2 deletions sklearn/cluster/dbscan_.py
@@ -14,7 +14,6 @@

 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_array, check_consistent_length
-from ..utils.fixes import astype
 from ..neighbors import NearestNeighbors

 from ._dbscan_inner import dbscan_inner
@@ -123,7 +122,7 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
         neighborhoods = np.empty(X.shape[0], dtype=object)
         X.sum_duplicates()  # XXX: modifies X's internals in-place
         X_mask = X.data <= eps
-        masked_indices = astype(X.indices, np.intp, copy=False)[X_mask]
+        masked_indices = X.indices.astype(np.intp, copy=False)[X_mask]
         masked_indptr = np.concatenate(([0], np.cumsum(X_mask)))[X.indptr[1:]]

         # insert the diagonal: a point is its own neighbor, but 0 distance
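The ``ndarray.astype`` method call replaces the ``utils.fixes.astype`` helper,
which guarded the ``copy`` keyword on older NumPy. A sketch of the semantics
relied on here::

    import numpy as np

    indices = np.arange(5, dtype=np.intp)
    same = indices.astype(np.intp, copy=False)    # dtype matches: no copy
    assert same is indices
    conv = indices.astype(np.int32, copy=False)   # dtype differs: must copy
    assert conv is not indices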
11 changes: 4 additions & 7 deletions sklearn/cluster/k_means_.py
@@ -22,14 +22,12 @@
 from ..utils.extmath import row_norms, squared_norm, stable_cumsum
 from ..utils.sparsefuncs_fast import assign_rows_csr
 from ..utils.sparsefuncs import mean_variance_axis
-from ..utils.fixes import astype
 from ..utils import check_array
 from ..utils import check_random_state
 from ..utils import as_float_array
 from ..utils import gen_batches
 from ..utils.validation import check_is_fitted
 from ..utils.validation import FLOAT_DTYPES
-from ..utils.random import choice
 from ..externals.joblib import Parallel
 from ..externals.joblib import delayed
 from ..externals.six import string_types
@@ -1062,16 +1060,15 @@ def _mini_batch_step(X, x_squared_norms, centers, counts,
         n_reassigns = to_reassign.sum()
         if n_reassigns:
             # Pick new clusters amongst observations with uniform probability
-            new_centers = choice(X.shape[0], replace=False, size=n_reassigns,
-                                 random_state=random_state)
+            new_centers = random_state.choice(X.shape[0], replace=False,
+                                              size=n_reassigns)
             if verbose:
                 print("[MiniBatchKMeans] Reassigning %i cluster centers."
                       % n_reassigns)

             if sp.issparse(X) and not sp.issparse(centers):
-                assign_rows_csr(X,
-                                astype(new_centers, np.intp),
-                                astype(np.where(to_reassign)[0], np.intp),
+                assign_rows_csr(X, new_centers.astype(np.intp),
+                                np.where(to_reassign)[0].astype(np.intp),
                                 centers)
             else:
                 centers[to_reassign] = X[new_centers]
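``RandomState.choice`` has been available since NumPy 1.7, so the
``utils.random.choice`` backport can go. A sketch of the reassignment draw
with illustrative sizes::

    import numpy as np

    random_state = np.random.RandomState(42)
    n_samples, n_reassigns = 100, 3
    new_centers = random_state.choice(n_samples, replace=False,
                                      size=n_reassigns)
    assert len(np.unique(new_centers)) == n_reassigns   # distinct indices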
10 changes: 4 additions & 6 deletions sklearn/cross_validation.py
@@ -26,12 +26,10 @@
 from .utils.validation import (_is_arraylike, _num_samples,
                                column_or_1d)
 from .utils.multiclass import type_of_target
-from .utils.random import choice
 from .externals.joblib import Parallel, delayed, logger
 from .externals.six import with_metaclass
 from .externals.six.moves import zip
 from .metrics.scorer import check_scoring
-from .utils.fixes import bincount
 from .gaussian_process.kernels import Kernel as GPKernel
 from .exceptions import FitFailedWarning

@@ -541,7 +539,7 @@ def __init__(self, y, n_folds=3, shuffle=False,
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_labels, y_inversed = np.unique(y, return_inverse=True)
-        label_counts = bincount(y_inversed)
+        label_counts = np.bincount(y_inversed)
         min_labels = np.min(label_counts)
         if np.all(self.n_folds > label_counts):
             raise ValueError("All the n_labels for individual classes"
@@ -990,7 +988,7 @@ def _approximate_mode(class_counts, n_draws, rng):
             # if we need to add more, we add them all and
             # go to the next value
             add_now = min(len(inds), need_to_add)
-            inds = choice(inds, size=add_now, replace=False, random_state=rng)
+            inds = rng.choice(inds, size=add_now, replace=False)
             floored[inds] += 1
             need_to_add -= add_now
             if need_to_add == 0:
@@ -1072,7 +1070,7 @@ def __init__(self, y, n_iter=10, test_size=0.1, train_size=None,
         self.classes, self.y_indices = np.unique(y, return_inverse=True)
         n_cls = self.classes.shape[0]

-        if np.min(bincount(self.y_indices)) < 2:
+        if np.min(np.bincount(self.y_indices)) < 2:
             raise ValueError("The least populated class in y has only 1"
                              " member, which is too few. The minimum"
                              " number of labels for any class cannot"
@@ -1089,7 +1087,7 @@

     def _iter_indices(self):
         rng = check_random_state(self.random_state)
-        cls_count = bincount(self.y_indices)
+        cls_count = np.bincount(self.y_indices)

         for n in range(self.n_iter):
             # if there are ties in the class-counts, we want
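The pattern used by the stratified splitters -- ``np.unique(...,
return_inverse=True)`` followed by ``np.bincount`` -- yields per-class counts
in one pass; the guard below mirrors the check in ``__init__``. A sketch with
toy labels::

    import numpy as np

    y = np.array(['a', 'b', 'b', 'c', 'c', 'c'])
    classes, y_indices = np.unique(y, return_inverse=True)
    cls_count = np.bincount(y_indices)
    print(classes)     # ['a' 'b' 'c']
    print(cls_count)   # [1 2 3] -> class 'a' is too small to stratify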
7 changes: 3 additions & 4 deletions sklearn/datasets/samples_generator.py
@@ -15,7 +15,6 @@
 from ..preprocessing import MultiLabelBinarizer
 from ..utils import check_array, check_random_state
 from ..utils import shuffle as util_shuffle
-from ..utils.fixes import astype
 from ..utils.random import sample_without_replacement
 from ..externals import six
 map = six.moves.map
@@ -28,9 +27,9 @@ def _generate_hypercube(samples, dimensions, rng):
     if dimensions > 30:
         return np.hstack([_generate_hypercube(samples, dimensions - 30, rng),
                           _generate_hypercube(samples, 30, rng)])
-    out = astype(sample_without_replacement(2 ** dimensions, samples,
-                                            random_state=rng),
-                 dtype='>u4', copy=False)
+    out = sample_without_replacement(2 ** dimensions, samples,
+                                     random_state=rng).astype(dtype='>u4',
+                                                              copy=False)
     out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]
     return out

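The reshuffled call chains ``.astype`` onto ``sample_without_replacement``
directly. The ``'>u4'``/``unpackbits`` trick turns distinct integers below
``2**dimensions`` into distinct hypercube corners; a sketch with hand-picked
integers in place of the random sample::

    import numpy as np

    dimensions = 5
    out = np.array([3, 17, 28], dtype='>u4')   # stand-in for the sampled ints
    bits = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]
    print(bits)
    # [[0 0 0 1 1]    -> 3
    #  [1 0 0 0 1]    -> 17
    #  [1 1 1 0 0]]   -> 28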
9 changes: 4 additions & 5 deletions sklearn/datasets/svmlight_format.py
@@ -28,7 +28,6 @@
 from ..externals.six import u, b
 from ..externals.six.moves import range, zip
 from ..utils import check_array
-from ..utils.fixes import frombuffer_empty


 def load_svmlight_file(f, n_features=None, dtype=np.float64,
@@ -162,11 +161,11 @@ def _open_and_load(f, dtype, multilabel, zero_based, query_id):

     # convert from array.array, give data the right dtype
     if not multilabel:
-        labels = frombuffer_empty(labels, np.float64)
-        data = frombuffer_empty(data, actual_dtype)
-        indices = frombuffer_empty(ind, np.intc)
+        labels = np.frombuffer(labels, np.float64)
+        data = np.frombuffer(data, actual_dtype)
+        indices = np.frombuffer(ind, np.intc)
     indptr = np.frombuffer(indptr, dtype=np.intc)  # never empty
-    query = frombuffer_empty(query, np.int64)
+    query = np.frombuffer(query, np.int64)

     data = np.asarray(data, dtype=dtype)  # no-op for float{32,64}
     return data, indices, indptr, labels, query
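The removed ``frombuffer_empty`` wrapper padded zero-length buffers for older
NumPy; from 1.8 on, ``np.frombuffer`` returns an empty array itself, so the
plain call is safe even for files with no samples. A sketch with
``array.array`` inputs like those the parser produces::

    import array
    import numpy as np

    data = array.array('d', [1.0, 2.5, 3.0])
    values = np.frombuffer(data, np.float64)
    print(values)                                    # [1.  2.5 3. ]
    empty = np.frombuffer(array.array('d'), np.float64)
    print(empty.shape)                               # (0,) -- no error raised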
7 changes: 3 additions & 4 deletions sklearn/decomposition/base.py
@@ -13,7 +13,6 @@

 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
-from ..utils.extmath import fast_dot
 from ..utils.validation import check_is_fitted
 from ..externals import six
 from abc import ABCMeta, abstractmethod
@@ -130,7 +129,7 @@ def transform(self, X, y=None):
         X = check_array(X)
         if self.mean_ is not None:
             X = X - self.mean_
-        X_transformed = fast_dot(X, self.components_.T)
+        X_transformed = np.dot(X, self.components_.T)
         if self.whiten:
             X_transformed /= np.sqrt(self.explained_variance_)
         return X_transformed
@@ -156,7 +155,7 @@ def inverse_transform(self, X, y=None):
         exact inverse operation, which includes reversing whitening.
         """
         if self.whiten:
-            return fast_dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
+            return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
                             self.components_) + self.mean_
         else:
-            return fast_dot(X, self.components_) + self.mean_
+            return np.dot(X, self.components_) + self.mean_
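With ``fast_dot`` gone, the projection and its inverse are plain matrix
products. A self-contained sketch of the round trip, assuming a fitted
PCA-like object with ``mean_`` and ``components_`` (here built via SVD,
``whiten=False``)::

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.random_sample((6, 4))
    mean_ = X.mean(axis=0)
    components_ = np.linalg.svd(X - mean_, full_matrices=False)[2][:2]

    X_transformed = np.dot(X - mean_, components_.T)      # transform
    X_back = np.dot(X_transformed, components_) + mean_   # inverse_transform
    print(X_back.shape)   # (6, 4): rank-2 reconstruction of X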
6 changes: 3 additions & 3 deletions sklearn/decomposition/factor_analysis.py
@@ -28,7 +28,7 @@
 from ..base import BaseEstimator, TransformerMixin
 from ..externals.six.moves import xrange
 from ..utils import check_array, check_random_state
-from ..utils.extmath import fast_logdet, fast_dot, randomized_svd, squared_norm
+from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
 from ..utils.validation import check_is_fitted
 from ..exceptions import ConvergenceWarning

@@ -256,8 +256,8 @@ def transform(self, X):

         Wpsi = self.components_ / self.noise_variance_
         cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))
-        tmp = fast_dot(X_transformed, Wpsi.T)
-        X_transformed = fast_dot(tmp, cov_z)
+        tmp = np.dot(X_transformed, Wpsi.T)
+        X_transformed = np.dot(tmp, cov_z)

         return X_transformed

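The two chained products compute the posterior mean of the latent factors. A
sketch with illustrative shapes (``Wpsi`` is ``n_components x n_features``;
``cov_z`` is the latent posterior covariance, as in ``transform``)::

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    X_centered = rng.random_sample((10, 8))   # 10 samples, 8 features
    components = rng.random_sample((3, 8))
    noise_variance = np.full(8, 0.5)

    Wpsi = components / noise_variance
    cov_z = linalg.inv(np.eye(3) + np.dot(Wpsi, components.T))
    tmp = np.dot(X_centered, Wpsi.T)
    Z = np.dot(tmp, cov_z)                    # (10, 3) posterior factor means
    assert Z.shape == (10, 3)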
