
Commit

API kwonly for utils (#17046)
* kwonly for utils

* More

* fixed some

* some more

* iwannagohomepls

* accept_sparse not kwonly anymore
NicolasHug committed Apr 27, 2020
1 parent f624f4e commit 76ef8b0
Showing 15 changed files with 118 additions and 93 deletions.
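
The change below is mostly mechanical: parameters listed after a bare `*` become keyword-only, and public helpers are wrapped in the private `_deprecate_positional_args` decorator so that positional use keeps working for now but emits a deprecation warning. A minimal caller-side sketch (illustrative only, not part of the diff; the exact warning class and removal version are my assumptions):

    import numpy as np
    from sklearn.utils.class_weight import compute_sample_weight

    y = np.asarray([1, 1, 1, 2, 2, 2])

    # Preferred after this commit: `indices` is spelled out as a keyword.
    w = compute_sample_weight("balanced", y, indices=range(4))

    # Still accepted during the deprecation period, but warns and is slated
    # to become an error in a later release.
    w_positional = compute_sample_weight("balanced", y, range(4))
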
6 changes: 4 additions & 2 deletions sklearn/ensemble/_forest.py
@@ -159,9 +159,11 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
if class_weight == 'subsample':
with catch_warnings():
simplefilter('ignore', DeprecationWarning)
- curr_sample_weight *= compute_sample_weight('auto', y, indices)
+ curr_sample_weight *= compute_sample_weight('auto', y,
+                                             indices=indices)
elif class_weight == 'balanced_subsample':
- curr_sample_weight *= compute_sample_weight('balanced', y, indices)
+ curr_sample_weight *= compute_sample_weight('balanced', y,
+                                             indices=indices)

tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
else:
5 changes: 3 additions & 2 deletions sklearn/feature_selection/_univariate_selection.py
@@ -146,7 +146,7 @@ def f_classif(X, y):
chi2: Chi-squared stats of non-negative features for classification tasks.
f_regression: F-value between label/feature for regression tasks.
"""
- X, y = check_X_y(X, y, ['csr', 'csc', 'coo'])
+ X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'])
args = [X[safe_mask(X, y == k)] for k in np.unique(y)]
return f_oneway(*args)

@@ -277,7 +277,8 @@ def f_regression(X, y, center=True):
SelectPercentile: Select features based on percentile of the highest
scores.
"""
- X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64)
+ X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
+                  dtype=np.float64)
n_samples = X.shape[0]

# compute centered values
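
For reference, a small sketch (toy data assumed) of the validation pattern touched in f_classif and f_regression above — the sparse formats are now passed via the `accept_sparse` keyword of check_X_y:

    import numpy as np
    from scipy import sparse
    from sklearn.utils import check_X_y

    X = sparse.random(20, 5, density=0.3, format="csr", random_state=0)
    y = np.arange(20) % 2

    # Sparse formats named explicitly, as in the updated f_classif call.
    X_checked, y_checked = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'])
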
2 changes: 1 addition & 1 deletion sklearn/feature_selection/tests/test_base.py
@@ -15,7 +15,7 @@ def __init__(self, step=2):
self.step = step

def fit(self, X, y=None):
- X = check_array(X, 'csc')
+ X = check_array(X, accept_sparse='csc')
self.n_input_feats = X.shape[1]
return self

11 changes: 6 additions & 5 deletions sklearn/linear_model/_coordinate_descent.py
@@ -131,7 +131,7 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,
if Xy is None:
X_sparse = sparse.isspmatrix(X)
sparse_center = X_sparse and (fit_intercept or normalize)
- X = check_array(X, 'csc',
+ X = check_array(X, accept_sparse='csc',
copy=(copy_X and fit_intercept and not X_sparse))
if not X_sparse:
# X can be touched inplace thanks to the above line
@@ -435,10 +435,10 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
# We expect X and y to be already Fortran ordered when bypassing
# checks
if check_input:
- X = check_array(X, 'csc', dtype=[np.float64, np.float32],
+ X = check_array(X, accept_sparse='csc', dtype=[np.float64, np.float32],
order='F', copy=copy_X)
- y = check_array(y, 'csc', dtype=X.dtype.type, order='F', copy=False,
-                 ensure_2d=False)
+ y = check_array(y, accept_sparse='csc', dtype=X.dtype.type,
+                 order='F', copy=False, ensure_2d=False)
if Xy is not None:
# Xy should be a 1d contiguous array or a 2D C ordered array
Xy = check_array(Xy, dtype=X.dtype.type, order='C', copy=False,
@@ -1095,7 +1095,8 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,

# Do the ordering and type casting here, as if it is done in the path,
# X is copied and a reference is kept here
- X_train = check_array(X_train, 'csc', dtype=dtype, order=X_order)
+ X_train = check_array(X_train, accept_sparse='csc', dtype=dtype,
+                       order=X_order)
alphas, coefs, _ = path(X_train, y_train, **path_params)
del X_train, y_train

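
The same idea applies to the check_array calls in enet_path and _path_residuals above; a hedged sketch with made-up data showing the keyword style for sparse format, dtype list, and memory layout:

    import numpy as np
    from sklearn.utils import check_array

    X = np.random.RandomState(0).rand(10, 4)

    # accept_sparse, dtype and order are all given by keyword; dense input is
    # returned as a Fortran-ordered float array.
    X_checked = check_array(X, accept_sparse='csc',
                            dtype=[np.float64, np.float32], order='F')
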
4 changes: 2 additions & 2 deletions sklearn/linear_model/_stochastic_gradient.py
@@ -487,8 +487,8 @@ def _partial_fit(self, X, y, alpha, C,
loss, learning_rate, max_iter,
classes, sample_weight,
coef_init, intercept_init):
- X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C",
-                  accept_large_sparse=False)
+ X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,
+                  order="C", accept_large_sparse=False)

n_samples, n_features = X.shape

3 changes: 2 additions & 1 deletion sklearn/manifold/_spectral_embedding.py
@@ -301,7 +301,8 @@ def spectral_embedding(adjacency, *, n_components=8, eigen_solver=None,
# matrix to the solver and afterward set it back to the original.
diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])
laplacian += diag_shift
- ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
+ ml = smoothed_aggregation_solver(check_array(laplacian,
+                                              accept_sparse='csr'))
laplacian -= diag_shift

M = ml.aspreconditioner()
8 changes: 4 additions & 4 deletions sklearn/neighbors/_nca.py
@@ -308,7 +308,7 @@ def _validate_params(self, X, y):
# Check the preferred dimensionality of the projected space
if self.n_components is not None:
check_scalar(
- self.n_components, 'n_components', numbers.Integral, 1)
+ self.n_components, 'n_components', numbers.Integral, min_val=1)

if self.n_components > X.shape[1]:
raise ValueError('The preferred dimensionality of the '
@@ -327,9 +327,9 @@ def _validate_params(self, X, y):
.format(X.shape[1],
self.components_.shape[1]))

- check_scalar(self.max_iter, 'max_iter', numbers.Integral, 1)
- check_scalar(self.tol, 'tol', numbers.Real, 0.)
- check_scalar(self.verbose, 'verbose', numbers.Integral, 0)
+ check_scalar(self.max_iter, 'max_iter', numbers.Integral, min_val=1)
+ check_scalar(self.tol, 'tol', numbers.Real, min_val=0.)
+ check_scalar(self.verbose, 'verbose', numbers.Integral, min_val=0)

if self.callback is not None:
if not callable(self.callback):
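
check_scalar is now used with its bounds spelled out as keywords above; a short sketch of the public call (the concrete values here are arbitrary examples):

    from numbers import Integral, Real
    from sklearn.utils import check_scalar

    # min_val passed by keyword, mirroring the updated _validate_params.
    check_scalar(50, 'max_iter', Integral, min_val=1)
    check_scalar(1e-5, 'tol', Real, min_val=0.)
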
2 changes: 1 addition & 1 deletion sklearn/neural_network/_rbm.py
@@ -357,7 +357,7 @@ def fit(self, X, y=None):

n_batches = int(np.ceil(float(n_samples) / self.batch_size))
batch_slices = list(gen_even_slices(n_batches * self.batch_size,
-                                    n_batches, n_samples))
+                                    n_batches, n_samples=n_samples))
verbose = self.verbose
begin = time.time()
for iteration in range(1, self.n_iter + 1):
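
gen_even_slices now takes its trailing sample limit as a keyword; a minimal sketch (the expected output is my reading of the generator, not taken from the diff):

    from sklearn.utils import gen_even_slices

    # Three roughly even slices over 6 items, truncated at n_samples=5.
    slices = list(gen_even_slices(6, 3, n_samples=5))
    # -> [slice(0, 2, None), slice(2, 4, None), slice(4, 5, None)]
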
2 changes: 1 addition & 1 deletion sklearn/preprocessing/_data.py
@@ -1707,7 +1707,7 @@ def normalize(X, norm='l2', *, axis=1, copy=True, return_norm=False):
else:
raise ValueError("'%d' is not a supported axis" % axis)

- X = check_array(X, sparse_format, copy=copy,
+ X = check_array(X, accept_sparse=sparse_format, copy=copy,
estimator='the normalize function', dtype=FLOAT_DTYPES)
if axis == 0:
X = X.T
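
The public normalize helper is unchanged for callers; only its internal check_array call now names accept_sparse. A small usage sketch with toy sparse input:

    import numpy as np
    from scipy import sparse
    from sklearn.preprocessing import normalize

    X = sparse.csr_matrix(np.array([[4., 1., 2.], [1., 3., 9.]]))

    # Row-wise L2 normalization; sparse input is accepted.
    X_l2 = normalize(X, norm='l2', axis=1)
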
20 changes: 12 additions & 8 deletions sklearn/utils/__init__.py
@@ -29,7 +29,8 @@
assert_all_finite,
check_random_state, column_or_1d, check_array,
check_consistent_length, check_X_y, indexable,
- check_symmetric, check_scalar)
+ check_symmetric, check_scalar,
+ _deprecate_positional_args)
from .. import get_config


@@ -314,10 +315,10 @@ def safe_indexing(X, indices, axis=0):
CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are
not supported.
"""
- return _safe_indexing(X, indices, axis)
+ return _safe_indexing(X, indices, axis=axis)


- def _safe_indexing(X, indices, axis=0):
+ def _safe_indexing(X, indices, *, axis=0):
"""Return rows, items or columns of X using indices.
.. warning::
@@ -684,7 +685,8 @@ def shuffle(*arrays, **options):
return resample(*arrays, **options)


- def safe_sqr(X, copy=True):
+ @_deprecate_positional_args
+ def safe_sqr(X, *, copy=True):
"""Element wise squaring of array-likes and sparse matrices.
Parameters
@@ -723,7 +725,8 @@ def _chunk_generator(gen, chunksize):
return


- def gen_batches(n, batch_size, min_batch_size=0):
+ @_deprecate_positional_args
+ def gen_batches(n, batch_size, *, min_batch_size=0):
"""Generator to create slices containing batch_size elements, from 0 to n.
The last slice may contain less than batch_size elements, when batch_size
@@ -772,7 +775,8 @@ def gen_batches(n, batch_size, min_batch_size=0):
yield slice(start, n)


- def gen_even_slices(n, n_packs, n_samples=None):
+ @_deprecate_positional_args
+ def gen_even_slices(n, n_packs, *, n_samples=None):
"""Generator to create n_packs slices going up to n.
Parameters
@@ -957,8 +961,8 @@ def _print_elapsed_time(source, message=None):
timeit.default_timer() - start))


- def get_chunk_n_rows(row_bytes, max_n_rows=None,
-                      working_memory=None):
+ @_deprecate_positional_args
+ def get_chunk_n_rows(row_bytes, *, max_n_rows=None, working_memory=None):
"""Calculates how many rows can be processed within working_memory
Parameters
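
The decorator driving all of these changes lives in sklearn/utils/validation.py and is private; the snippet below is a simplified stand-in written for illustration only (names and warning text are my assumptions), showing the general mechanism: read the keyword-only parameters from the signature, re-route any values still passed positionally into keyword arguments, and warn.

    import functools
    import warnings
    from inspect import Parameter, signature

    def deprecate_positional_args_sketch(f):
        """Simplified stand-in for sklearn's private _deprecate_positional_args."""
        sig = signature(f)
        kwonly = [name for name, p in sig.parameters.items()
                  if p.kind == Parameter.KEYWORD_ONLY]
        n_positional = len(sig.parameters) - len(kwonly)

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            extra = args[n_positional:]
            if extra:
                names = kwonly[:len(extra)]
                warnings.warn("Pass {} as keyword arguments.".format(
                    ", ".join(names)), FutureWarning)
                # Re-route the positional values so the call still succeeds.
                kwargs.update(zip(names, extra))
                args = args[:n_positional]
            return f(*args, **kwargs)
        return wrapper

    @deprecate_positional_args_sketch
    def gen_batches_like(n, batch_size, *, min_batch_size=0):
        return list(range(0, n, batch_size))

    gen_batches_like(10, 3, 1)   # warns, then behaves like min_batch_size=1
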
5 changes: 4 additions & 1 deletion sklearn/utils/class_weight.py
@@ -4,6 +4,8 @@

import numpy as np

+ from .validation import _deprecate_positional_args


def compute_class_weight(class_weight, classes, y):
"""Estimate class weights for unbalanced datasets.
@@ -69,7 +71,8 @@ def compute_class_weight(class_weight, classes, y):
return weight


- def compute_sample_weight(class_weight, y, indices=None):
+ @_deprecate_positional_args
+ def compute_sample_weight(class_weight, y, *, indices=None):
"""Estimate sample weights by class for unbalanced datasets.
Parameters
7 changes: 5 additions & 2 deletions sklearn/utils/multiclass.py
@@ -27,7 +27,9 @@ def _unique_multiclass(y):


def _unique_indicator(y):
- return np.arange(check_array(y, ['csr', 'csc', 'coo']).shape[1])
+ return np.arange(
+     check_array(y, accept_sparse=['csr', 'csc', 'coo']).shape[1]
+ )


_FN_UNIQUE_LABELS = {
@@ -83,7 +85,8 @@ def unique_labels(*ys):

# Check consistency for the indicator format
if (label_type == "multilabel-indicator" and
- len(set(check_array(y, ['csr', 'csc', 'coo']).shape[1]
+ len(set(check_array(y,
+                     accept_sparse=['csr', 'csc', 'coo']).shape[1]
for y in ys)) > 1):
raise ValueError("Multi-label binary indicator input with "
"different numbers of labels")
22 changes: 12 additions & 10 deletions sklearn/utils/tests/test_class_weight.py
@@ -192,39 +192,41 @@ def test_compute_sample_weight_with_subsample():
# Test compute_sample_weight with subsamples specified.
# Test with balanced classes and all samples present
y = np.asarray([1, 1, 1, 2, 2, 2])
- sample_weight = compute_sample_weight("balanced", y, range(6))
+ sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])

# Test with column vector of balanced classes and all samples present
y = np.asarray([[1], [1], [1], [2], [2], [2]])
- sample_weight = compute_sample_weight("balanced", y, range(6))
+ sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])

# Test with a subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
- sample_weight = compute_sample_weight("balanced", y, range(4))
+ sample_weight = compute_sample_weight("balanced", y, indices=range(4))
assert_array_almost_equal(sample_weight, [2. / 3, 2. / 3,
2. / 3, 2., 2., 2.])

# Test with a bootstrap subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
- sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3])
+ sample_weight = compute_sample_weight("balanced", y,
+                                       indices=[0, 1, 1, 2, 2, 3])
expected_balanced = np.asarray([0.6, 0.6, 0.6, 3., 3., 3.])
assert_array_almost_equal(sample_weight, expected_balanced)

# Test with a bootstrap subsample for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
- sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3])
+ sample_weight = compute_sample_weight("balanced", y,
+                                       indices=[0, 1, 1, 2, 2, 3])
assert_array_almost_equal(sample_weight, expected_balanced ** 2)

# Test with a missing class
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
- sample_weight = compute_sample_weight("balanced", y, range(6))
+ sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])

# Test with a missing class for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]])
- sample_weight = compute_sample_weight("balanced", y, range(6))
+ sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])


@@ -237,15 +239,15 @@ def test_compute_sample_weight_errors():
with pytest.raises(ValueError):
compute_sample_weight("ni", y)
with pytest.raises(ValueError):
- compute_sample_weight("ni", y, range(4))
+ compute_sample_weight("ni", y, indices=range(4))
with pytest.raises(ValueError):
compute_sample_weight("ni", y_)
with pytest.raises(ValueError):
- compute_sample_weight("ni", y_, range(4))
+ compute_sample_weight("ni", y_, indices=range(4))

# Not "balanced" for subsample
with pytest.raises(ValueError):
- compute_sample_weight({1: 2, 2: 1}, y, range(4))
+ compute_sample_weight({1: 2, 2: 1}, y, indices=range(4))

# Not a list or preset for multi-output
with pytest.raises(ValueError):
5 changes: 3 additions & 2 deletions sklearn/utils/tests/test_validation.py
@@ -63,7 +63,7 @@ def test_as_float_array():
X = X.astype(np.int64)
X2 = as_float_array(X, copy=True)
# Checking that the array wasn't overwritten
- assert as_float_array(X, False) is not X
+ assert as_float_array(X, copy=False) is not X
assert X2.dtype == np.float64
# Test int dtypes <= 32bit
tested_dtypes = [np.bool,
@@ -912,7 +912,8 @@ def test_check_scalar_valid(x, target_type, min_val, max_val):
"""Test that check_scalar returns no error/warning if valid inputs are
provided"""
with pytest.warns(None) as record:
- check_scalar(x, "test_name", target_type, min_val, max_val)
+ check_scalar(x, "test_name", target_type=target_type,
+              min_val=min_val, max_val=max_val)
assert len(record) == 0


