diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index 7effd3ee6eb43..161307dfddcf4 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -96,6 +96,7 @@ Estimators - :class:`decomposition.PCA` (with `svd_solver="full"`, `svd_solver="randomized"` and `power_iteration_normalizer="QR"`) - :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`) +- :class:`preprocessing.MaxAbsScaler` - :class:`preprocessing.MinMaxScaler` Tools diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 066a3af2666a4..351e155672098 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -108,7 +108,7 @@ Changelog `full` and `randomized` solvers (with QR power iterations). See :ref:`array_api` for more details. :pr:`26315` and :pr:`27098` by :user:`Mateusz Sokół `, - :user:`Olivier Grisel ` and :user:` Edoardo Abati `. + :user:`Olivier Grisel ` and :user:`Edoardo Abati `. - |Enhancement| :func:`decomposition.non_negative_factorization`, :class:`decomposition.NMF`, and :class:`decomposition.MiniBatchNMF` now support :class:`scipy.sparse.sparray` @@ -205,11 +205,11 @@ Changelog when `sparse_output=True` and the output is configured to be pandas. :pr:`26931` by `Thomas Fan`_. -- |MajorFeature| :class:`preprocessing.MinMaxScaler` now +- |MajorFeature| :class:`preprocessing.MinMaxScaler` and :class:`preprocessing.MaxAbsScaler` now support the `Array API `_. Array API support is considered experimental and might evolve without being subject to our usual rolling deprecation cycle policy. See - :ref:`array_api` for more details. :pr:`26243` by `Tim Head`_. + :ref:`array_api` for more details. :pr:`26243` by `Tim Head`_ and :pr:`27110` by :user:`Edoardo Abati `. :mod:`sklearn.tree` ................... 
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 160e482379989..ba8627935f28d 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -1215,12 +1215,14 @@ def partial_fit(self, X, y=None): self : object Fitted scaler. """ + xp, _ = get_namespace(X) + first_pass = not hasattr(self, "n_samples_seen_") X = self._validate_data( X, reset=first_pass, accept_sparse=("csr", "csc"), - dtype=FLOAT_DTYPES, + dtype=_array_api.supported_float_dtypes(xp), force_all_finite="allow-nan", ) @@ -1228,12 +1230,12 @@ def partial_fit(self, X, y=None): mins, maxs = min_max_axis(X, axis=0, ignore_nan=True) max_abs = np.maximum(np.abs(mins), np.abs(maxs)) else: - max_abs = np.nanmax(np.abs(X), axis=0) + max_abs = _array_api._nanmax(xp.abs(X), axis=0) if first_pass: self.n_samples_seen_ = X.shape[0] else: - max_abs = np.maximum(self.max_abs_, max_abs) + max_abs = xp.maximum(self.max_abs_, max_abs) self.n_samples_seen_ += X.shape[0] self.max_abs_ = max_abs @@ -1254,12 +1256,15 @@ def transform(self, X): Transformed array. """ check_is_fitted(self) + + xp, _ = get_namespace(X) + X = self._validate_data( X, accept_sparse=("csr", "csc"), copy=self.copy, reset=False, - dtype=FLOAT_DTYPES, + dtype=_array_api.supported_float_dtypes(xp), force_all_finite="allow-nan", ) @@ -1283,11 +1288,14 @@ def inverse_transform(self, X): Transformed array. 
""" check_is_fitted(self) + + xp, _ = get_namespace(X) + X = check_array( X, accept_sparse=("csr", "csc"), copy=self.copy, - dtype=FLOAT_DTYPES, + dtype=_array_api.supported_float_dtypes(xp), force_all_finite="allow-nan", ) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 5af6172080e9e..d61996b76a25c 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -701,12 +701,10 @@ def test_standard_check_array_of_inverse_transform(): ) @pytest.mark.parametrize( "estimator", - [MinMaxScaler()], + [MaxAbsScaler(), MinMaxScaler()], ids=_get_check_estimator_ids, ) -def test_minmaxscaler_array_api_compliance( - estimator, check, array_namespace, device, dtype -): +def test_scaler_array_api_compliance(estimator, check, array_namespace, device, dtype): name = estimator.__class__.__name__ check(name, estimator, array_namespace, device=device, dtype=dtype) diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 2bc49117792c9..0fb5a0e264aa8 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -495,7 +495,7 @@ def _nanmin(X, axis=None): else: mask = xp.isnan(X) - X = xp.min(xp.where(mask, xp.asarray(+xp.inf), X), axis=axis) + X = xp.min(xp.where(mask, xp.asarray(+xp.inf, device=device(X)), X), axis=axis) # Replace Infs from all NaN slices with NaN again mask = xp.all(mask, axis=axis) if xp.any(mask): @@ -512,7 +512,7 @@ def _nanmax(X, axis=None): else: mask = xp.isnan(X) - X = xp.max(xp.where(mask, xp.asarray(-xp.inf), X), axis=axis) + X = xp.max(xp.where(mask, xp.asarray(-xp.inf, device=device(X)), X), axis=axis) # Replace Infs from all NaN slices with NaN again mask = xp.all(mask, axis=axis) if xp.any(mask):