From 6b2ca6e8b10722b865973a204de141f995854946 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 15 Feb 2024 13:08:34 -0500 Subject: [PATCH 01/22] Draft of the metadata routing for bagging Signed-off-by: Adam Li --- doc/metadata_routing.rst | 4 +- doc/whats_new/v1.5.rst | 6 ++ sklearn/ensemble/_bagging.py | 63 +++++++++-- sklearn/ensemble/tests/test_bagging.py | 100 ++++++++++++++++++ .../test_metaestimators_metadata_routing.py | 4 - 5 files changed, 165 insertions(+), 12 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 658357d910464..7f2719606a4dd 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -274,6 +274,8 @@ Meta-estimators and functions supporting metadata routing: - :class:`sklearn.calibration.CalibratedClassifierCV` - :class:`sklearn.compose.ColumnTransformer` +- :class:`sklearn.ensemble.BaggingClassifier` +- :class:`sklearn.ensemble.BaggingRegressor` - :class:`sklearn.feature_selection.SelectFromModel` - :class:`sklearn.impute.IterativeImputer` - :class:`sklearn.linear_model.ElasticNetCV` @@ -306,8 +308,6 @@ Meta-estimators and tools not supporting metadata routing yet: - :class:`sklearn.covariance.GraphicalLassoCV` - :class:`sklearn.ensemble.AdaBoostClassifier` - :class:`sklearn.ensemble.AdaBoostRegressor` -- :class:`sklearn.ensemble.BaggingClassifier` -- :class:`sklearn.ensemble.BaggingRegressor` - :class:`sklearn.ensemble.StackingClassifier` - :class:`sklearn.ensemble.StackingRegressor` - :class:`sklearn.ensemble.VotingClassifier` diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index c0c96f14ae612..6157609512555 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -83,6 +83,12 @@ Changelog pre-sorting the data before finding the thresholds for binning. :pr:`28102` by :user:`Christian Lorentzen `. +- |Enhancement| :class:`ensemble.BaggingClassifier` and :class:`ensemble.BaggingRegressor` + now supports metadata routing. The fit methods now + accepts ``**fit_params`` which are passed to the underlying estimators + via their `fit` methods. + :pr:`` by :user:`Adam Li `. + :mod:`sklearn.feature_extraction` ................................. diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index da340ceec6fe4..d1d1ba3a97680 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -13,7 +13,7 @@ import numpy as np -from ..base import ClassifierMixin, RegressorMixin, _fit_context +from ..base import Bunch, ClassifierMixin, RegressorMixin, _fit_context from ..metrics import accuracy_score, r2_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, column_or_1d, indices_to_mask @@ -21,7 +21,11 @@ from ..utils._tags import _safe_tags from ..utils.metadata_routing import ( _raise_for_unsupported_routing, - _RoutingNotSupportedMixin, + MetadataRouter, + MethodMapping, + _raise_for_params, + _routing_enabled, + process_routing, ) from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets @@ -298,7 +302,7 @@ def __init__( # BaseBagging.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y, sample_weight=None): + def fit(self, X, y, **fit_params): """Build a Bagging ensemble of estimators from the training set (X, y). Parameters @@ -316,12 +320,35 @@ def fit(self, X, y, sample_weight=None): Note that this is supported only if the base estimator supports sample weighting. 
+ **fit_params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.5 + + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Returns ------- self : object Fitted estimator. """ - _raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight) + _raise_for_params(self, "fit") + + if _routing_enabled(): + routed_params = process_routing(self, "fit", **fit_params) + else: + routed_params = Bunch() + for name in names: + routed_params[name] = Bunch(fit={}) + if "sample_weight" in fit_params: + routed_params[name].fit["sample_weight"] = fit_params[ + "sample_weight" + ] + # Convert data (X is required to be 2d and indexable) X, y = self._validate_data( X, @@ -537,8 +564,32 @@ def estimators_samples_(self): """ return [sample_indices for _, sample_indices in self._get_estimators_indices()] + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.5 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. + """ + router = MetadataRouter(owner=self.__class__.__name__) + + # `self.estimators` is a list of (name, est) tuples + for name, estimator in self.estimators: + router.add( + **{name: estimator}, + method_mapping=MethodMapping().add(callee="fit", caller="fit"), + ) + return router + -class BaggingClassifier(_RoutingNotSupportedMixin, ClassifierMixin, BaseBagging): +class BaggingClassifier(ClassifierMixin, BaseBagging): """A Bagging classifier. A Bagging classifier is an ensemble meta-estimator that fits base @@ -970,7 +1021,7 @@ def _more_tags(self): return {"allow_nan": _safe_tags(estimator, "allow_nan")} -class BaggingRegressor(_RoutingNotSupportedMixin, RegressorMixin, BaseBagging): +class BaggingRegressor(RegressorMixin, BaseBagging): """A Bagging regressor. 
A Bagging regressor is an ensemble meta-estimator that fits base diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 2c1e308cee33b..5d7c4806ab105 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -5,12 +5,14 @@ # Author: Gilles Louppe # License: BSD 3 clause from itertools import cycle, product +import re import joblib import numpy as np import pytest from sklearn.base import BaseEstimator +from sklearn.calibration import CalibratedClassifierCV from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.ensemble import ( @@ -31,6 +33,12 @@ from sklearn.utils import check_random_state from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS +from sklearn.tests.metadata_routing_common import ( + ConsumingClassifier, + ConsumingRegressor, + _Registry, + check_recorded_metadata, +) rng = check_random_state(0) @@ -936,3 +944,95 @@ def fit(self, X, y): def test_bagging_allow_nan_tag(bagging, expected_allow_nan): """Check that bagging inherits allow_nan tag.""" assert bagging._get_tags()["allow_nan"] == expected_allow_nan + + +# Metadata Routing Tests +# ====================== + + +@pytest.mark.parametrize( + "Estimator, BaseEst", + [(BaggingClassifier, ConsumingClassifier), (BaggingRegressor, ConsumingRegressor)], +) +def test_routing_passed_metadata_not_supported(Estimator, BaseEst): + """Test that the right error message is raised when metadata is passed while + not supported when `enable_metadata_routing=False`.""" + + X = np.array([[0, 1], [2, 2], [4, 6]]) + y = [1, 2, 3] + + with pytest.raises( + ValueError, match="is only supported if enable_metadata_routing=True" + ): + Estimator(estimator=BaseEst()).fit(X, y, sample_weight=[1, 1, 1], metadata="a") + + +@pytest.mark.usefixtures("enable_slep006") +@pytest.mark.parametrize( + "Estimator, BaseEst", + [(BaggingClassifier, ConsumingClassifier), (BaggingRegressor, ConsumingRegressor)], +) +def test_get_metadata_routing_without_fit(Estimator, BaseEst): + # Test that metadata_routing() doesn't raise when called before fit. 
+ est = Estimator(estimator=BaseEst()) + est.get_metadata_routing() + + +@pytest.mark.usefixtures("enable_slep006") +@pytest.mark.parametrize( + "Estimator, BaseEst", + [(BaggingClassifier, ConsumingClassifier), (BaggingRegressor, ConsumingRegressor)], +) +@pytest.mark.parametrize("prop", ["sample_weight", "metadata"]) +def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): + """Test that metadata is routed correctly for Bagging*.""" + X = np.array([[0, 1], [2, 2], [4, 6]]) + y = [1, 2, 3] + sample_weight, metadata = [1, 1, 1], "a" + + est = Estimator( + estimator=BaseEst(registry=_Registry()).set_fit_request(**{prop: True}) + ) + + est.fit(X, y, **{prop: sample_weight if prop == "sample_weight" else metadata}) + + for estimator in est.estimators: + if prop == "sample_weight": + kwargs = {prop: sample_weight} + else: + kwargs = {prop: metadata} + # access sub-estimator in (name, est) with estimator[1] + registry = estimator[1].registry + assert len(registry) + for sub_est in registry: + check_recorded_metadata( + obj=sub_est, + method="fit", + **kwargs, + ) + + +@pytest.mark.usefixtures("enable_slep006") +@pytest.mark.parametrize( + "Estimator, BaseEst", + [(BaggingClassifier, ConsumingClassifier), (BaggingClassifier, ConsumingRegressor)], +) +def test_metadata_routing_error_for_bagging_estimators(Estimator, BaseEst): + """Test that the right error is raised when metadata is not requested.""" + X = np.array([[0, 1], [2, 2], [4, 6]]) + y = [1, 2, 3] + sample_weight, metadata = [1, 1, 1], "a" + + est = Estimator(estimator=BaseEst()) + + error_message = ( + "[sample_weight, metadata] are passed but are not explicitly set as requested" + f" or not for {BaseEst.__name__}.fit" + ) + + with pytest.raises(ValueError, match=re.escape(error_message)): + est.fit(X, y, sample_weight=sample_weight, metadata=metadata) + + +# End of Metadata Routing Tests +# ============================= diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 9aa305e11e6fd..880b6217ba38f 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -12,8 +12,6 @@ from sklearn.ensemble import ( AdaBoostClassifier, AdaBoostRegressor, - BaggingClassifier, - BaggingRegressor, StackingClassifier, StackingRegressor, VotingClassifier, @@ -336,8 +334,6 @@ def enable_slep006(): UNSUPPORTED_ESTIMATORS = [ AdaBoostClassifier(), AdaBoostRegressor(), - BaggingClassifier(), - BaggingRegressor(), FeatureUnion([]), GraphicalLassoCV(), RANSACRegressor(), From 6fc50fcc986c7d81f480256099f3c15026daeaf7 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 15 Feb 2024 16:46:30 -0500 Subject: [PATCH 02/22] Adding metadata routing Signed-off-by: Adam Li --- doc/whats_new/v1.5.rst | 2 +- sklearn/ensemble/_bagging.py | 111 +++++++++++++++-------- sklearn/ensemble/tests/test_bagging.py | 18 ++-- sklearn/tests/metadata_routing_common.py | 10 +- 4 files changed, 88 insertions(+), 53 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 6157609512555..38994f361dcc3 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -87,7 +87,7 @@ Changelog now supports metadata routing. The fit methods now accepts ``**fit_params`` which are passed to the underlying estimators via their `fit` methods. - :pr:`` by :user:`Adam Li `. + :pr:`28432` by :user:`Adam Li `. :mod:`sklearn.feature_extraction` ................................. 
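For context, the user-facing behaviour that the changelog entry above describes looks roughly like this (a minimal sketch, not part of the diff; the choice of `LogisticRegression` as base estimator is arbitrary, while `set_config` and `set_fit_request` are pre-existing scikit-learn routing APIs):

    import numpy as np
    from sklearn import set_config
    from sklearn.ensemble import BaggingClassifier
    from sklearn.linear_model import LogisticRegression

    # Opt in to SLEP006 metadata routing; without this, passing extra
    # fit params to BaggingClassifier.fit raises an error.
    set_config(enable_metadata_routing=True)

    X = np.array([[0.0, 1.0], [2.0, 2.0], [4.0, 6.0],
                  [1.0, 3.0], [3.0, 1.0], [5.0, 2.0]])
    y = np.array([0, 1, 0, 1, 0, 1])

    # The base estimator must explicitly request the metadata it consumes.
    base = LogisticRegression().set_fit_request(sample_weight=True)
    bagging = BaggingClassifier(estimator=base, n_estimators=3, random_state=0)

    # `sample_weight` is routed to each sub-estimator's `fit`, folded
    # together with the per-estimator bootstrap weights.
    bagging.fit(X, y, sample_weight=np.ones(len(y)))
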
diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index d1d1ba3a97680..88e2a74d9b8a4 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -13,17 +13,21 @@ import numpy as np -from ..base import Bunch, ClassifierMixin, RegressorMixin, _fit_context +from ..base import ClassifierMixin, RegressorMixin, _fit_context from ..metrics import accuracy_score, r2_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import check_random_state, column_or_1d, indices_to_mask +from ..utils import ( + Bunch, + _safe_indexing, + check_random_state, + column_or_1d, + indices_to_mask, +) from ..utils._param_validation import HasMethods, Interval, RealNotInt from ..utils._tags import _safe_tags from ..utils.metadata_routing import ( - _raise_for_unsupported_routing, MetadataRouter, MethodMapping, - _raise_for_params, _routing_enabled, process_routing, ) @@ -31,7 +35,13 @@ from ..utils.multiclass import check_classification_targets from ..utils.parallel import Parallel, delayed from ..utils.random import sample_without_replacement -from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter +from ..utils.validation import ( + _check_method_params, + _check_sample_weight, + _deprecate_positional_args, + check_is_fitted, + has_fit_parameter, +) from ._base import BaseEnsemble, _partition_estimators __all__ = ["BaggingClassifier", "BaggingRegressor"] @@ -81,11 +91,11 @@ def _parallel_build_estimators( ensemble, X, y, - sample_weight, seeds, total_n_estimators, verbose, check_input, + fit_params, ): """Private function used to build a batch of estimators within a job.""" # Retrieve settings @@ -94,17 +104,16 @@ def _parallel_build_estimators( max_samples = ensemble._max_samples bootstrap = ensemble.bootstrap bootstrap_features = ensemble.bootstrap_features - support_sample_weight = has_fit_parameter(ensemble.estimator_, "sample_weight") has_check_input = has_fit_parameter(ensemble.estimator_, "check_input") requires_feature_indexing = bootstrap_features or max_features != n_features - if not support_sample_weight and sample_weight is not None: - raise ValueError("The base estimator doesn't support sample weight") - # Build estimators estimators = [] estimators_features = [] + # Row sampling can be achieved either through setting sample_weight or + # by indexing. The former is more efficient. Therefore, use this method + # if possible, otherwise use indexing. for i in range(n_estimators): if verbose > 1: print( @@ -131,12 +140,14 @@ def _parallel_build_estimators( max_samples, ) + # TODO(SLEP6): remove if condition for unrouted sample_weight when metadata + # routing can't be disabled. 
# Draw samples, using sample weights, and then fit - if support_sample_weight: - if sample_weight is None: - curr_sample_weight = np.ones((n_samples,)) - else: - curr_sample_weight = sample_weight.copy() + if not _routing_enabled() and "sample_weight" in fit_params: + # row subsampling via sample_weight + curr_sample_weight = _check_sample_weight( + fit_params.pop("sample_weight", None), X + ).copy() if bootstrap: sample_counts = np.bincount(indices, minlength=n_samples) @@ -146,10 +157,15 @@ def _parallel_build_estimators( curr_sample_weight[not_indices_mask] = 0 X_ = X[:, features] if requires_feature_indexing else X - estimator_fit(X_, y, sample_weight=curr_sample_weight) + estimator_fit(X_, y, sample_weight=curr_sample_weight, **fit_params) else: - X_ = X[indices][:, features] if requires_feature_indexing else X[indices] - estimator_fit(X_, y[indices]) + # cannot use sample_weight, use indexing + y_ = _safe_indexing(y, indices) + X_ = _safe_indexing(X, indices) + fit_params = _check_method_params(X, params=fit_params, indices=indices) + if requires_feature_indexing: + X_ = X_[:, features] + estimator_fit(X_, y_, **fit_params) estimators.append(estimator) estimators_features.append(features) @@ -298,11 +314,15 @@ def __init__( self.random_state = random_state self.verbose = verbose + # TODO(1.7): remove `sample_weight` from the signature after deprecation + # cycle; pop it from `fit_params` before the `_raise_for_params` check and + # reinsert later, for backwards compatibility + @_deprecate_positional_args(version="1.7") @_fit_context( # BaseBagging.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y, **fit_params): + def fit(self, X, y, *, sample_weight=None, **fit_params): """Build a Bagging ensemble of estimators from the training set (X, y). Parameters @@ -336,18 +356,12 @@ def fit(self, X, y, **fit_params): self : object Fitted estimator. """ - _raise_for_params(self, "fit") - - if _routing_enabled(): - routed_params = process_routing(self, "fit", **fit_params) - else: - routed_params = Bunch() - for name in names: - routed_params[name] = Bunch(fit={}) - if "sample_weight" in fit_params: - routed_params[name].fit["sample_weight"] = fit_params[ - "sample_weight" - ] + if fit_params and not _routing_enabled(): + raise ValueError( + "params is only supported if enable_metadata_routing=True. See the User" + " Guide at https://scikit-learn.org/stable/metadata_routing.html for" + " more information." + ) # Convert data (X is required to be 2d and indexable) X, y = self._validate_data( @@ -358,7 +372,10 @@ def fit(self, X, y, **fit_params): force_all_finite=False, multi_output=True, ) - return self._fit(X, y, self.max_samples, sample_weight=sample_weight) + + return self._fit( + X, y, self.max_samples, sample_weight=sample_weight, **fit_params + ) def _parallel_args(self): return {} @@ -371,6 +388,7 @@ def _fit( max_depth=None, sample_weight=None, check_input=True, + **fit_params, ): """Build a Bagging ensemble of estimators from the training set (X, y). @@ -400,6 +418,12 @@ def _fit( check_input : bool, default=True Override value used when fitting base estimator. Only supported if the base estimator has a check_input parameter for fit function. + If the meta-estimator already checks the input, set this value to + False to prevent redundant input validation. + + **fit_params : dict, default=None + Parameters to pass to the :term:`fit` method of the underlying + estimator. 
Returns ------- @@ -419,6 +443,16 @@ def _fit( # Check parameters self._validate_estimator() + if _routing_enabled(): + routed_params = process_routing( + self, "fit", sample_weight=sample_weight, **fit_params + ) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(fit=fit_params) + if "sample_weight" != None: + routed_params.estimator.fit["sample_weight"] = sample_weight + if max_depth is not None: self.estimator_.max_depth = max_depth @@ -501,11 +535,11 @@ def _fit( self, X, y, - sample_weight, seeds[starts[i] : starts[i + 1]], total_n_estimators, verbose=self.verbose, check_input=check_input, + fit_params=routed_params.estimator.fit, ) for i in range(n_jobs) ) @@ -579,13 +613,10 @@ def get_metadata_routing(self): routing information. """ router = MetadataRouter(owner=self.__class__.__name__) - - # `self.estimators` is a list of (name, est) tuples - for name, estimator in self.estimators: - router.add( - **{name: estimator}, - method_mapping=MethodMapping().add(callee="fit", caller="fit"), - ) + router.add( + estimator=self.estimator, + method_mapping=MethodMapping().add(callee="fit", caller="fit"), + ) return router diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 5d7c4806ab105..3b755edd5f41b 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -4,15 +4,14 @@ # Author: Gilles Louppe # License: BSD 3 clause -from itertools import cycle, product import re +from itertools import cycle, product import joblib import numpy as np import pytest from sklearn.base import BaseEstimator -from sklearn.calibration import CalibratedClassifierCV from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.ensemble import ( @@ -29,16 +28,16 @@ from sklearn.preprocessing import FunctionTransformer, scale from sklearn.random_projection import SparseRandomProjection from sklearn.svm import SVC, SVR -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal -from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS from sklearn.tests.metadata_routing_common import ( ConsumingClassifier, ConsumingRegressor, _Registry, check_recorded_metadata, ) +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal +from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS rng = check_random_state(0) @@ -988,7 +987,7 @@ def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): """Test that metadata is routed correctly for Bagging*.""" X = np.array([[0, 1], [2, 2], [4, 6]]) y = [1, 2, 3] - sample_weight, metadata = [1, 1, 1], "a" + sample_weight, metadata = [1.0, 1.0, 1.0], "a" est = Estimator( estimator=BaseEst(registry=_Registry()).set_fit_request(**{prop: True}) @@ -996,13 +995,12 @@ def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): est.fit(X, y, **{prop: sample_weight if prop == "sample_weight" else metadata}) - for estimator in est.estimators: + for estimator in est.estimators_: if prop == "sample_weight": kwargs = {prop: sample_weight} else: kwargs = {prop: metadata} - # access sub-estimator in (name, est) with estimator[1] - registry = estimator[1].registry + registry = estimator.registry 
assert len(registry) for sub_est in registry: check_recorded_metadata( diff --git a/sklearn/tests/metadata_routing_common.py b/sklearn/tests/metadata_routing_common.py index e330cd3960aeb..c075cbca9acbe 100644 --- a/sklearn/tests/metadata_routing_common.py +++ b/sklearn/tests/metadata_routing_common.py @@ -1,6 +1,7 @@ from functools import partial import numpy as np +from numpy.testing import assert_array_equal from sklearn.base import ( BaseEstimator, @@ -56,7 +57,9 @@ def check_recorded_metadata(obj, method, split_params=tuple(), **kwargs): of the original values. """ records = getattr(obj, "_records", dict()).get(method, dict()) - assert set(kwargs.keys()) == set(records.keys()) + assert set(kwargs.keys()) == set( + records.keys() + ), f"Expected {kwargs.keys()} vs {records.keys()}" for key, value in kwargs.items(): recorded_value = records[key] # The following condition is used to check for any specified parameters @@ -64,7 +67,10 @@ def check_recorded_metadata(obj, method, split_params=tuple(), **kwargs): if key in split_params and recorded_value is not None: assert np.isin(recorded_value, value).all() else: - assert recorded_value is value + if isinstance(recorded_value, np.ndarray): + assert_array_equal(recorded_value, value) + else: + assert recorded_value is value, f"Expected {recorded_value} vs {value}" record_metadata_not_default = partial(record_metadata, record_default=False) From a14913a04f5b8da3acbe7ddf14f809ba5d26394d Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 15 Feb 2024 16:51:46 -0500 Subject: [PATCH 03/22] Raise for params Signed-off-by: Adam Li --- sklearn/ensemble/_bagging.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 88e2a74d9b8a4..49400fe42f92c 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -30,6 +30,7 @@ MethodMapping, _routing_enabled, process_routing, + _raise_for_params, ) from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets @@ -356,12 +357,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): self : object Fitted estimator. """ - if fit_params and not _routing_enabled(): - raise ValueError( - "params is only supported if enable_metadata_routing=True. See the User" - " Guide at https://scikit-learn.org/stable/metadata_routing.html for" - " more information." 
- ) + _raise_for_params(fit_params, self, "fit") # Convert data (X is required to be 2d and indexable) X, y = self._validate_data( From 351f2b29e2f47ef3e0bcf5728dd633a2859a60f4 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 15 Feb 2024 16:55:20 -0500 Subject: [PATCH 04/22] Fix lint Signed-off-by: Adam Li --- sklearn/ensemble/_bagging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 49400fe42f92c..eef5e53977bc6 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -28,9 +28,9 @@ from ..utils.metadata_routing import ( MetadataRouter, MethodMapping, + _raise_for_params, _routing_enabled, process_routing, - _raise_for_params, ) from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets From 52e99a938605e4b9561cf8625274b9377f8dbed4 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 15 Feb 2024 22:54:45 -0500 Subject: [PATCH 05/22] Address final test Signed-off-by: Adam Li --- sklearn/ensemble/_bagging.py | 76 +++++++++++++++++++++++--- sklearn/ensemble/tests/test_bagging.py | 14 +++-- 2 files changed, 78 insertions(+), 12 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index eef5e53977bc6..8749c741922b6 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -31,6 +31,7 @@ _raise_for_params, _routing_enabled, process_routing, + get_routing_for_object, ) from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets @@ -87,6 +88,14 @@ def _generate_bagging_indices( return feature_indices, sample_indices +def _supports_sample_weight(request_or_router): + """Check if the fit method supports sample_weight""" + param_names = request_or_router._get_param_names( + method="fit", return_alias=True, ignore_self_request=False + ) + return "sample_weight" in param_names + + def _parallel_build_estimators( n_estimators, ensemble, @@ -105,6 +114,7 @@ def _parallel_build_estimators( max_samples = ensemble._max_samples bootstrap = ensemble.bootstrap bootstrap_features = ensemble.bootstrap_features + support_sample_weight = has_fit_parameter(ensemble.estimator_, "sample_weight") has_check_input = has_fit_parameter(ensemble.estimator_, "check_input") requires_feature_indexing = bootstrap_features or max_features != n_features @@ -112,6 +122,17 @@ def _parallel_build_estimators( estimators = [] estimators_features = [] + request_or_router = get_routing_for_object(ensemble.estimator_) + + if ( + not _supports_sample_weight(request_or_router) + and fit_params.get("sample_weight") is not None + ): + raise ValueError( + "The base estimator doesn't support sample weight, but sample_weight is " + "passed to the fit method." + ) + # Row sampling can be achieved either through setting sample_weight or # by indexing. The former is more efficient. Therefore, use this method # if possible, otherwise use indexing. @@ -125,6 +146,7 @@ def _parallel_build_estimators( random_state = seeds[i] estimator = ensemble._make_estimator(append=False, random_state=random_state) + records = getattr(estimator, "_records", dict()).get('fit', dict()) if has_check_input: estimator_fit = partial(estimator.fit, check_input=check_input) else: @@ -141,13 +163,14 @@ def _parallel_build_estimators( max_samples, ) + fit_params_ = fit_params.copy() # TODO(SLEP6): remove if condition for unrouted sample_weight when metadata # routing can't be disabled. 
# Draw samples, using sample weights, and then fit - if not _routing_enabled() and "sample_weight" in fit_params: + if support_sample_weight: # row subsampling via sample_weight curr_sample_weight = _check_sample_weight( - fit_params.pop("sample_weight", None), X + fit_params_.pop("sample_weight", None), X ).copy() if bootstrap: @@ -157,17 +180,26 @@ def _parallel_build_estimators( not_indices_mask = ~indices_to_mask(indices, n_samples) curr_sample_weight[not_indices_mask] = 0 + fit_params_["sample_weight"] = curr_sample_weight X_ = X[:, features] if requires_feature_indexing else X - estimator_fit(X_, y, sample_weight=curr_sample_weight, **fit_params) + if not _routing_enabled(): + estimator_fit(X_, y, sample_weight=curr_sample_weight) + else: + estimator_fit(X_, y, **fit_params_) else: + # if sample_weight is not supported in the Bagging estimator, pop it from fit_params + fit_params_.pop("sample_weight", None) + # cannot use sample_weight, use indexing y_ = _safe_indexing(y, indices) X_ = _safe_indexing(X, indices) - fit_params = _check_method_params(X, params=fit_params, indices=indices) + fit_params_ = _check_method_params(X, params=fit_params_, indices=indices) if requires_feature_indexing: X_ = X_[:, features] - estimator_fit(X_, y_, **fit_params) + estimator_fit(X_, y_, **fit_params_) + records = getattr(estimator, "_records", dict()).get('fit', dict()) + print('Next here...' , records) estimators.append(estimator) estimators_features.append(features) @@ -615,6 +647,10 @@ def get_metadata_routing(self): ) return router + @abstractmethod + def _get_estimator(self): + """Resolve which estimator to return.""" + class BaggingClassifier(ClassifierMixin, BaseBagging): """A Bagging classifier. @@ -815,7 +851,21 @@ def __init__( def _validate_estimator(self): """Check the estimator and set the estimator_ attribute.""" - super()._validate_estimator(default=DecisionTreeClassifier()) + super()._validate_estimator(default=self._get_estimator()) + + def _get_estimator(self): + """Resolve which estimator to return (default is DecisionTreeClassifier)""" + estimator = self.estimator + + if estimator is None: + # we want all classifiers that don't expose a random_state + # to be deterministic (and we don't want to expose this one). + estimator = DecisionTreeClassifier() + + if _routing_enabled(): + estimator.set_fit_request(sample_weight=True) + + return estimator def _set_oob_score(self, X, y): n_samples = y.shape[0] @@ -1282,7 +1332,7 @@ def predict(self, X): def _validate_estimator(self): """Check the estimator and set the estimator_ attribute.""" - super()._validate_estimator(default=DecisionTreeRegressor()) + super()._validate_estimator(default=self._get_estimator()) def _set_oob_score(self, X, y): n_samples = y.shape[0] @@ -1318,3 +1368,15 @@ def _more_tags(self): else: estimator = self.estimator return {"allow_nan": _safe_tags(estimator, "allow_nan")} + + def _get_estimator(self): + """Resolve which estimator to return (default is DecisionTreeClassifier)""" + estimator = self.estimator + + if estimator is None: + # we want all classifiers that don't expose a random_state + # to be deterministic (and we don't want to expose this one). 
+            estimator = DecisionTreeRegressor()
+        if _routing_enabled():
+            estimator.set_fit_request(sample_weight=True)
+        return estimator
diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py
index 3b755edd5f41b..737efb06e71e1 100644
--- a/sklearn/ensemble/tests/test_bagging.py
+++ b/sklearn/ensemble/tests/test_bagging.py
@@ -38,6 +38,8 @@
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal
 from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
+from sklearn.utils.validation import has_fit_parameter
+
 
 rng = check_random_state(0)
 
@@ -993,6 +995,7 @@ def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop):
         estimator=BaseEst(registry=_Registry()).set_fit_request(**{prop: True})
     )
 
+    print({prop: sample_weight if prop == "sample_weight" else metadata})
     est.fit(X, y, **{prop: sample_weight if prop == "sample_weight" else metadata})
 
     for estimator in est.estimators_:
@@ -1003,11 +1006,12 @@ def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop):
         registry = estimator.registry
         assert len(registry)
         for sub_est in registry:
-            check_recorded_metadata(
-                obj=sub_est,
-                method="fit",
-                **kwargs,
-            )
+            if not has_fit_parameter(sub_est, "sample_weight"):
+                check_recorded_metadata(
+                    obj=sub_est,
+                    method="fit",
+                    **kwargs,
+                )
 
 
 @pytest.mark.usefixtures("enable_slep006")

From 9558c18697ebbffefee921be17e19db6eff08099 Mon Sep 17 00:00:00 2001
From: Adam Li 
Date: Thu, 15 Feb 2024 23:06:31 -0500
Subject: [PATCH 06/22] Fix lint

Signed-off-by: Adam Li 
---
 sklearn/ensemble/_bagging.py           | 11 ++++++-----
 sklearn/ensemble/tests/test_bagging.py |  1 -
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
index 8749c741922b6..bd8156f7f687e 100644
--- a/sklearn/ensemble/_bagging.py
+++ b/sklearn/ensemble/_bagging.py
@@ -30,8 +30,8 @@
     MethodMapping,
     _raise_for_params,
     _routing_enabled,
-    process_routing,
     get_routing_for_object,
+    process_routing,
 )
 from ..utils.metaestimators import available_if
 from ..utils.multiclass import check_classification_targets
@@ -146,7 +146,7 @@ def _parallel_build_estimators(
         random_state = seeds[i]
         estimator = ensemble._make_estimator(append=False, random_state=random_state)
 
-        records = getattr(estimator, "_records", dict()).get('fit', dict())
+        records = getattr(estimator, "_records", dict()).get("fit", dict())
         if has_check_input:
             estimator_fit = partial(estimator.fit, check_input=check_input)
         else:
@@ -187,7 +187,8 @@ def _parallel_build_estimators(
         else:
-            # if sample_weight is not supported in the Bagging estimator, pop it from fit_params
+            # if sample_weight is not supported in the Bagging estimator, pop it
+            # from fit_params
             fit_params_.pop("sample_weight", None)
 
             # cannot use sample_weight, use indexing
@@ -198,8 +199,8 @@ def _parallel_build_estimators(
                 X_ = X_[:, features]
             estimator_fit(X_, y_, **fit_params_)
 
-            records = getattr(estimator, "_records", dict()).get('fit', dict())
-            print('Next here...'
, records) + records = getattr(estimator, "_records", dict()).get("fit", dict()) + print("Next here...", records) estimators.append(estimator) estimators_features.append(features) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 737efb06e71e1..01bf2991eabd3 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -40,7 +40,6 @@ from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS from sklearn.utils.validation import has_fit_parameter - rng = check_random_state(0) # also load the iris dataset From fc77399ee6cddc5aa8e874d1741b82a1c4bc82d1 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 16 Feb 2024 10:06:36 -0500 Subject: [PATCH 07/22] Fix iforest and print statements Signed-off-by: Adam Li --- sklearn/ensemble/_bagging.py | 1 - sklearn/ensemble/_iforest.py | 12 +++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index bd8156f7f687e..e702bb91682c2 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -200,7 +200,6 @@ def _parallel_build_estimators( estimator_fit(X_, y_, **fit_params_) records = getattr(estimator, "_records", dict()).get("fit", dict()) - print("Next here...", records) estimators.append(estimator) estimators_features.append(features) diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py index c975f121798f0..870bd9cfd9b6c 100644 --- a/sklearn/ensemble/_iforest.py +++ b/sklearn/ensemble/_iforest.py @@ -232,9 +232,7 @@ def __init__( warm_start=False, ): super().__init__( - estimator=ExtraTreeRegressor( - max_features=1, splitter="random", random_state=random_state - ), + estimator=None, # here above max_features has no links with self.max_features bootstrap=bootstrap, bootstrap_features=False, @@ -249,6 +247,14 @@ def __init__( self.contamination = contamination + def _get_estimator(self): + return ExtraTreeRegressor( + # here max_features has no links with self.max_features + max_features=1, + splitter="random", + random_state=self.random_state, + ) + def _set_oob_score(self, X, y): raise NotImplementedError("OOB score not supported by iforest") From e7a392cf1f389bff590cf0278057245e7d900ecd Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 16 Feb 2024 10:10:23 -0500 Subject: [PATCH 08/22] Fix lint Signed-off-by: Adam Li --- sklearn/ensemble/_bagging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index e702bb91682c2..cf9ec3a3bd460 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -146,7 +146,7 @@ def _parallel_build_estimators( random_state = seeds[i] estimator = ensemble._make_estimator(append=False, random_state=random_state) - records = getattr(estimator, "_records", dict()).get("fit", dict()) + getattr(estimator, "_records", dict()).get("fit", dict()) if has_check_input: estimator_fit = partial(estimator.fit, check_input=check_input) else: @@ -199,7 +199,7 @@ def _parallel_build_estimators( X_ = X_[:, features] estimator_fit(X_, y_, **fit_params_) - records = getattr(estimator, "_records", dict()).get("fit", dict()) + getattr(estimator, "_records", dict()).get("fit", dict()) estimators.append(estimator) estimators_features.append(features) From 9d23eb2f5ca74b6242188335424c9ed1ff786c97 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 16 Feb 2024 10:29:12 -0500 Subject: [PATCH 09/22] Fix bagging and isolation forest Signed-off-by: Adam 
Li --- sklearn/ensemble/_bagging.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index cf9ec3a3bd460..134c5c4bde7c9 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -469,7 +469,7 @@ def _fit( y = self._validate_y(y) # Check parameters - self._validate_estimator() + self._validate_estimator(self._get_estimator()) if _routing_enabled(): routed_params = process_routing( @@ -849,10 +849,6 @@ def __init__( verbose=verbose, ) - def _validate_estimator(self): - """Check the estimator and set the estimator_ attribute.""" - super()._validate_estimator(default=self._get_estimator()) - def _get_estimator(self): """Resolve which estimator to return (default is DecisionTreeClassifier)""" estimator = self.estimator @@ -1330,10 +1326,6 @@ def predict(self, X): return y_hat - def _validate_estimator(self): - """Check the estimator and set the estimator_ attribute.""" - super()._validate_estimator(default=self._get_estimator()) - def _set_oob_score(self, X, y): n_samples = y.shape[0] From f0a93e58c5a23e4fcc2b1b6169f17244858b0ea4 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Sat, 17 Feb 2024 12:17:03 -0500 Subject: [PATCH 10/22] Fix test Signed-off-by: Adam Li --- sklearn/ensemble/_bagging.py | 50 ++++++++++---------------- sklearn/ensemble/tests/test_bagging.py | 26 ++++++++++++-- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 134c5c4bde7c9..ddeb299b6e2bb 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -164,10 +164,17 @@ def _parallel_build_estimators( ) fit_params_ = fit_params.copy() + # TODO(SLEP6): remove if condition for unrouted sample_weight when metadata # routing can't be disabled. + # 1. If routing is not enabled, we will check if the base + # estimator supports sample_weight and use it if it does. + # 2. If routing is enabled, we will check if the routing supports sample + # weight and use it if it does. 
# Draw samples, using sample weights, and then fit - if support_sample_weight: + if (_routing_enabled() and "sample_weight" in fit_params_) or ( + not _routing_enabled() and support_sample_weight + ): # row subsampling via sample_weight curr_sample_weight = _check_sample_weight( fit_params_.pop("sample_weight", None), X @@ -182,15 +189,8 @@ def _parallel_build_estimators( fit_params_["sample_weight"] = curr_sample_weight X_ = X[:, features] if requires_feature_indexing else X - if not _routing_enabled(): - estimator_fit(X_, y, sample_weight=curr_sample_weight) - else: - estimator_fit(X_, y, **fit_params_) + estimator_fit(X_, y, **fit_params_) else: - # if sample_weight is not supported in the Bagging estimator, pop it - # from fit_params - fit_params_.pop("sample_weight", None) - # cannot use sample_weight, use indexing y_ = _safe_indexing(y, indices) X_ = _safe_indexing(X, indices) @@ -401,9 +401,11 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): multi_output=True, ) - return self._fit( - X, y, self.max_samples, sample_weight=sample_weight, **fit_params - ) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X, dtype=None) + fit_params["sample_weight"] = sample_weight + + return self._fit(X, y, max_samples=self.max_samples, **fit_params) def _parallel_args(self): return {} @@ -414,7 +416,6 @@ def _fit( y, max_samples=None, max_depth=None, - sample_weight=None, check_input=True, **fit_params, ): @@ -438,11 +439,6 @@ def _fit( Override value used when constructing base estimator. Only supported if the base estimator has a max_depth parameter. - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. If None, then samples are equally weighted. - Note that this is supported only if the base estimator supports - sample weighting. - check_input : bool, default=True Override value used when fitting base estimator. Only supported if the base estimator has a check_input parameter for fit function. @@ -460,9 +456,6 @@ def _fit( """ random_state = check_random_state(self.random_state) - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X, dtype=None) - # Remap output n_samples = X.shape[0] self._n_samples = n_samples @@ -472,14 +465,14 @@ def _fit( self._validate_estimator(self._get_estimator()) if _routing_enabled(): - routed_params = process_routing( - self, "fit", sample_weight=sample_weight, **fit_params - ) + routed_params = process_routing(self, "fit", **fit_params) else: routed_params = Bunch() routed_params.estimator = Bunch(fit=fit_params) - if "sample_weight" != None: - routed_params.estimator.fit["sample_weight"] = sample_weight + if "sample_weight" in fit_params: + routed_params.estimator.fit["sample_weight"] = fit_params[ + "sample_weight" + ] if max_depth is not None: self.estimator_.max_depth = max_depth @@ -858,9 +851,6 @@ def _get_estimator(self): # to be deterministic (and we don't want to expose this one). estimator = DecisionTreeClassifier() - if _routing_enabled(): - estimator.set_fit_request(sample_weight=True) - return estimator def _set_oob_score(self, X, y): @@ -1369,6 +1359,4 @@ def _get_estimator(self): # we want all classifiers that don't expose a random_state # to be deterministic (and we don't want to expose this one). 
estimator = DecisionTreeRegressor() - if _routing_enabled(): - estimator.set_fit_request(sample_weight=True) return estimator diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 01bf2991eabd3..09d5c8f994004 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -978,6 +978,24 @@ def test_get_metadata_routing_without_fit(Estimator, BaseEst): est.get_metadata_routing() +@pytest.mark.usefixtures("enable_slep006") +@pytest.mark.parametrize( + "Estimator", + [BaggingClassifier, BaggingRegressor], +) +def test_get_metadata_routing_with_default_estimator(Estimator): + """Test that metadata routing is off by default estimator.""" + X = np.array([[0, 1], [2, 2], [4, 6]]) + y = [1, 2, 3] + sample_weight = [1.0, 1.0, 1.0] + est = Estimator(estimator=None, n_estimators=2, random_state=0) + + with pytest.raises( + TypeError, match="got unexpected argument\(s\) {'sample_weight'}" + ): + est.fit(X, y, sample_weight=sample_weight) + + @pytest.mark.usefixtures("enable_slep006") @pytest.mark.parametrize( "Estimator, BaseEst", @@ -994,7 +1012,6 @@ def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): estimator=BaseEst(registry=_Registry()).set_fit_request(**{prop: True}) ) - print({prop: sample_weight if prop == "sample_weight" else metadata}) est.fit(X, y, **{prop: sample_weight if prop == "sample_weight" else metadata}) for estimator in est.estimators_: @@ -1005,7 +1022,12 @@ def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): registry = estimator.registry assert len(registry) for sub_est in registry: - if not has_fit_parameter(sub_est, "sample_weight"): + # When sample weight is passed in, the sub-estimators use + # bootstrapping to sample from the dataset, so we can't check + # that sample weight is the same within all sub-estimators. + # + # Note: We can check all other metadata is the same though. 
+            if prop != "sample_weight":
                 check_recorded_metadata(
                     obj=sub_est,
                     method="fit",

From 294c8f179c0737a3e003a62fba6f28cda8541a2e Mon Sep 17 00:00:00 2001
From: Adam Li 
Date: Sat, 17 Feb 2024 12:17:21 -0500
Subject: [PATCH 11/22] Fix lint

Signed-off-by: Adam Li 
---
 sklearn/ensemble/tests/test_bagging.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py
index 09d5c8f994004..6f15dc95b7761 100644
--- a/sklearn/ensemble/tests/test_bagging.py
+++ b/sklearn/ensemble/tests/test_bagging.py
@@ -38,7 +38,6 @@
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal
 from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
-from sklearn.utils.validation import has_fit_parameter
 
 rng = check_random_state(0)
 
@@ -991,7 +990,7 @@ def test_get_metadata_routing_with_default_estimator(Estimator):
     est = Estimator(estimator=None, n_estimators=2, random_state=0)
 
     with pytest.raises(
-        TypeError, match="got unexpected argument\(s\) {'sample_weight'}"
+        TypeError, match=r"got unexpected argument\(s\) {'sample_weight'}"
     ):
         est.fit(X, y, sample_weight=sample_weight)
 

From a675a658929b31ccea0b57f0cd40f944756e3970 Mon Sep 17 00:00:00 2001
From: Adam Li 
Date: Mon, 19 Feb 2024 10:10:53 -0500
Subject: [PATCH 12/22] Apply suggestions from code review

Co-authored-by: Adrin Jalali 
---
 sklearn/ensemble/_bagging.py | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
index ddeb299b6e2bb..a75460e9553fa 100644
--- a/sklearn/ensemble/_bagging.py
+++ b/sklearn/ensemble/_bagging.py
@@ -635,7 +635,7 @@ def get_metadata_routing(self):
         """
         router = MetadataRouter(owner=self.__class__.__name__)
         router.add(
-            estimator=self.estimator,
+            estimator=self._get_estimator(),
             method_mapping=MethodMapping().add(callee="fit", caller="fit"),
         )
         return router
@@ -844,14 +844,7 @@ def __init__(
 
     def _get_estimator(self):
         """Resolve which estimator to return (default is DecisionTreeClassifier)"""
-        estimator = self.estimator
-
-        if estimator is None:
-            # we want all classifiers that don't expose a random_state
-            # to be deterministic (and we don't want to expose this one).
-            estimator = DecisionTreeClassifier()
-
-        return estimator
+        return self.estimator or DecisionTreeClassifier()
 
     def _set_oob_score(self, X, y):
         n_samples = y.shape[0]
@@ -1353,10 +1346,4 @@ def _more_tags(self):
 
     def _get_estimator(self):
         """Resolve which estimator to return (default is DecisionTreeRegressor)"""
-        estimator = self.estimator
-
-        if estimator is None:
-            # we want all classifiers that don't expose a random_state
-            # to be deterministic (and we don't want to expose this one).
-            estimator = DecisionTreeRegressor()
-        return estimator
+        return self.estimator or DecisionTreeRegressor()

From 134a60f548b7b6eea3eb51303d32bd6af8390fab Mon Sep 17 00:00:00 2001
From: Adam Li 
Date: Mon, 19 Feb 2024 10:56:19 -0500
Subject: [PATCH 13/22] Address adrin comments

Signed-off-by: Adam Li 
---
 doc/whats_new/v1.5.rst                      | 11 +++---
 sklearn/ensemble/_bagging.py                | 39 +++++++------------
 sklearn/ensemble/tests/test_bagging.py      |  3 +-
 .../_middle_term_computer.pyx.tp            |  8 +---
 .../_radius_neighbors.pyx.tp                |  7 +---
 .../test_metaestimators_metadata_routing.py | 18 +++++++++
 6 files changed, 42 insertions(+), 44 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index d9a196aba349e..9ff336807449d 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -42,6 +42,11 @@ more details.
 - |Feature| :class:`impute.IterativeImputer` now supports metadata routing in
   its `fit` method. :pr:`28187` by :user:`Stefanie Senger `.
 
+- |Feature| :class:`ensemble.BaggingClassifier` and :class:`ensemble.BaggingRegressor`
+  now support metadata routing. The `fit` methods now
+  accept ``**fit_params``, which are passed to the underlying estimators
+  via their `fit` methods.
+  :pr:`28432` by :user:`Adam Li ` and :user:`Benjamin Bossan `.
 
 Changelog
 ---------
@@ -83,12 +88,6 @@ Changelog
   pre-sorting the data before finding the thresholds for binning.
   :pr:`28102` by :user:`Christian Lorentzen `.
 
-- |Enhancement| :class:`ensemble.BaggingClassifier` and :class:`ensemble.BaggingRegressor`
-  now supports metadata routing. The fit methods now
-  accepts ``**fit_params`` which are passed to the underlying estimators
-  via their `fit` methods.
-  :pr:`28432` by :user:`Adam Li `.
-
 :mod:`sklearn.feature_extraction`
 .................................
 
diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
index a75460e9553fa..cfb9621b3d35d 100644
--- a/sklearn/ensemble/_bagging.py
+++ b/sklearn/ensemble/_bagging.py
@@ -88,14 +88,6 @@ def _generate_bagging_indices(
     return feature_indices, sample_indices
 
 
-def _supports_sample_weight(request_or_router):
-    """Check if the fit method supports sample_weight"""
-    param_names = request_or_router._get_param_names(
-        method="fit", return_alias=True, ignore_self_request=False
-    )
-    return "sample_weight" in param_names
-
-
 def _parallel_build_estimators(
     n_estimators,
     ensemble,
@@ -124,18 +116,16 @@ def _parallel_build_estimators(
 
     request_or_router = get_routing_for_object(ensemble.estimator_)
 
-    if (
-        not _supports_sample_weight(request_or_router)
-        and fit_params.get("sample_weight") is not None
+    # TODO: (slep6) remove if condition for unrouted sample_weight when metadata
+    # routing can't be disabled.
+    if not _routing_enabled() and (
+        not support_sample_weight and fit_params.get("sample_weight") is not None
     ):
         raise ValueError(
             "The base estimator doesn't support sample weight, but sample_weight is "
            "passed to the fit method."
         )
 
-    # Row sampling can be achieved either through setting sample_weight or
-    # by indexing. The former is more efficient. Therefore, use this method
-    # if possible, otherwise use indexing.
for i in range(n_estimators): if verbose > 1: print( @@ -146,7 +136,6 @@ def _parallel_build_estimators( random_state = seeds[i] estimator = ensemble._make_estimator(append=False, random_state=random_state) - getattr(estimator, "_records", dict()).get("fit", dict()) if has_check_input: estimator_fit = partial(estimator.fit, check_input=check_input) else: @@ -171,11 +160,14 @@ def _parallel_build_estimators( # estimator supports sample_weight and use it if it does. # 2. If routing is enabled, we will check if the routing supports sample # weight and use it if it does. - # Draw samples, using sample weights, and then fit - if (_routing_enabled() and "sample_weight" in fit_params_) or ( - not _routing_enabled() and support_sample_weight - ): - # row subsampling via sample_weight + + # Note: Row sampling can be achieved either through setting sample_weight or + # by indexing. The former is more efficient. Therefore, use this method + # if possible, otherwise use indexing. + if ( + _routing_enabled() and request_or_router.consumes("fit", ("sample_weight",)) + ) or (not _routing_enabled() and support_sample_weight): + # Draw sub samples, using sample weights, and then fit curr_sample_weight = _check_sample_weight( fit_params_.pop("sample_weight", None), X ).copy() @@ -191,7 +183,7 @@ def _parallel_build_estimators( X_ = X[:, features] if requires_feature_indexing else X estimator_fit(X_, y, **fit_params_) else: - # cannot use sample_weight, use indexing + # cannot use sample_weight, so use indexing y_ = _safe_indexing(y, indices) X_ = _safe_indexing(X, indices) fit_params_ = _check_method_params(X, params=fit_params_, indices=indices) @@ -199,7 +191,6 @@ def _parallel_build_estimators( X_ = X_[:, features] estimator_fit(X_, y_, **fit_params_) - getattr(estimator, "_records", dict()).get("fit", dict()) estimators.append(estimator) estimators_features.append(features) @@ -844,7 +835,7 @@ def __init__( def _get_estimator(self): """Resolve which estimator to return (default is DecisionTreeClassifier)""" - return self.estimator or DecisionTreeClassifier() + return self.estimator or DecisionTreeClassifier() def _set_oob_score(self, X, y): n_samples = y.shape[0] @@ -1346,4 +1337,4 @@ def _more_tags(self): def _get_estimator(self): """Resolve which estimator to return (default is DecisionTreeClassifier)""" - return self.estimator or DecisionTreeRegressor() + return self.estimator or DecisionTreeRegressor() diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 6f15dc95b7761..a8f0f4f973d65 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -990,7 +990,8 @@ def test_get_metadata_routing_with_default_estimator(Estimator): est = Estimator(estimator=None, n_estimators=2, random_state=0) with pytest.raises( - TypeError, match=r"got unexpected argument\(s\) {'sample_weight'}" + Exception, + match=r"\[sample_weight\] are passed but are not explicitly set as requested", ): est.fit(X, y, sample_weight=sample_weight) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index bfe465394c4d2..1fca2d674720c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -14,6 +14,7 @@ implementation_specific_values = [ }} from libcpp.vector cimport vector +from libcpp.algorithm cimport fill from 
...utils._cython_blas cimport ( BLAS_Order, @@ -25,13 +26,6 @@ from ...utils._cython_blas cimport ( ) from ...utils._typedefs cimport float64_t, float32_t, int32_t, intp_t -# TODO: change for `libcpp.algorithm.fill` once Cython 3 is used -# Introduction in Cython: -# -# https://github.com/cython/cython/blob/05059e2a9b89bf6738a7750b905057e5b1e3fe2e/Cython/Includes/libcpp/algorithm.pxd#L50 #noqa -cdef extern from "" namespace "std" nogil: - void fill[Iter, T](Iter first, Iter last, const T& value) except + #noqa - import numpy as np from scipy.sparse import issparse, csr_matrix diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index 8695baad172e0..cd25b010cc81b 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -4,6 +4,7 @@ import warnings from libcpp.memory cimport shared_ptr, make_shared from libcpp.vector cimport vector +from libcpp.algorithm cimport move from cython cimport final from cython.operator cimport dereference as deref from cython.parallel cimport parallel, prange @@ -19,12 +20,6 @@ from ...utils.fixes import threadpool_limits cnp.import_array() -# TODO: change for `libcpp.algorithm.move` once Cython 3 is used -# Introduction in Cython: -# https://github.com/cython/cython/blob/05059e2a9b89bf6738a7750b905057e5b1e3fe2e/Cython/Includes/libcpp/algorithm.pxd#L47 #noqa -cdef extern from "" namespace "std" nogil: - OutputIt move[InputIt, OutputIt](InputIt first, InputIt last, OutputIt d_first) except + #noqa - ###################### cdef cnp.ndarray[object, ndim=1] coerce_vectors_to_nd_arrays( diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 880b6217ba38f..9d03e2dec7052 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -12,6 +12,8 @@ from sklearn.ensemble import ( AdaBoostClassifier, AdaBoostRegressor, + BaggingClassifier, + BaggingRegressor, StackingClassifier, StackingRegressor, VotingClassifier, @@ -296,6 +298,22 @@ def enable_slep006(): "y": y, "estimator_routing_methods": ["fit"], }, + { + "metaestimator": BaggingClassifier, + "estimator_name": "estimator", + "estimator": ConsumingClassifier, + "X": X, + "y": y, + "estimator_routing_methods": ["fit"], + }, + { + "metaestimator": BaggingRegressor, + "estimator_name": "estimator", + "estimator": ConsumingRegressor, + "X": X, + "y": y, + "estimator_routing_methods": ["fit"], + }, ] """List containing all metaestimators to be tested and their settings From 2ffd7b7616116a15d768556b25078ab610d5f606 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 19 Feb 2024 17:53:15 -0500 Subject: [PATCH 14/22] Address adrin's comment Signed-off-by: Adam Li --- sklearn/ensemble/_bagging.py | 2 +- sklearn/ensemble/tests/test_bagging.py | 84 ++----------------- .../test_metaestimators_metadata_routing.py | 2 + 3 files changed, 8 insertions(+), 80 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index cfb9621b3d35d..41205aa3e6d22 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -106,7 +106,6 @@ def _parallel_build_estimators( max_samples = ensemble._max_samples bootstrap = ensemble.bootstrap bootstrap_features = ensemble.bootstrap_features - support_sample_weight = has_fit_parameter(ensemble.estimator_, 
"sample_weight") has_check_input = has_fit_parameter(ensemble.estimator_, "check_input") requires_feature_indexing = bootstrap_features or max_features != n_features @@ -118,6 +117,7 @@ def _parallel_build_estimators( # TODO: (slep6) remove if condition for unrouted sample_weight when metadata # routing can't be disabled. + support_sample_weight = has_fit_parameter(ensemble.estimator_, "sample_weight") if not _routing_enabled() and ( not support_sample_weight and fit_params.get("sample_weight") is not None ): diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index a8f0f4f973d65..b24f0ebb25ee8 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -4,7 +4,6 @@ # Author: Gilles Louppe # License: BSD 3 clause -import re from itertools import cycle, product import joblib @@ -945,57 +944,6 @@ def test_bagging_allow_nan_tag(bagging, expected_allow_nan): assert bagging._get_tags()["allow_nan"] == expected_allow_nan -# Metadata Routing Tests -# ====================== - - -@pytest.mark.parametrize( - "Estimator, BaseEst", - [(BaggingClassifier, ConsumingClassifier), (BaggingRegressor, ConsumingRegressor)], -) -def test_routing_passed_metadata_not_supported(Estimator, BaseEst): - """Test that the right error message is raised when metadata is passed while - not supported when `enable_metadata_routing=False`.""" - - X = np.array([[0, 1], [2, 2], [4, 6]]) - y = [1, 2, 3] - - with pytest.raises( - ValueError, match="is only supported if enable_metadata_routing=True" - ): - Estimator(estimator=BaseEst()).fit(X, y, sample_weight=[1, 1, 1], metadata="a") - - -@pytest.mark.usefixtures("enable_slep006") -@pytest.mark.parametrize( - "Estimator, BaseEst", - [(BaggingClassifier, ConsumingClassifier), (BaggingRegressor, ConsumingRegressor)], -) -def test_get_metadata_routing_without_fit(Estimator, BaseEst): - # Test that metadata_routing() doesn't raise when called before fit. - est = Estimator(estimator=BaseEst()) - est.get_metadata_routing() - - -@pytest.mark.usefixtures("enable_slep006") -@pytest.mark.parametrize( - "Estimator", - [BaggingClassifier, BaggingRegressor], -) -def test_get_metadata_routing_with_default_estimator(Estimator): - """Test that metadata routing is off by default estimator.""" - X = np.array([[0, 1], [2, 2], [4, 6]]) - y = [1, 2, 3] - sample_weight = [1.0, 1.0, 1.0] - est = Estimator(estimator=None, n_estimators=2, random_state=0) - - with pytest.raises( - Exception, - match=r"\[sample_weight\] are passed but are not explicitly set as requested", - ): - est.fit(X, y, sample_weight=sample_weight) - - @pytest.mark.usefixtures("enable_slep006") @pytest.mark.parametrize( "Estimator, BaseEst", @@ -1003,7 +951,11 @@ def test_get_metadata_routing_with_default_estimator(Estimator): ) @pytest.mark.parametrize("prop", ["sample_weight", "metadata"]) def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): - """Test that metadata is routed correctly for Bagging*.""" + """Test that metadata is routed correctly for Bagging*. + + Bagging differs in that it should preserve all meta-data except for + ``sample_weight``. 
+ """ X = np.array([[0, 1], [2, 2], [4, 6]]) y = [1, 2, 3] sample_weight, metadata = [1.0, 1.0, 1.0], "a" @@ -1033,29 +985,3 @@ def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): method="fit", **kwargs, ) - - -@pytest.mark.usefixtures("enable_slep006") -@pytest.mark.parametrize( - "Estimator, BaseEst", - [(BaggingClassifier, ConsumingClassifier), (BaggingClassifier, ConsumingRegressor)], -) -def test_metadata_routing_error_for_bagging_estimators(Estimator, BaseEst): - """Test that the right error is raised when metadata is not requested.""" - X = np.array([[0, 1], [2, 2], [4, 6]]) - y = [1, 2, 3] - sample_weight, metadata = [1, 1, 1], "a" - - est = Estimator(estimator=BaseEst()) - - error_message = ( - "[sample_weight, metadata] are passed but are not explicitly set as requested" - f" or not for {BaseEst.__name__}.fit" - ) - - with pytest.raises(ValueError, match=re.escape(error_message)): - est.fit(X, y, sample_weight=sample_weight, metadata=metadata) - - -# End of Metadata Routing Tests -# ============================= diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 9d03e2dec7052..2cffa5125e3c2 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -304,6 +304,7 @@ def enable_slep006(): "estimator": ConsumingClassifier, "X": X, "y": y, + "preserves_metadata": False, "estimator_routing_methods": ["fit"], }, { @@ -312,6 +313,7 @@ def enable_slep006(): "estimator": ConsumingRegressor, "X": X, "y": y, + "preserves_metadata": False, "estimator_routing_methods": ["fit"], }, ] From e2dbc63d0343bf4bfcb44a8fb250a9c8d9533d1d Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 19 Feb 2024 18:00:06 -0500 Subject: [PATCH 15/22] clean up test to make pass Signed-off-by: Adam Li --- sklearn/tests/test_metaestimators_metadata_routing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 2cffa5125e3c2..fa0b8045d5f66 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -1,6 +1,6 @@ import copy import re - +from typing import Iterable import numpy as np import pytest From 00625668fd8ae82d90495952025d4c5a12097e26 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 19 Feb 2024 19:57:58 -0500 Subject: [PATCH 16/22] Fix lint Signed-off-by: Adam Li --- sklearn/tests/test_metaestimators_metadata_routing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index fa0b8045d5f66..2cffa5125e3c2 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -1,6 +1,6 @@ import copy import re -from typing import Iterable + import numpy as np import pytest From 7dd4fb5dab61c6dfd5e3aac5d29667f9774467a3 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 20 Feb 2024 10:45:00 -0500 Subject: [PATCH 17/22] Cleanup Signed-off-by: Adam Li --- sklearn/ensemble/tests/test_bagging.py | 49 -------------------------- 1 file changed, 49 deletions(-) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index b24f0ebb25ee8..2c1e308cee33b 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ 
b/sklearn/ensemble/tests/test_bagging.py @@ -27,12 +27,6 @@ from sklearn.preprocessing import FunctionTransformer, scale from sklearn.random_projection import SparseRandomProjection from sklearn.svm import SVC, SVR -from sklearn.tests.metadata_routing_common import ( - ConsumingClassifier, - ConsumingRegressor, - _Registry, - check_recorded_metadata, -) from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import check_random_state from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal @@ -942,46 +936,3 @@ def fit(self, X, y): def test_bagging_allow_nan_tag(bagging, expected_allow_nan): """Check that bagging inherits allow_nan tag.""" assert bagging._get_tags()["allow_nan"] == expected_allow_nan - - -@pytest.mark.usefixtures("enable_slep006") -@pytest.mark.parametrize( - "Estimator, BaseEst", - [(BaggingClassifier, ConsumingClassifier), (BaggingRegressor, ConsumingRegressor)], -) -@pytest.mark.parametrize("prop", ["sample_weight", "metadata"]) -def test_metadata_routing_for_bagging_estimators(Estimator, BaseEst, prop): - """Test that metadata is routed correctly for Bagging*. - - Bagging differs in that it should preserve all meta-data except for - ``sample_weight``. - """ - X = np.array([[0, 1], [2, 2], [4, 6]]) - y = [1, 2, 3] - sample_weight, metadata = [1.0, 1.0, 1.0], "a" - - est = Estimator( - estimator=BaseEst(registry=_Registry()).set_fit_request(**{prop: True}) - ) - - est.fit(X, y, **{prop: sample_weight if prop == "sample_weight" else metadata}) - - for estimator in est.estimators_: - if prop == "sample_weight": - kwargs = {prop: sample_weight} - else: - kwargs = {prop: metadata} - registry = estimator.registry - assert len(registry) - for sub_est in registry: - # When sample weight is passed in, the sub-estimators use - # bootstrapping to sample from the dataset, so we can't check - # that sample weight is the same within all sub-estimators. - # - # Note: We can check all other metadata is the same though. 
- if prop != "sample_weight": - check_recorded_metadata( - obj=sub_est, - method="fit", - **kwargs, - ) From 926e5ee9c0a1fabc427454b4d9d69d3b22e114f0 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 20 Feb 2024 10:47:22 -0500 Subject: [PATCH 18/22] Remove diff Signed-off-by: Adam Li --- .../_middle_term_computer.pyx.tp | 7 ++++++- .../_pairwise_distances_reduction/_radius_neighbors.pyx.tp | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index 1fca2d674720c..e81e6fa20e877 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -14,7 +14,6 @@ implementation_specific_values = [ }} from libcpp.vector cimport vector -from libcpp.algorithm cimport fill from ...utils._cython_blas cimport ( BLAS_Order, @@ -29,6 +28,12 @@ from ...utils._typedefs cimport float64_t, float32_t, int32_t, intp_t import numpy as np from scipy.sparse import issparse, csr_matrix +# TODO: change for `libcpp.algorithm.fill` once Cython 3 is used +# Introduction in Cython: +# +# https://github.com/cython/cython/blob/05059e2a9b89bf6738a7750b905057e5b1e3fe2e/Cython/Includes/libcpp/algorithm.pxd#L50 #noqa +cdef extern from "" namespace "std" nogil: + void fill[Iter, T](Iter first, Iter last, const T& value) except + #noqa cdef void _middle_term_sparse_sparse_64( const float64_t[:] X_data, diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index cd25b010cc81b..8695baad172e0 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -4,7 +4,6 @@ import warnings from libcpp.memory cimport shared_ptr, make_shared from libcpp.vector cimport vector -from libcpp.algorithm cimport move from cython cimport final from cython.operator cimport dereference as deref from cython.parallel cimport parallel, prange @@ -20,6 +19,12 @@ from ...utils.fixes import threadpool_limits cnp.import_array() +# TODO: change for `libcpp.algorithm.move` once Cython 3 is used +# Introduction in Cython: +# https://github.com/cython/cython/blob/05059e2a9b89bf6738a7750b905057e5b1e3fe2e/Cython/Includes/libcpp/algorithm.pxd#L47 #noqa +cdef extern from "" namespace "std" nogil: + OutputIt move[InputIt, OutputIt](InputIt first, InputIt last, OutputIt d_first) except + #noqa + ###################### cdef cnp.ndarray[object, ndim=1] coerce_vectors_to_nd_arrays( From 312be7186ca39943f07033e1cccb5c0dddbd9bef Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 20 Feb 2024 10:47:58 -0500 Subject: [PATCH 19/22] Remove diff Signed-off-by: Adam Li --- .../_middle_term_computer.pyx.tp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index e81e6fa20e877..bfe465394c4d2 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -25,9 +25,6 @@ from ...utils._cython_blas cimport ( ) from ...utils._typedefs cimport float64_t, float32_t, int32_t, intp_t -import numpy as np -from scipy.sparse import 
issparse, csr_matrix - # TODO: change for `libcpp.algorithm.fill` once Cython 3 is used # Introduction in Cython: # @@ -35,6 +32,10 @@ from scipy.sparse import issparse, csr_matrix cdef extern from "" namespace "std" nogil: void fill[Iter, T](Iter first, Iter last, const T& value) except + #noqa +import numpy as np +from scipy.sparse import issparse, csr_matrix + + cdef void _middle_term_sparse_sparse_64( const float64_t[:] X_data, const int32_t[:] X_indices, From 5dc9a3d8ffff18a30e8f7ec1bfa345be53a36494 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 21 Feb 2024 09:05:43 -0500 Subject: [PATCH 20/22] Update v1.5.rst Co-authored-by: Omar Salman --- doc/whats_new/v1.5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 911d72b7a0e01..66394288e60ce 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -43,7 +43,7 @@ more details. its `fit` method. :pr:`28187` by :user:`Stefanie Senger `. - |Feature| :class:`ensemble.BaggingClassifier` and :class:`ensemble.BaggingRegressor` - now supports metadata routing. The fit methods now + now support metadata routing. The fit methods now accepts ``**fit_params`` which are passed to the underlying estimators via their `fit` methods. :pr:`28432` by :user:`Adam Li ` and :user:`Benjamin Bossan `. From 3a3d4441dbdbece0914b1c6c58eaf22c6bd42253 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 21 Feb 2024 09:05:52 -0500 Subject: [PATCH 21/22] Update v1.5.rst Co-authored-by: Omar Salman --- doc/whats_new/v1.5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 66394288e60ce..8c6623bd7e359 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -44,7 +44,7 @@ more details. - |Feature| :class:`ensemble.BaggingClassifier` and :class:`ensemble.BaggingRegressor` now support metadata routing. The fit methods now - accepts ``**fit_params`` which are passed to the underlying estimators + accept ``**fit_params`` which are passed to the underlying estimators via their `fit` methods. :pr:`28432` by :user:`Adam Li ` and :user:`Benjamin Bossan `. From 01a64e4b235755448662840ebf68b3c569b69337 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 21 Feb 2024 09:06:11 -0500 Subject: [PATCH 22/22] Update _bagging.py Co-authored-by: Omar Salman --- sklearn/ensemble/_bagging.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 41205aa3e6d22..878e2ea809c01 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -156,10 +156,10 @@ def _parallel_build_estimators( # TODO(SLEP6): remove if condition for unrouted sample_weight when metadata # routing can't be disabled. - # 1. If routing is not enabled, we will check if the base - # estimator supports sample_weight and use it if it does. - # 2. If routing is enabled, we will check if the routing supports sample + # 1. If routing is enabled, we will check if the routing supports sample # weight and use it if it does. + # 2. If routing is not enabled, we will check if the base + # estimator supports sample_weight and use it if it does. # Note: Row sampling can be achieved either through setting sample_weight or # by indexing. The former is more efficient. Therefore, use this method
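
A minimal end-to-end sketch of the routing path this series adds (illustrative
only, not part of the patch; it assumes SLEP006 routing is switched on
globally, and LogisticRegression stands in for any sub-estimator whose fit
accepts sample_weight):

    import numpy as np
    import sklearn
    from sklearn.ensemble import BaggingClassifier
    from sklearn.linear_model import LogisticRegression

    # fit_params are only routed when metadata routing is enabled globally.
    sklearn.set_config(enable_metadata_routing=True)

    X = np.array([[0, 1], [2, 2], [4, 6], [6, 1]])
    y = np.array([0, 0, 1, 1])
    sample_weight = np.array([1.0, 2.0, 1.0, 0.5])

    # The sub-estimator must explicitly request the metadata; otherwise fit
    # raises "[sample_weight] are passed but are not explicitly set as
    # requested", as asserted in the tests above.
    clf = BaggingClassifier(
        estimator=LogisticRegression().set_fit_request(sample_weight=True),
        n_estimators=3,
        random_state=0,
    ).fit(X, y, sample_weight=sample_weight)

Because each sub-estimator sees a bootstrap draw rather than the raw rows, a
routed sample_weight is combined with the resampling, which is why both
Bagging* entries in the common tests set ``preserves_metadata: False``.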
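
The "Row sampling can be achieved either through setting sample_weight or by
indexing" comment in ``_parallel_build_estimators`` is the crux of the fast
path. A self-contained sketch of the equivalence it relies on (illustration
only, not patch code):

    import numpy as np

    rng = np.random.RandomState(0)
    n_samples = 6
    X = np.arange(n_samples * 2, dtype=float).reshape(n_samples, 2)
    indices = rng.randint(0, n_samples, n_samples)  # one bootstrap draw

    # Indexing route: materialize a resampled copy of the data.
    X_indexed = X[indices]

    # sample_weight route: keep X in place and fold the draw into weights.
    sample_counts = np.bincount(indices, minlength=n_samples)
    curr_sample_weight = np.ones(n_samples) * sample_counts  # 0 => not drawn

    # For an estimator that honors sample_weight, fitting on
    # (X, curr_sample_weight) matches fitting on X_indexed, without the copy.

This is why the branch prefers the weight path whenever the request (or, with
routing disabled, the base estimator's signature) can consume
``sample_weight``, and only falls back to ``_safe_indexing`` otherwise.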
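
The ``request_or_router.consumes("fit", ("sample_weight",))`` check can also
be exercised directly. A hedged sketch (it assumes the router layout from the
final revision of this series; ``consumes`` reports the subset of the given
params that would actually be routed for a method):

    import sklearn
    from sklearn.ensemble import BaggingRegressor
    from sklearn.linear_model import Ridge
    from sklearn.utils.metadata_routing import get_routing_for_object

    sklearn.set_config(enable_metadata_routing=True)

    est = BaggingRegressor(estimator=Ridge().set_fit_request(sample_weight=True))
    router = get_routing_for_object(est)

    # A non-empty result means bagging takes the sample_weight fast path.
    print(router.consumes("fit", ("sample_weight",)))  # {'sample_weight'}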