scikit-learn · thomasjpfan · Jan 17, 2024 · Dec 11, 2023 · Dec 11, 2023 · Dec 15, 2023
diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
@@ -31,6 +31,19 @@ random sampling procedures.
   specified `tol`, for small values you will get more precise results.
   :pr:`26721` by :user:`Christian Lorentzen <lorentzenchr>`.
 
+  .. note::
+
+      The `lbfgs` solver is the default, so this change might affect many models.
+
+      This change also means that with this new version of scikit-learn, the resulting
+      coefficients `coef_` and `intercept_` of your models will change for these two
+      solvers (when fit on the same data again). The amount of change depends on the
+      specified `tol`; for small values of `tol` you will get more precise results.
+
+- |Enhancement| :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now
+  have the `n_features_in_` and `feature_names_in_` attributes after `fit`.
+  :pr:`27937` by :user:`Marco vd Boom <tvdboom>`.
+
 - |Fix| fixes a memory leak seen in PyPy for estimators using the Cython loss functions.
   :pr:`27670` by :user:`Guillaume Lemaitre <glemaitre>`.
 
@@ -381,6 +394,13 @@ Changelog
   version 1.6. Use the default value instead.
   :pr:`27834` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+:mod:`sklearn.dummy`
+.......................
+
+- |Enhancement| :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now
+  have the `n_features_in_` and `feature_names_in_` attributes after `fit`.
+  :pr:`27937` by :user:`Marco vd Boom <tvdboom>`.
+
 :mod:`sklearn.ensemble`
 .......................
 

diff --git a/sklearn/dummy.py b/sklearn/dummy.py
@@ -110,6 +110,13 @@ class prior probabilities.
         Frequency of each class observed in `y`. For multioutput classification
         problems, this is computed independently for each output.
 
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X` has
+        feature names that are all strings.
+
     n_outputs_ : int
         Number of outputs.
 
@@ -170,6 +177,8 @@ def fit(self, X, y, sample_weight=None):
         self : object
             Returns the instance itself.
         """
+        self._validate_data(X, cast_to_ndarray=False)
+
         self._strategy = self.strategy
 
         if self._strategy == "uniform" and sp.issparse(y):
@@ -488,6 +497,13 @@ class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Mean or median or quantile of the training targets or constant value
         given by the user.
 
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X` has
+        feature names that are all strings.
+
     n_outputs_ : int
         Number of outputs.
 
@@ -545,6 +561,8 @@ def fit(self, X, y, sample_weight=None):
         self : object
             Fitted estimator.
         """
+        self._validate_data(X, cast_to_ndarray=False)
+
         y = check_array(y, ensure_2d=False, input_name="y")
         if len(y) == 0:
             raise ValueError("y must not be empty.")

diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
@@ -376,7 +376,7 @@ def test_quantile_invalid():
 
 def test_quantile_strategy_empty_train():
     est = DummyRegressor(strategy="quantile", quantile=0.4)
-    with pytest.raises(ValueError):
+    with pytest.raises(IndexError):
         est.fit([], [])