Merge pull request #3534 from pycaret/fix_ts_blending

Time Series `blend_models` enhancements and bug fixes
pycaret · May 7, 2023 · 3f3ac4b · 3f3ac4b
2 parents 5f2bd5d + 7f0c8c3
commit 3f3ac4b
Show file tree

Hide file tree

Showing 7 changed files with 919 additions and 946 deletions.
diff --git a/pycaret/containers/models/time_series.py b/pycaret/containers/models/time_series.py
@@ -2821,29 +2821,35 @@ def __init__(self, experiment) -> None:
         np.random.seed(experiment.seed)
         self.gpu_imported = False
 
-        from pycaret.internal.ensemble import _EnsembleForecasterWithVoting
-
-        args = {}
-        tune_args = {}
-        tune_grid = {}
-        tune_distributions = {}
-
-        # if not self.gpu_imported:
-        #     args["n_jobs"] = experiment.n_jobs_param
+        from sktime.forecasting.compose import EnsembleForecaster
 
+        args = self._set_args
+        tune_args = self._set_tune_args
+        tune_grid = self._set_tune_grid
+        tune_distributions = self._set_tune_distributions
         leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)
 
         super().__init__(
             id="ensemble_forecaster",
             name="EnsembleForecaster",
-            class_def=_EnsembleForecasterWithVoting,
+            class_def=EnsembleForecaster,
             args=args,
             tune_grid=tune_grid,
             tune_distribution=tune_distributions,
             tune_args=tune_args,
             is_gpu_enabled=self.gpu_imported,
         )
 
+    @property
+    def _set_args(self) -> Dict[str, Any]:
+        args = {"aggfunc": "mean"}
+        return args
+
+    @property
+    def _set_tune_grid(self) -> Dict[str, List[Any]]:
+        tune_grid = {"aggfunc": ["mean", "median", "min", "max", "gmean"]}
+        return tune_grid
+
 
 def get_all_model_containers(
     experiment, raise_errors: bool = True

diff --git a/pycaret/internal/ensemble.py b/pycaret/internal/ensemble.py
diff --git a/pycaret/internal/pycaret_experiment/supervised_experiment.py b/pycaret/internal/pycaret_experiment/supervised_experiment.py
@@ -3279,10 +3279,14 @@ def blend_models(
             fit_kwargs = {}
 
         # checking method parameter
-        available_method = ["auto", "soft", "hard", "mean", "median", "voting"]
+        if self._ml_usecase == MLUsecase.TIME_SERIES:
+            available_method = ["mean", "median", "min", "max", "gmean"]
+        else:
+            available_method = ["auto", "soft", "hard", "mean", "median", "voting"]
         if method not in available_method:
             raise ValueError(
-                "Method parameter only accepts 'auto', 'soft', 'hard', 'mean', 'median' or 'voting' as a parameter. See Docstring for details."
+                f"Method parameter only accepts the following values: {available_method}. "
+                "See Docstring for details."
             )
 
         # checking error for estimator_list (skip for timeseries)
@@ -3447,7 +3451,7 @@ def blend_models(
         elif self._ml_usecase == MLUsecase.TIME_SERIES:
             model = voting_model_definition.class_def(
                 forecasters=estimator_list,
-                method=method,
+                aggfunc=method,
                 weights=weights,
                 n_jobs=self.gpu_n_jobs_param,
             )

diff --git a/pycaret/time_series/forecasting/functional.py b/pycaret/time_series/forecasting/functional.py
@@ -1081,10 +1081,8 @@ def blend_models(
 ):
     """
     This function trains a EnsembleForecaster for select models passed in the
-    ``estimator_list`` param. The output of this function is a score grid with
-    CV scores by fold. Metrics evaluated during CV can be accessed using the
-    ``get_metrics`` function. Custom metrics can be added or removed using
-    ``add_metric`` and ``remove_metric`` function.
+    ``estimator_list`` param. Trains a sktime EnsembleForecaster under the hood.
+    Refer to its documentation for more details.
 
 
     Example
@@ -1106,8 +1104,10 @@ def blend_models(
         Available Methods:
 
         * 'mean' - Mean of individual predictions
+        * 'gmean' - Geometric Mean of individual predictions
         * 'median' - Median of individual predictions
-        * 'voting' - Vote individual predictions based on the provided weights.
+        * 'min' - Minimum of individual predictions
+        * 'max' - Maximum of individual predictions
 
 
     fold: int or scikit-learn compatible CV generator, default = None
@@ -1131,9 +1131,9 @@ def blend_models(
 
 
     weights: list, default = None
-        Sequence of weights (float or int) to weight the occurrences of predicted class
-        labels (hard voting) or class probabilities before averaging (soft voting). Uses
-        uniform weights when None.
+        Sequence of weights (float or int) to apply to the individual model
+        predictions. Uses uniform weights when None. Note that weights only
+        apply to the 'mean', 'gmean' and 'median' methods.
 
 
     fit_kwargs: dict, default = {} (empty dict)
@@ -1146,10 +1146,7 @@ def blend_models(
 
     Returns:
         Trained Model
-
-
     """
-
     return _CURRENT_EXPERIMENT.blend_models(
         estimator_list=estimator_list,
         fold=fold,

diff --git a/pycaret/time_series/forecasting/oop.py b/pycaret/time_series/forecasting/oop.py
@@ -3299,10 +3299,8 @@ def blend_models(
     ):
         """
         This function trains a EnsembleForecaster for select models passed in the
-        ``estimator_list`` param. The output of this function is a score grid with
-        CV scores by fold. Metrics evaluated during CV can be accessed using the
-        ``get_metrics`` function. Custom metrics can be added or removed using
-        ``add_metric`` and ``remove_metric`` function.
+        ``estimator_list`` param. Trains a sktime EnsembleForecaster under the hood.
+        Refer to its documentation for more details.
 
 
         Example
@@ -3324,8 +3322,10 @@ def blend_models(
             Available Methods:
 
             * 'mean' - Mean of individual predictions
+            * 'gmean' - Geometric Mean of individual predictions
             * 'median' - Median of individual predictions
-            * 'voting' - Vote individual predictions based on the provided weights.
+            * 'min' - Minimum of individual predictions
+            * 'max' - Maximum of individual predictions
 
 
         fold: int or scikit-learn compatible CV generator, default = None
@@ -3349,9 +3349,9 @@ def blend_models(
 
 
         weights: list, default = None
-            Sequence of weights (float or int) to weight the occurrences of predicted class
-            labels (hard voting) or class probabilities before averaging (soft voting). Uses
-            uniform weights when None.
+            Sequence of weights (float or int) to apply to the individual model
+            predictions. Uses uniform weights when None. Note that weights only
+            apply to the 'mean', 'gmean' and 'median' methods.
 
 
         fit_kwargs: dict, default = {} (empty dict)
@@ -3364,9 +3364,14 @@ def blend_models(
 
         Returns:
             Trained Model
-
-
         """
+        msg = (
+            "method 'voting' is not supported from pycaret 3.0.1 onwards. "
+            "Please use method = 'mean' and pass the weights to mimic the "
+            "functionality of 'voting' blender from prior releases."
+        )
+        if method == "voting":
+            raise ValueError(msg)
 
         return super().blend_models(
             estimator_list=estimator_list,