From 541b5c3f4aacf0a731b741fc26754cc2ad1f1a31 Mon Sep 17 00:00:00 2001 From: HassnHamada <44922829+HassnHamada@users.noreply.github.com> Date: Thu, 21 Mar 2024 00:48:17 +0200 Subject: [PATCH 1/5] [ENH] forecasting tuners, `return_n_best_forecasters=-1` to return performances of all forecasters (#6031) #### Reference Issues/PRs Fixes #6027 #### What does this implement/fix? Explain your changes. If the user passes `return_n_best_forecasters=-1`, all forecasters are added to the `n_best_forecasters_` list. --- sktime/forecasting/model_selection/_tune.py | 16 +++- .../model_selection/tests/test_tune.py | 77 +++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/sktime/forecasting/model_selection/_tune.py b/sktime/forecasting/model_selection/_tune.py index e579cf8248b..a7afe38c0e7 100644 --- a/sktime/forecasting/model_selection/_tune.py +++ b/sktime/forecasting/model_selection/_tune.py @@ -267,7 +267,10 @@ def evaluate_candidates(candidate_params): # Select n best forecaster self.n_best_forecasters_ = [] self.n_best_scores_ = [] - for i in range(self.return_n_best_forecasters): + _forecasters_to_return = min(self.return_n_best_forecasters, len(results.index)) + if _forecasters_to_return == -1: + _forecasters_to_return = len(results.index) + for i in range(_forecasters_to_return): params = results["params"].iloc[i] rank = results[f"rank_{scoring_name}"].iloc[i] rank = str(int(rank)) @@ -445,7 +448,9 @@ class ForecastingGridSearchCV(BaseGridSearch): verbose: int, optional (default=0) return_n_best_forecasters : int, default=1 In case the n best forecaster should be returned, this value can be set - and the n best forecasters will be assigned to n_best_forecasters_ + and the n best forecasters will be assigned to n_best_forecasters_. + Set return_n_best_forecasters to -1 to return all forecasters. + error_score : numeric value or the str 'raise', optional (default=np.nan) The test score returned when a forecaster fails to be fitted. return_train_score : bool, optional (default=False) @@ -773,7 +778,9 @@ class ForecastingRandomizedSearchCV(BaseGridSearch): verbose : int, optional (default=0) return_n_best_forecasters: int, default=1 In case the n best forecaster should be returned, this value can be set - and the n best forecasters will be assigned to n_best_forecasters_ + and the n best forecasters will be assigned to n_best_forecasters_. + Set return_n_best_forecasters to -1 to return all forecasters. + random_state : int, RandomState instance or None, default=None Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. @@ -1037,7 +1044,8 @@ class ForecastingSkoptSearchCV(BaseGridSearch): FitFailedWarning is raised. return_n_best_forecasters: int, default=1 In case the n best forecaster should be returned, this value can be set - and the n best forecasters will be assigned to n_best_forecasters_ + and the n best forecasters will be assigned to n_best_forecasters_. + Set return_n_best_forecasters to -1 to return all forecasters. backend : {"dask", "loky", "multiprocessing", "threading"}, by default "loky". Runs parallel evaluate if specified and ``strategy`` is set as "refit".
diff --git a/sktime/forecasting/model_selection/tests/test_tune.py b/sktime/forecasting/model_selection/tests/test_tune.py index 7c1d604e21d..9dfb21a832b 100644 --- a/sktime/forecasting/model_selection/tests/test_tune.py +++ b/sktime/forecasting/model_selection/tests/test_tune.py @@ -4,6 +4,10 @@ __author__ = ["mloning", "fkiraly"] + +from functools import reduce +from typing import Dict, List, Union + import numpy as np import pytest from sklearn.model_selection import ParameterGrid, ParameterSampler @@ -348,3 +352,76 @@ def test_gscv_backends(backend_set): backend_params=backend_params, ) gscv.fit(y, X) + + +TEST_PARAMS_DICT = PIPE_GRID + +TEST_PARAMS_LIST = [ + { + "window_length": [1, 2, 3], + "strategy": ["last", "mean"], + "transformer__degree": [1, 2, 3], + "forecaster__strategy": ["last", "mean", "seasonal_last"], + }, + { + "window_length": [4, 5, 6], + "forecaster__strategy": ["last", "mean"], + }, +] + + +@pytest.mark.parametrize("return_n_best_forecasters", [-1, 0, 3]) +@pytest.mark.parametrize( + "Forecaster, kwargs", + [ + (ForecastingGridSearchCV, {"param_grid": TEST_PARAMS_DICT}), + (ForecastingGridSearchCV, {"param_grid": TEST_PARAMS_LIST}), + (ForecastingRandomizedSearchCV, {"param_distributions": TEST_PARAMS_LIST}), + ( + ForecastingRandomizedSearchCV, + {"param_distributions": TEST_PARAMS_LIST, "n_iter": 100}, + ), + ], +) +def test_return_n_best_forecasters(Forecaster, return_n_best_forecasters, kwargs): + y, X = load_longley() + searchCV = Forecaster( + forecaster=PIPE, + cv=CVs[0], + **kwargs, + return_n_best_forecasters=return_n_best_forecasters, + ) + searchCV.fit(y, X) + if return_n_best_forecasters == -1: + + def calculate_total_combinations(param_grid: Union[List[Dict], Dict]): + if isinstance(param_grid, dict): + return reduce(lambda x, y: x * y, [len(x) for x in param_grid.values()]) + elif isinstance(param_grid, list): + return sum(calculate_total_combinations(i) for i in param_grid) + else: + error_message = "`param_grid` must be a dict or a list[dict]" + raise ValueError(error_message) + + if "param_grid" in kwargs: + total_combinations = calculate_total_combinations(kwargs["param_grid"]) + assert len(searchCV.n_best_forecasters_) == total_combinations + else: + try: + assert len(searchCV.n_best_forecasters_) == searchCV.n_iter + except AssertionError: + total_combinations = calculate_total_combinations( + kwargs["param_distributions"] + ) + assert len(searchCV.n_best_forecasters_) == total_combinations + else: + try: + assert len(searchCV.n_best_forecasters_) == return_n_best_forecasters + except AssertionError: + key = ( + "param_distributions" + if "param_distributions" in kwargs + else "param_grid" + ) + total_combinations = calculate_total_combinations(kwargs[key]) + assert len(searchCV.n_best_forecasters_) == total_combinations From 2d698370ec3e62e406f43b5c9acebd8a306470e5 Mon Sep 17 00:00:00 2001 From: Felix Hirwa Nshuti Date: Thu, 21 Mar 2024 04:46:11 +0530 Subject: [PATCH 2/5] [BUG] Fix the `colalign` functionality to `ScipyDist` class as specified in the docstrings (#6110) This pull request adds the `colalign` functionality to the `ScipyDist` class as specified in the docstring, and corrects the formatting of the class docstring.
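As a usage sketch (illustrative data and expected behaviour only, not part of this diff or of the sktime test suite), the alignment options can be exercised on two DataFrames with partially overlapping columns:

```python
import pandas as pd

from sktime.dists_kernels import ScipyDist

# two toy tables sharing only column "b"
X = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
X2 = pd.DataFrame({"b": [5.0, 6.0], "c": [7.0, 8.0]})

# with colalign="intersect", the distance should be computed on the shared column "b" only
dist = ScipyDist(metric="euclidean", colalign="intersect")
D = dist.transform(X, X2)  # 2 x 2 distance matrix between rows of X and rows of X2

# with colalign="force-align", the differing column sets are expected to raise a ValueError
```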
#### Reference Issues/PRs Fixes https://github.com/sktime/sktime/issues/1942 --- sktime/dists_kernels/scipy_dist.py | 52 +++++++++++++++++++----------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/sktime/dists_kernels/scipy_dist.py b/sktime/dists_kernels/scipy_dist.py index 4c3646f0a5e..7704d529450 100644 --- a/sktime/dists_kernels/scipy_dist.py +++ b/sktime/dists_kernels/scipy_dist.py @@ -22,24 +22,25 @@ class ScipyDist(BasePairwiseTransformer): Parameters ---------- - metric : string or function, as in cdist; default = 'euclidean' - if string, one of: 'braycurtis', 'canberra', 'chebyshev', 'cityblock', - 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', - 'jensenshannon', - 'kulsinski' (< scipy 1.11) or 'kulczynski1' (from scipy 1.11), - 'mahalanobis', 'matching', 'minkowski', - 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', - 'sokalsneath', 'sqeuclidean', 'yule' + metric : string or function, as in cdist; default = ``euclidean`` + if string, one of: ``braycurtis``, ``canberra``, ``chebyshev``, ``cityblock``, + ``correlation``, ``cosine``, ``dice``, ``euclidean``, ``hamming``, + ``jaccard``, ``jensenshannon``, + ``kulsinski`` (< scipy 1.11) or ``kulczynski1`` (from scipy 1.11), + ``mahalanobis``, ``matching``, ``minkowski``, + ``rogerstanimoto``, ``russellrao``, ``seuclidean``, ``sokalmichener``, + ``sokalsneath``, ``sqeuclidean``, ``yule`` if function, should have signature 1D-np.array x 1D-np.array -> float - p: if metric='minkowski', the "p" in "p-norm", otherwise irrelevant - colalign : string, one of 'intersect' (default), 'force-align', 'none' + p: if metric=``minkowski``, the ``p`` in ``p-norm``, otherwise irrelevant + colalign : string, one of ``intersect`` (default), ``force-align``, ``none`` controls column alignment if X, X2 passed in fit are pd.DataFrame - columns between X and X2 are aligned via column names - if 'intersect', distance is computed on columns occurring both in X and X2, + columns between X and X2 are aligned via column names.
+ + if ``intersect``, distance is computed on columns occurring both in X and X2, other columns are discarded; column ordering in X2 is copied from X - if 'force-align', raises an error if the set of columns in X, X2 differs; + if ``force-align``, raises an error if the set of columns in X, X2 differs; column ordering in X2 is copied from X - if 'none', X and X2 are passed through unmodified (no columns are aligned) + if ``none``, X and X2 are passed through unmodified (no columns are aligned) note: this will potentially align "non-matching" columns var_weights : 1D np.array of float or None, default=None weight/scaling vector applied to variables in X/X2 @@ -47,7 +48,7 @@ class ScipyDist(BasePairwiseTransformer): if None, equivalent to all-ones vector metric_kwargs : dict, optional, default=None any kwargs passed to the metric in addition, i.e., to the function cdist - common kwargs: "w" : array-like, same length as X.columns, weights for metric + common kwargs: ``w`` : array-like, same length as X.columns, weights for metric refer to scipy.spatial.distance.dist for a documentation of other extra kwargs """ @@ -100,7 +101,21 @@ def _transform(self, X, X2=None): metric_kwargs = self.metric_kwargs if metric_kwargs is None: metric_kwargs = {} - + if isinstance(X, pd.DataFrame) and isinstance(X2, pd.DataFrame): + if self.colalign == "intersect": + common_cols = X.columns.intersection(X2.columns) + X = X[common_cols] + X2 = X2[common_cols] + # reordering X2 columns to match X + X2 = X2[X.columns] + elif self.colalign == "force-align": + if not X.columns.equals(X2.columns): + raise ValueError("X and X2 have different columns") + X2 = X2[X.columns] + elif self.colalign == "none": + pass + else: + raise ValueError("colalign must be one of intersect, force-align, none") if isinstance(X, pd.DataFrame): X = X.select_dtypes("number").to_numpy(dtype="float") @@ -147,6 +162,7 @@ def get_test_params(cls, parameter_set="default"): params1 = {} # using kwargs - params2 = {"metric": "minkowski", "p": 3} + params2 = {"metric": "minkowski", "p": 3, "colalign": "intersect"} + params3 = {"metric": "euclidean", "colalign": "force-align"} - return [params1, params2] + return [params1, params2, params3] From 533fe8c85b2852cecf45116983dc71039e7fa2b0 Mon Sep 17 00:00:00 2001 From: Xinyu Wu <57612792+Xinyu-Wu-0000@users.noreply.github.com> Date: Thu, 21 Mar 2024 19:34:34 +0800 Subject: [PATCH 3/5] [ENH][BUG] Second test parameter set for shapeDTW (#6093) Towards https://github.com/sktime/sktime/issues/3429 Adds a second test parameter set for shapeDTW --- .all-contributorsrc | 13 ++++++- .../distance_based/_shape_dtw.py | 34 +++++++++++++++++-- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/.all-contributorsrc b/.all-contributorsrc index 1625508003b..3e39feadae5 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -2629,6 +2629,17 @@ "maintenance" ] }, + { + "login": "Xinyu-Wu-0000", + "name": "Xinyu Wu", + "avatar_url": "https://avatars.githubusercontent.com/u/57612792?v=4", + "profile": "https://github.com/Xinyu-Wu-0000", + "contributions": [ + "bug", + "code", + "test" + ] + }, { "login": "meraldoantonio", "name": "Meraldo Antonio", @@ -2636,7 +2647,7 @@ "profile": "https://github.com/meraldoantonio", "contributions": [ "doc" - ] + ] }, { "login": "memeo-pro", diff --git a/sktime/classification/distance_based/_shape_dtw.py b/sktime/classification/distance_based/_shape_dtw.py index 180b5ad9cac..3d9960f3b42 100644 --- a/sktime/classification/distance_based/_shape_dtw.py +++ 
b/sktime/classification/distance_based/_shape_dtw.py @@ -38,7 +38,7 @@ class ShapeDTW(BaseClassifier): Parameters ---------- - n_neighbours : int, int, set k for knn (default =1). + n_neighbors : int, int, set k for knn (default =1). subsequence_length : int, defines the length of the subsequences(default=sqrt(n_timepoints)). @@ -165,6 +165,8 @@ def _fit(self, X, y): if self.metric_params is None: self.metric_params = {} _reset = True + else: + _reset = False # If the shape descriptor is 'compound', # calculate the appropriate weighting_factor @@ -238,7 +240,7 @@ def _calculate_weighting_factor_value(self, X, y): grid = GridSearchCV( estimator=ShapeDTW( - n_neighbours=n, + n_neighbors=n, subsequence_length=sl, shape_descriptor_function=sdf, shape_descriptor_functions=sdfs, @@ -502,3 +504,31 @@ def _combine_data_frames(self, dataFrames, weighting_factor, col_names): colToAdd.append(pd.Series(inst)) df[col] = colToAdd return df + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return ``"default"`` set. + + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test + instance. + ``create_test_instance`` uses the first (or only) dictionary in ``params`` + """ + params1 = {} + params2 = { + "n_neighbors": 3, + "shape_descriptor_function": "compound", + "shape_descriptor_functions": ["paa", "dwt"], + } + return [params1, params2] From 5a6b01ce092eb0d62c9e7070ae63a977c0a80df8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Thu, 21 Mar 2024 18:43:44 +0100 Subject: [PATCH 4/5] [DOC] minor clarifications in mtype descriptions (#6078) While refactoring `datatypes`, noticed some minor inconsistencies in the registry short descriptions of mtypes. Fixed in this PR. 
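Not part of the diff, but for context, a minimal sketch of where these short descriptions surface to users (assuming the aggregate `MTYPE_REGISTER` export from `sktime.datatypes`):

```python
from sktime.datatypes import MTYPE_REGISTER

# registry entries are (mtype, scitype, description) tuples;
# list the "Table" descriptions that this patch rewords
for entry in MTYPE_REGISTER:
    if entry[1] == "Table":
        print(f"{entry[0]}: {entry[2]}")
```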
--- sktime/datatypes/_table/_registry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sktime/datatypes/_table/_registry.py b/sktime/datatypes/_table/_registry.py index 8e0b76b8969..f7f7974cf8f 100644 --- a/sktime/datatypes/_table/_registry.py +++ b/sktime/datatypes/_table/_registry.py @@ -11,9 +11,9 @@ MTYPE_REGISTER_TABLE = [ ("pd_DataFrame_Table", "Table", "pd.DataFrame representation of a data table"), - ("numpy1D", "Table", "1D np.narray representation of a univariate table"), - ("numpy2D", "Table", "2D np.narray representation of a univariate table"), - ("pd_Series_Table", "Table", "pd.Series representation of a data table"), + ("numpy1D", "Table", "1D np.narray representation of a univariate data table"), + ("numpy2D", "Table", "2D np.narray representation of a multivariate data table"), + ("pd_Series_Table", "Table", "pd.Series representation of a univariate data table"), ("list_of_dict", "Table", "list of dictionaries with primitive entries"), ("polars_eager_table", "Table", "polars.DataFrame representation of a data table"), ("polars_lazy_table", "Table", "polars.LazyFrame representation of a data table"), From b6671117336a6d8af521226318cf78329be3cb01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Thu, 21 Mar 2024 19:29:33 +0100 Subject: [PATCH 5/5] [MNT] bound `temporian<0.8.0` (#6184) `temporian` is currently causing install failures on `main`, likely due to version 0.8.0 released today. This PR adds a bound `temporian<0.8.0` to `pyproject.toml`. FYI @ianspektor, @achoum --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7fdcce89014..e9d480d7417 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,7 @@ all_extras = [ "statsmodels>=0.12.1", 'stumpy>=1.5.1; python_version < "3.11"', 'tbats>=1.1; python_version < "3.12"', - 'temporian>=0.7.0; python_version < "3.12" and sys_platform != "win32"', + 'temporian<0.8.0,>=0.7.0; python_version < "3.12" and sys_platform != "win32"', 'tensorflow<2.17,>=2; python_version < "3.12"', 'tsfresh>=0.17; python_version < "3.12"', 'tslearn<0.7.0,!=0.6.0,>=0.5.2; python_version < "3.11"', @@ -146,7 +146,7 @@ all_extras_pandas2 = [ "statsmodels>=0.12.1", 'stumpy>=1.5.1; python_version < "3.11"', 'tbats>=1.1; python_version < "3.12"', - 'temporian>=0.7.0; python_version < "3.12" and sys_platform != "win32"', + 'temporian<0.8.0,>=0.7.0; python_version < "3.12" and sys_platform != "win32"', 'tensorflow<2.17,>=2; python_version < "3.12"', 'tsfresh>=0.17; python_version < "3.12"', 'tslearn<0.7.0,!=0.6.0,>=0.5.2; python_version < "3.11"', @@ -212,7 +212,7 @@ transformations = [ "pykalman-bardo<0.10,>=0.9.7", "statsmodels<0.15,>=0.12.1", 'stumpy<1.13,>=1.5.1; python_version < "3.12"', - 'temporian>=0.7.0; python_version < "3.12" and sys_platform != "win32"', + 'temporian<0.8.0,>=0.7.0; python_version < "3.12" and sys_platform != "win32"', 'tsfresh<0.21,>=0.17; python_version < "3.12"', ]
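Not part of the diff: a quick, standard-library-only check that a given environment resolves `temporian` within the new bound (absence is also fine, since it is a soft dependency):

```python
from importlib.metadata import PackageNotFoundError, version

try:
    # under the new constraint, any installed temporian should be a 0.7.x release
    print("temporian", version("temporian"))
except PackageNotFoundError:
    # soft dependency: not installed on Windows or on Python >= 3.12 per the markers above
    print("temporian not installed")
```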