Skip to content

Commit

Permalink
Merge branch 'main' into tsbootstrap_adapter
Browse files Browse the repository at this point in the history
  • Loading branch information
fkiraly committed Mar 22, 2024
2 parents ce0ef73 + b667111 commit 9755bc3
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 31 deletions.
13 changes: 12 additions & 1 deletion .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -2629,14 +2629,25 @@
"maintenance"
]
},
{
"login": "Xinyu-Wu-0000",
"name": "Xinyu Wu",
"avatar_url": "https://avatars.githubusercontent.com/u/57612792?v=4",
"profile": "https://github.com/Xinyu-Wu-0000",
"contributions": [
"bug",
"code",
"test"
]
},
{
"login": "meraldoantonio",
"name": "Meraldo Antonio",
"avatar_url": "https://avatars.githubusercontent.com/u/37468543?v=4",
"profile": "https://github.com/meraldoantonio",
"contributions": [
"doc"
]
]
},
{
"login": "memeo-pro",
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ all_extras = [
"statsmodels>=0.12.1",
'stumpy>=1.5.1; python_version < "3.11"',
'tbats>=1.1; python_version < "3.12"',
'temporian>=0.7.0; python_version < "3.12" and sys_platform != "win32"',
'temporian<0.8.0,>=0.7.0; python_version < "3.12" and sys_platform != "win32"',
'tensorflow<2.17,>=2; python_version < "3.12"',
'tsbootstrap<0.2,>=0.1.0',
'tsfresh>=0.17; python_version < "3.12"',
Expand Down Expand Up @@ -147,7 +147,7 @@ all_extras_pandas2 = [
"statsmodels>=0.12.1",
'stumpy>=1.5.1; python_version < "3.11"',
'tbats>=1.1; python_version < "3.12"',
'temporian>=0.7.0; python_version < "3.12" and sys_platform != "win32"',
'temporian<0.8.0,>=0.7.0; python_version < "3.12" and sys_platform != "win32"',
'tensorflow<2.17,>=2; python_version < "3.12"',
'tsbootstrap<0.2,>=0.1.0',
'tsfresh>=0.17; python_version < "3.12"',
Expand Down Expand Up @@ -214,7 +214,7 @@ transformations = [
"pykalman-bardo<0.10,>=0.9.7",
"statsmodels<0.15,>=0.12.1",
'stumpy<1.13,>=1.5.1; python_version < "3.12"',
'temporian>=0.7.0; python_version < "3.12" and sys_platform != "win32"',
'temporian<0.8.0,>=0.7.0; python_version < "3.12" and sys_platform != "win32"',
'tsbootstrap<0.2,>=0.1.0',
'tsfresh<0.21,>=0.17; python_version < "3.12"',
]
Expand Down
34 changes: 32 additions & 2 deletions sktime/classification/distance_based/_shape_dtw.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class ShapeDTW(BaseClassifier):
Parameters
----------
n_neighbours : int, int, set k for knn (default =1).
n_neighbors : int, set k for knn (default=1).
subsequence_length : int, defines the length of the
subsequences(default=sqrt(n_timepoints)).
Expand Down Expand Up @@ -165,6 +165,8 @@ def _fit(self, X, y):
if self.metric_params is None:
self.metric_params = {}
_reset = True
else:
_reset = False

# If the shape descriptor is 'compound',
# calculate the appropriate weighting_factor
Expand Down Expand Up @@ -238,7 +240,7 @@ def _calculate_weighting_factor_value(self, X, y):

grid = GridSearchCV(
estimator=ShapeDTW(
n_neighbours=n,
n_neighbors=n,
subsequence_length=sl,
shape_descriptor_function=sdf,
shape_descriptor_functions=sdfs,
Expand Down Expand Up @@ -502,3 +504,31 @@ def _combine_data_frames(self, dataFrames, weighting_factor, col_names):
colToAdd.append(pd.Series(inst))
df[col] = colToAdd
return df

@classmethod
def get_test_params(cls, parameter_set="default"):
    """Return testing parameter settings for the estimator.

    Parameters
    ----------
    parameter_set : str, default="default"
        Name of the set of test parameters to return, for use in tests. If no
        special parameters are defined for a value, will return ``"default"`` set.
        This implementation does not branch on ``parameter_set``; the same list
        is returned for every value.

    Returns
    -------
    params : dict or list of dict, default = {}
        Parameters to create testing instances of the class
        Each dict are parameters to construct an "interesting" test instance, i.e.,
        ``MyClass(**params)`` or ``MyClass(**params[i])`` creates a valid test
        instance.
        ``create_test_instance`` uses the first (or only) dictionary in ``params``
    """
    # first set: construct the estimator with all defaults
    params1 = {}
    # second set: non-default neighbour count together with the "compound"
    # shape descriptor, which combines the two listed descriptor functions
    params2 = {
        "n_neighbors": 3,
        "shape_descriptor_function": "compound",
        "shape_descriptor_functions": ["paa", "dwt"],
    }
    return [params1, params2]
6 changes: 3 additions & 3 deletions sktime/datatypes/_table/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

MTYPE_REGISTER_TABLE = [
("pd_DataFrame_Table", "Table", "pd.DataFrame representation of a data table"),
("numpy1D", "Table", "1D np.narray representation of a univariate table"),
("numpy2D", "Table", "2D np.narray representation of a univariate table"),
("pd_Series_Table", "Table", "pd.Series representation of a data table"),
("numpy1D", "Table", "1D np.narray representation of a univariate data table"),
("numpy2D", "Table", "2D np.narray representation of a multivariate data table"),
("pd_Series_Table", "Table", "pd.Series representation of a univariate data table"),
("list_of_dict", "Table", "list of dictionaries with primitive entries"),
("polars_eager_table", "Table", "polars.DataFrame representation of a data table"),
("polars_lazy_table", "Table", "polars.LazyFrame representation of a data table"),
Expand Down
52 changes: 34 additions & 18 deletions sktime/dists_kernels/scipy_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,32 +22,33 @@ class ScipyDist(BasePairwiseTransformer):
Parameters
----------
metric : string or function, as in cdist; default = 'euclidean'
if string, one of: 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard',
'jensenshannon',
'kulsinski' (< scipy 1.11) or 'kulczynski1' (from scipy 1.11),
'mahalanobis', 'matching', 'minkowski',
'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
'sokalsneath', 'sqeuclidean', 'yule'
metric : string or function, as in cdist; default = ``euclidean``
if string, one of: ``braycurtis``, ``canberra``, ``chebyshev``, ``cityblock``,
``correlation``, ``cosine``, ``dice``, ``euclidean``, ``hamming``,
``jaccard``, ``jensenshannon``,
``kulsinski`` (< scipy 1.11) or ``kulczynski1`` (from scipy 1.11),
``mahalanobis``, ``matching``, ``minkowski``,
``rogerstanimoto``, ``russellrao``, ``seuclidean``, ``sokalmichener``,
``sokalsneath``, ``sqeuclidean``, ``yule``
if function, should have signature 1D-np.array x 1D-np.array -> float
p: if metric='minkowski', the "p" in "p-norm", otherwise irrelevant
colalign : string, one of 'intersect' (default), 'force-align', 'none'
p: if metric=``minkowski``, the ``p`` in ``p-norm``, otherwise irrelevant
colalign : string, one of ``intersect`` (default), ``force-align``, ``none``
controls column alignment if X, X2 passed in fit are pd.DataFrame
columns between X and X2 are aligned via column names
if 'intersect', distance is computed on columns occurring both in X and X2,
columns between X and X2 are aligned via column names.
if ``intersect``, distance is computed on columns occurring both in X and X2,
other columns are discarded; column ordering in X2 is copied from X
if 'force-align', raises an error if the set of columns in X, X2 differs;
if ``force-align``, raises an error if the set of columns in X, X2 differs;
column ordering in X2 is copied from X
if 'none', X and X2 are passed through unmodified (no columns are aligned)
if ``none``, X and X2 are passed through unmodified (no columns are aligned)
note: this will potentially align "non-matching" columns
var_weights : 1D np.array of float or None, default=None
weight/scaling vector applied to variables in X/X2
before being passed to cdist, i-th col of X/X2 is multiplied by var_weights[i]
if None, equivalent to all-ones vector
metric_kwargs : dict, optional, default=None
any kwargs passed to the metric in addition, i.e., to the function cdist
common kwargs: "w" : array-like, same length as X.columns, weights for metric
common kwargs: ``w`` : array-like, same length as X.columns, weights for metric
refer to scipy.spatial.distance.cdist for documentation of other extra kwargs
"""

Expand Down Expand Up @@ -100,7 +101,21 @@ def _transform(self, X, X2=None):
metric_kwargs = self.metric_kwargs
if metric_kwargs is None:
metric_kwargs = {}

if isinstance(X, pd.DataFrame) and isinstance(X2, pd.DataFrame):
if self.colalign == "intersect":
common_cols = X.columns.intersection(X2.columns)
X = X[common_cols]
X2 = X2[common_cols]
# reordering X2 columns to match X
X2 = X2[X.columns]
elif self.colalign == "force-align":
if not X.columns.equals(X2.columns):
raise ValueError("X and X2 have different columns")
X2 = X2[X.columns]
elif self.colalign == "none":
pass
else:
raise ValueError("colalign must be one of intersect, force-align, none")
if isinstance(X, pd.DataFrame):
X = X.select_dtypes("number").to_numpy(dtype="float")

Expand Down Expand Up @@ -147,6 +162,7 @@ def get_test_params(cls, parameter_set="default"):
params1 = {}

# using kwargs
params2 = {"metric": "minkowski", "p": 3}
params2 = {"metric": "minkowski", "p": 3, "colalign": "intersect"}
params3 = {"metric": "euclidean", "colalign": "force-align"}

return [params1, params2]
return [params1, params2, params3]
16 changes: 12 additions & 4 deletions sktime/forecasting/model_selection/_tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,10 @@ def evaluate_candidates(candidate_params):
# Select n best forecaster
self.n_best_forecasters_ = []
self.n_best_scores_ = []
for i in range(self.return_n_best_forecasters):
_forecasters_to_return = min(self.return_n_best_forecasters, len(results.index))
if _forecasters_to_return == -1:
_forecasters_to_return = len(results.index)
for i in range(_forecasters_to_return):
params = results["params"].iloc[i]
rank = results[f"rank_{scoring_name}"].iloc[i]
rank = str(int(rank))
Expand Down Expand Up @@ -445,7 +448,9 @@ class ForecastingGridSearchCV(BaseGridSearch):
verbose: int, optional (default=0)
return_n_best_forecasters : int, default=1
If the n best forecasters should be returned, this value can be set
and the n best forecasters will be assigned to n_best_forecasters_
and the n best forecasters will be assigned to n_best_forecasters_.
Set return_n_best_forecasters to -1 to return all forecasters.
error_score : numeric value or the str 'raise', optional (default=np.nan)
The test score returned when a forecaster fails to be fitted.
return_train_score : bool, optional (default=False)
Expand Down Expand Up @@ -773,7 +778,9 @@ class ForecastingRandomizedSearchCV(BaseGridSearch):
verbose : int, optional (default=0)
return_n_best_forecasters: int, default=1
If the n best forecasters should be returned, this value can be set
and the n best forecasters will be assigned to n_best_forecasters_
and the n best forecasters will be assigned to n_best_forecasters_.
Set return_n_best_forecasters to -1 to return all forecasters.
random_state : int, RandomState instance or None, default=None
Pseudo random number generator state used for random uniform sampling
from lists of possible values instead of scipy.stats distributions.
Expand Down Expand Up @@ -1037,7 +1044,8 @@ class ForecastingSkoptSearchCV(BaseGridSearch):
FitFailedWarning is raised.
return_n_best_forecasters: int, default=1
If the n best forecasters should be returned, this value can be set
and the n best forecasters will be assigned to n_best_forecasters_
and the n best forecasters will be assigned to n_best_forecasters_.
Set return_n_best_forecasters to -1 to return all forecasters.
backend : {"dask", "loky", "multiprocessing", "threading"}, by default "loky".
Runs parallel evaluate if specified and ``strategy`` is set as "refit".
Expand Down
77 changes: 77 additions & 0 deletions sktime/forecasting/model_selection/tests/test_tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

__author__ = ["mloning", "fkiraly"]


from functools import reduce
from typing import Dict, List, Union

import numpy as np
import pytest
from sklearn.model_selection import ParameterGrid, ParameterSampler
Expand Down Expand Up @@ -348,3 +352,76 @@ def test_gscv_backends(backend_set):
backend_params=backend_params,
)
gscv.fit(y, X)


# Parameter grids used by test_return_n_best_forecasters.
# Single-grid case: alias of PIPE_GRID, which is defined earlier in this module.
TEST_PARAMS_DICT = PIPE_GRID

# List-of-grids case: two grids of different size.
# The first spans 3 * 2 * 3 * 3 = 54 combinations, the second 3 * 2 = 6,
# i.e. 60 combinations in total.
TEST_PARAMS_LIST = [
{
"window_length": [1, 2, 3],
"strategy": ["last", "mean"],
"transformer__degree": [1, 2, 3],
"forecaster__strategy": ["last", "mean", "seasonal_last"],
},
{
"window_length": [4, 5, 6],
"forecaster__strategy": ["last", "mean"],
},
]


@pytest.mark.parametrize("return_n_best_forecasters", [-1, 0, 3])
@pytest.mark.parametrize(
    "Forecaster, kwargs",
    [
        (ForecastingGridSearchCV, {"param_grid": TEST_PARAMS_DICT}),
        (ForecastingGridSearchCV, {"param_grid": TEST_PARAMS_LIST}),
        (ForecastingRandomizedSearchCV, {"param_distributions": TEST_PARAMS_LIST}),
        (
            ForecastingRandomizedSearchCV,
            {"param_distributions": TEST_PARAMS_LIST, "n_iter": 100},
        ),
    ],
)
def test_return_n_best_forecasters(Forecaster, return_n_best_forecasters, kwargs):
    """Check the number of forecasters stored in ``n_best_forecasters_``.

    Parameters
    ----------
    Forecaster : class
        Tuner class under test (grid search or randomized search).
    return_n_best_forecasters : int
        Requested number of best forecasters; ``-1`` requests all of them.
    kwargs : dict
        Holds exactly one of ``param_grid`` / ``param_distributions`` and,
        optionally, ``n_iter``.

    Notes
    -----
    The accepted counts are:

    * ``-1`` with ``param_grid``: the total number of grid combinations.
    * ``-1`` with ``param_distributions``: either ``n_iter`` or the total
      number of combinations (the latter when fewer than ``n_iter``
      candidates were evaluated).
    * any other value: either the requested number or the total number of
      combinations (the latter when fewer candidates exist than requested).
    """

    def calculate_total_combinations(param_grid: Union[List[Dict], Dict]) -> int:
        # Defined at function scope so that both branches below can use it;
        # defining it inside the ``-1`` branch only makes the other branch
        # fail with UnboundLocalError instead of a meaningful assertion.
        if isinstance(param_grid, dict):
            # initial value 1: an empty dict describes a single (empty) setting
            return reduce(
                lambda x, y: x * y, [len(x) for x in param_grid.values()], 1
            )
        elif isinstance(param_grid, list):
            return sum(calculate_total_combinations(i) for i in param_grid)
        else:
            error_message = "`param_grid` must be a dict or a list[dict]"
            raise ValueError(error_message)

    y, X = load_longley()
    searchCV = Forecaster(
        forecaster=PIPE,
        cv=CVs[0],
        **kwargs,
        return_n_best_forecasters=return_n_best_forecasters,
    )
    searchCV.fit(y, X)

    grid_key = "param_grid" if "param_grid" in kwargs else "param_distributions"
    total_combinations = calculate_total_combinations(kwargs[grid_key])
    n_returned = len(searchCV.n_best_forecasters_)

    if return_n_best_forecasters == -1:
        if grid_key == "param_grid":
            assert n_returned == total_combinations
        else:
            # randomized search evaluates n_iter candidates unless the search
            # space holds fewer combinations than that
            assert n_returned in (searchCV.n_iter, total_combinations)
    else:
        # the requested number, unless fewer candidates were available
        assert n_returned in (return_n_best_forecasters, total_combinations)

0 comments on commit 9755bc3

Please sign in to comment.