Skip to content

Commit

Permalink
Merge branch 'main' into refactor-scenarios-lazy
Browse files Browse the repository at this point in the history
  • Loading branch information
fkiraly committed Apr 10, 2024
2 parents d827667 + bab0b84 commit 2aceec5
Show file tree
Hide file tree
Showing 19 changed files with 130 additions and 27 deletions.
16 changes: 14 additions & 2 deletions .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -2116,7 +2116,8 @@
"profile": "https://www.linkedin.com/in/pranav-prajapati-a5b413226/",
"contributions": [
"code",
"test"
"test",
"bug"
]
},
{
Expand Down Expand Up @@ -2342,7 +2343,9 @@
"profile": "https://github.com/Ram0nB",
"contributions": [
"doc",
"code"
"code",
"bug",
"test"
]
},
{
Expand Down Expand Up @@ -2735,6 +2738,15 @@
"doc"
]
},
{
"login": "morestart",
"name": "ctl",
"avatar_url": "https://avatars.githubusercontent.com/u/35556811",
"profile": "https://github.com/morestart",
"contributions": [
"bug"
]
},
{
"login": "anteemony",
"name": "Anthony Okonneh",
Expand Down
5 changes: 4 additions & 1 deletion extension_templates/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ class MyAligner(BaseAligner):
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
# maintainer = algorithm maintainer role, "owner" of the sktime class
# for 3rd party interfaces, the scope is the sktime class only
# specify one or multiple authors and maintainers, only for sktime contribution
# remove maintainer tag if maintained by sktime core team
#
Expand Down
7 changes: 5 additions & 2 deletions extension_templates/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,11 @@ class MyTimeSeriesClassifier(BaseClassifier):
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# specify one or multiple authors and maintainers, only for sktime contribution
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
# maintainer = algorithm maintainer role, "owner" of the sktime class
# for 3rd party interfaces, the scope is the sktime class only
# specify one or multiple authors and maintainers
# remove maintainer tag if maintained by sktime core team
#
"python_version": None, # PEP 440 python version specifier to limit versions
Expand Down
5 changes: 4 additions & 1 deletion extension_templates/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ class MyClusterer(BaseClusterer):
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
# maintainer = algorithm maintainer role, "owner" of the sktime class
# for 3rd party interfaces, the scope is the sktime class only
# specify one or multiple authors and maintainers, only for sktime contribution
# remove maintainer tag if maintained by sktime core team
#
Expand Down
5 changes: 4 additions & 1 deletion extension_templates/dist_kern_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ class MyTrafoPwPanel(BasePairwiseTransformerPanel):
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
# maintainer = algorithm maintainer role, "owner" of the sktime class
# for 3rd party interfaces, the scope is the sktime class only
# remove maintainer tag if maintained by sktime core team
}
# in case of inheritance, concrete class should typically set tags
Expand Down
5 changes: 4 additions & 1 deletion extension_templates/dist_kern_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ class MyTrafoPw(BasePairwiseTransformer):
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
# maintainer = algorithm maintainer role, "owner" of the sktime class
# for 3rd party interfaces, the scope is the sktime class only
# remove maintainer tag if maintained by sktime core team
}
# in case of inheritance, concrete class should typically set tags
Expand Down
3 changes: 3 additions & 0 deletions extension_templates/forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,13 @@ class MyForecaster(BaseForecaster):
# valid values: str or list of str, should be GitHub handles
# this should follow best scientific contribution practices
# scope is the code, not the methodology (method is per paper citation)
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
#
# maintainer = current maintainer(s) of the estimator
# per algorithm maintainer role, see governance document
# this is an "owner" type role, with rights and maintenance duties
# for 3rd party interfaces, the scope is the sktime class only
"maintainers": ["maintainer1", "maintainer2"],
# valid values: str or list of str, should be GitHub handles
# remove tag if maintained by sktime core team
Expand Down
3 changes: 3 additions & 0 deletions extension_templates/forecasting_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,13 @@ class MyForecaster(BaseForecaster):
# valid values: str or list of str, should be GitHub handles
# this should follow best scientific contribution practices
# scope is the code, not the methodology (method is per paper citation)
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
#
# maintainer = current maintainer(s) of the estimator
# per algorithm maintainer role, see governance document
# this is an "owner" type role, with rights and maintenance duties
# for 3rd party interfaces, the scope is the sktime class only
"maintainers": ["maintainer1", "maintainer2"],
# valid values: str or list of str, should be GitHub handles
# remove tag if maintained by sktime core team
Expand Down
5 changes: 4 additions & 1 deletion extension_templates/forecasting_supersimple.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ class MyForecaster(BaseForecaster):
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
# maintainer = algorithm maintainer role, "owner" of the sktime class
# for 3rd party interfaces, the scope is the sktime class only
# remove maintainer tag if maintained by sktime core team
#
# do not change these:
Expand Down
3 changes: 3 additions & 0 deletions extension_templates/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,13 @@ class MySplitter(BaseSplitter):
# valid values: str or list of str, should be GitHub handles
# this should follow best scientific contribution practices
# scope is the code, not the methodology (method is per paper citation)
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
#
# maintainer = current maintainer(s) of the estimator
# per algorithm maintainer role, see governance document
# this is an "owner" type role, with rights and maintenance duties
# for 3rd party interfaces, the scope is the sktime class only
"maintainers": ["maintainer1", "maintainer2"],
# valid values: str or list of str, should be GitHub handles
# remove tag if maintained by sktime core team
Expand Down
3 changes: 3 additions & 0 deletions extension_templates/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,13 @@ class MyTransformer(BaseTransformer):
# valid values: str or list of str, should be GitHub handles
# this should follow best scientific contribution practices
# scope is the code, not the methodology (method is per paper citation)
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
#
# maintainer = current maintainer(s) of the estimator
# per algorithm maintainer role, see governance document
# this is an "owner" type role, with rights and maintenance duties
# for 3rd party interfaces, the scope is the sktime class only
"maintainers": ["maintainer1", "maintainer2"],
# valid values: str or list of str, should be GitHub handles
# remove tag if maintained by sktime core team
Expand Down
3 changes: 3 additions & 0 deletions extension_templates/transformer_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,13 @@ class MyTransformer(BaseTransformer):
# valid values: str or list of str, should be GitHub handles
# this should follow best scientific contribution practices
# scope is the code, not the methodology (method is per paper citation)
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
#
# maintainer = current maintainer(s) of the estimator
# per algorithm maintainer role, see governance document
# this is an "owner" type role, with rights and maintenance duties
# for 3rd party interfaces, the scope is the sktime class only
"maintainers": ["maintainer1", "maintainer2"],
# valid values: str or list of str, should be GitHub handles
# remove tag if maintained by sktime core team
Expand Down
5 changes: 4 additions & 1 deletion extension_templates/transformer_supersimple.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ class MyTransformer(BaseTransformer):
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# if interfacing a 3rd party estimator, ensure to give credit to the
# authors of the interfaced estimator
# maintainer = algorithm maintainer role, "owner" of the sktime class
# for 3rd party interfaces, the scope is the sktime class only
# remove maintainer tag if maintained by sktime core team
#
# do not change these:
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ all_extras = [
"arch>=5.6,<6.4.0",
"cloudpickle",
"dash!=2.9.0",
"dask<2024.2.2",
"dask<2024.4.2; extra == 'dataframe'",
"dtw-python",
'esig==0.9.7; python_version < "3.10"',
'filterpy>=1.4.5; python_version < "3.11"',
Expand Down Expand Up @@ -121,7 +121,7 @@ all_extras_pandas2 = [
"arch>=5.6,<6.4.0",
"cloudpickle",
"dash!=2.9.0",
"dask<2024.2.2",
"dask<2024.4.2; extra == 'dataframe'",
"dtw-python",
'esig==0.9.7; python_version < "3.10"',
'filterpy>=1.4.5; python_version < "3.11"',
Expand Down
5 changes: 1 addition & 4 deletions sktime/datasets/_single_problem_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -1413,10 +1413,7 @@ def load_forecastingdata(
# valid dataset names for classification, regression, forecasting datasets repo
if name not in list(tsf_all_datasets):
raise ValueError(
{name}
+ " is not a valid dataset name. \
List of valid dataset names can be found at \
sktime.datasets.tsf_dataset_names.tsf_all_datasets"
f"Error in load_forecastingdata, Invalid dataset name = {name}."
)

url = f"https://zenodo.org/record/{tsf_all[name]}/files/{name}.zip"
Expand Down
11 changes: 11 additions & 0 deletions sktime/datasets/tests/test_datadownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,14 @@ def test_check_link_downloadable(name):

assert "application/octet-stream" in content_type, "URL is not downloadable."
assert "attachment" in content_disposition, "URL is not downloadable."


@pytest.mark.datadownload
@pytest.mark.parametrize("name", ["invalid_name"])
def test_load_forecasting_data_invalid_name(name):
    """Test that load_forecastingdata raises ValueError for an invalid name.

    The expected message is wrapped in ``re.escape`` because the ``match``
    argument of ``pytest.raises`` is interpreted as a regular expression:
    without escaping, every ``.`` in the message would match any character,
    and a parametrized name containing regex metacharacters would silently
    change what is being asserted.
    """
    # local import keeps this test self-contained with respect to module imports
    import re

    expected_msg = f"Error in load_forecastingdata, Invalid dataset name = {name}."
    with pytest.raises(ValueError, match=re.escape(expected_msg)):
        load_forecastingdata(name=name)
3 changes: 2 additions & 1 deletion sktime/regression/deep_learning/cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _fit(self, X, y):
self.input_shape = X.shape[1:]
self.model_ = self.build_model(self.input_shape)
if self.verbose:
self.model.summary()
self.model_.summary()

self.history = self.model_.fit(
X,
Expand Down Expand Up @@ -234,6 +234,7 @@ def get_test_params(cls, parameter_set="default"):
"batch_size": 6,
"kernel_size": 2,
"n_conv_layers": 1,
"verbose": True,
}
test_params = [param1, param2]

Expand Down
26 changes: 16 additions & 10 deletions sktime/transformations/series/impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ class Imputer(BaseTransformer):
Parameters
----------
method : str, default="drift"
Method to fill the missing values.
Method to fill the missing values. Not all methods can extrapolate, so after
``method`` is applied the remaining missing values are filled with ``ffill``
then ``bfill``.
* "drift" : drift/trend values by sktime.PolynomialTrendForecaster(degree=1)
first, X in transform() is filled with ffill then bfill
Expand Down Expand Up @@ -231,22 +233,26 @@ def _transform(self, X, y=None):
elif self.method == "constant":
return X.fillna(value=self.value)
elif isinstance(index, pd.MultiIndex):
X_grouped = X.groupby(level=list(range(index.nlevels - 1)))
X_group_levels = list(range(index.nlevels - 1))

if self.method in ["backfill", "bfill"]:
X = X_grouped.bfill()
# fill trailing NAs of panel instances with reverse method
return X.ffill()
X = X.groupby(level=X_group_levels).bfill()
elif self.method in ["pad", "ffill"]:
X = X_grouped.ffill()
# fill leading NAs of panel instances with reverse method
return X.bfill()
X = X.groupby(level=X_group_levels).ffill()
elif self.method == "mean":
return X_grouped.fillna(value=self._mean)
X = X.groupby(level=X_group_levels).fillna(value=self._mean)
elif self.method == "median":
return X_grouped.fillna(value=self._median)
X = X.groupby(level=X_group_levels).fillna(value=self._median)
else:
raise AssertionError("Code should not be reached")

# fill first/last elements of series,
# as some methods can't impute those
X = X.groupby(level=X_group_levels).ffill()
X = X.groupby(level=X_group_levels).bfill()

return X

else:
if self.method in ["backfill", "bfill"]:
X = X.bfill()
Expand Down
40 changes: 40 additions & 0 deletions sktime/transformations/series/tests/test_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import numpy as np
import pytest

from sktime.datatypes import get_examples
from sktime.forecasting.naive import NaiveForecaster
from sktime.transformations.compose import TransformByLevel
from sktime.transformations.series.impute import Imputer
from sktime.utils._testing.forecasting import make_forecasting_problem
from sktime.utils._testing.hierarchical import _make_hierarchical
Expand Down Expand Up @@ -58,6 +60,44 @@ def test_imputer(method, Z, value, forecaster):
assert not y_hat.isnull().to_numpy().any()


@pytest.mark.parametrize(
    "method",
    ["linear", "nearest", "mean", "median", "backfill", "pad"],
)
def test_impute_multiindex(method):
    """Test for data leakage in case of pd-multiindex data.

    Failure case in bug #6224.
    """
    panel = get_examples(mtype="pd-multiindex")[0].copy()

    # rows blanked out before imputation:
    #   [:3]  - instance 0 entirely missing
    #   [3:4] - instance 1 first timepoint missing
    #   [8:]  - instance 2 last timepoint missing
    for rows in (slice(None, 3), slice(3, 4), slice(8, None)):
        panel.iloc[rows, :] = np.nan

    panel_imputed = Imputer(method=method).fit_transform(panel)

    # instance 0 entirely missing, so it should remain missing
    instance_0_unchanged = np.array_equal(
        panel.iloc[:3, :], panel_imputed.iloc[:3, :], equal_nan=True
    )
    assert instance_0_unchanged

    # instance 1 and 2 should not have any missing values
    any_left_missing = panel_imputed.iloc[3:, :].isna().any().any()
    assert not any_left_missing

    # applying the imputer to every instance separately must give the same
    # result as applying it to the whole panel at once
    by_level = TransformByLevel(Imputer(method=method))
    panel_imputed_by_level = by_level.fit_transform(panel)
    assert np.array_equal(panel_imputed, panel_imputed_by_level, equal_nan=True)


def test_imputer_forecaster_y():
"""Test that forecaster imputer works with y.
Expand Down

0 comments on commit 2aceec5

Please sign in to comment.