Skip to content

Commit

Permalink
Merge branch 'main' into contributors-update
Browse files Browse the repository at this point in the history
  • Loading branch information
fkiraly committed May 21, 2023
2 parents 74c6e33 + df5a005 commit 755afc7
Show file tree
Hide file tree
Showing 10 changed files with 194 additions and 36 deletions.
12 changes: 11 additions & 1 deletion .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -2157,6 +2157,16 @@
"doc"
]
},
{
"login": "luca-miniati",
"name": "Luca Miniati",
"avatar_url": "https://avatars.githubusercontent.com/u/87467600?v=4",
"profile": "https://github.com/luca-miniati",
"contributions": [
"code",
"doc"
]
},
{
"login": "marrov",
"name": "Marc Rovira",
Expand All @@ -2165,6 +2175,6 @@
"contributions": [
"doc"
]
}
}
]
}
27 changes: 14 additions & 13 deletions CONTRIBUTORS.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/source/api_reference/dists_kernels.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Composition

DistFromAligner

.. currentmodule:: sktime.dists_to_kern
.. currentmodule:: sktime.dists_kernels.dist_to_kern

.. autosummary::
:toctree: auto_generated/
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ all_extras = [
"filterpy>=1.4.5; python_version < '3.11'",
"h5py",
"hmmlearn>=0.2.7; python_version < '3.11'",
"holidays<0.25",
"gluonts>=0.9.0",
"keras-self-attention; python_version < '3.11'",
"kotsu>=0.3.1",
Expand Down
12 changes: 11 additions & 1 deletion sktime/forecasting/base/adapters/_statsmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,18 @@ def _predict_interval(self, fh, X=None, coverage=0.95):
start, end = fh.to_absolute_int(self._y.index[0], self.cutoff)[[0, -1]]
valid_indices = fh.to_absolute(self.cutoff).to_pandas()

get_prediction_arguments = {"start": start, "end": end}

if hasattr(self, "random_state"):
get_prediction_arguments["random_state"] = self.random_state

if inspect.signature(self._fitted_forecaster.get_prediction).parameters.get(
"exog"
):
get_prediction_arguments["exog"] = X

prediction_results = self._fitted_forecaster.get_prediction(
start=start, end=end, exog=X
**get_prediction_arguments
)

columns = pd.MultiIndex.from_product(
Expand Down
54 changes: 49 additions & 5 deletions sktime/forecasting/ets.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,14 @@ class AutoETS(_StatsModelsAdapter):
Parameters
----------
error : str, default="add"
The error model. Takes one of "add" or "mul".
The error model. Takes one of "add" or "mul". Ignored if auto=True.
trend : str or None, default=None
The trend component model. Takes one of "add", "mul", or None.
The trend component model. Takes one of "add", "mul", or None. Ignored if
auto=True.
damped_trend : bool, default=False
Whether or not an included trend component is damped.
Whether or not an included trend component is damped. Ignored if auto=True.
seasonal : str or None, default=None
The seasonality model. Takes one of "add", "mul", or None.
The seasonality model. Takes one of "add", "mul", or None. Ignored if auto=True.
sp : int, default=1
The number of periods in a complete seasonal cycle for seasonal
(Holt-Winters) models. For example, 4 for quarterly data with an
Expand Down Expand Up @@ -119,7 +120,8 @@ class AutoETS(_StatsModelsAdapter):
return_params : bool, default=False
Whether or not to return only the array of maximizing parameters.
auto : bool, default=False
Set True to enable automatic model selection.
Set True to enable automatic model selection. If auto=True, then error,
trend, seasonal and damped_trend are ignored.
information_criterion : str, default="aic"
Information criterion to be used in model selection. One of:
Expand Down Expand Up @@ -239,6 +241,21 @@ def __init__(

super(AutoETS, self).__init__(random_state=random_state)

if self.auto:
# If auto=True, check if trend, damped_trend, seasonal, or error are not set
# to default values
if any([trend, damped_trend, seasonal]) or error != "add":
warnings.warn(
"The user-specified parameters provided alongside auto=True in "
"AutoETS may not be respected. The AutoETS function "
"automatically selects the best model based on the "
"information criterion, ignoring the error, trend, "
"seasonal, and damped_trend parameters when auto=True"
" is set. Please ensure that your intended behavior"
" aligns with the automatic model selection.",
stacklevel=2,
)

def _fit_forecaster(self, y, X=None):
from statsmodels.tsa.exponential_smoothing.ets import ETSModel as _ETSModel

Expand Down Expand Up @@ -450,3 +467,30 @@ def summary(self):
https://www.statsmodels.org/dev/examples/notebooks/generated/ets.html
"""
return self._fitted_forecaster.summary()

@classmethod
def get_test_params(cls, parameter_set="default"):
    """Provide the parameter settings used to construct test instances.

    Parameters
    ----------
    parameter_set : str, default="default"
        Name of the requested set of test parameters. The value is not
        inspected here, so the same settings are returned for every name.

    Returns
    -------
    params : list of dict
        One dict of constructor keyword arguments per test configuration.
    """
    # non-auto model with every argument left at its default
    default_setting = {}
    # non-auto model with some non-default arguments
    damped_bic_setting = {
        "information_criterion": "bic",
        "trend": "add",
        "damped_trend": True,
    }
    # TODO: add an "auto-ets" setting, {"sp": 2, "auto": True},
    # while fixing #4591
    return [default_setting, damped_bic_setting]
3 changes: 2 additions & 1 deletion sktime/forecasting/model_selection/_tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,8 @@ def evaluate_candidates(candidate_params):

# Sort values according to rank
results = results.sort_values(
by=f"rank_{scoring_name}", ascending=scoring.get_tag("lower_is_better")
by=f"rank_{scoring_name}",
ascending=True,
)
# Select n best forecaster
self.n_best_forecasters_ = []
Expand Down
56 changes: 43 additions & 13 deletions sktime/proba/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,17 +185,15 @@ def pdf(self, x):
`DataFrame` with same columns and index as `self`
containing :math:`p_{X_{ij}}(x_{ij})`, as above
"""
try:
self.pdf(x=x).applymap(np.log)

if self._has_implementation_of("log_pdf"):
approx_method = (
"by exponentiating the output returned by the log_pdf method, "
"this may be numerically unstable"
)
warn(self._method_error_msg("pdf", fill_in=approx_method))
return self.log_pdf(x=x).applymap(np.exp)

except NotImplementedError:
raise NotImplementedError(self._method_err_msg("pdf", "error"))
raise NotImplementedError(self._method_err_msg("pdf", "error"))

def log_pdf(self, x):
r"""Logarithmic probability density function.
Expand Down Expand Up @@ -226,17 +224,35 @@ def log_pdf(self, x):
`DataFrame` with same columns and index as `self`
containing :math:`\log p_{X_{ij}}(x_{ij})`, as above
"""
try:
self.pdf(x=x).applymap(np.log)

if self._has_implementation_of("pdf"):
approx_method = (
"by taking the logarithm of the output returned by the pdf method, "
"this may be numerically unstable"
)
warn(self._method_error_msg("log_pdf", fill_in=approx_method))

except NotImplementedError:
raise NotImplementedError(self._method_err_msg("log_pdf", "error"))
return self.pdf(x=x).applymap(np.log)

raise NotImplementedError(self._method_err_msg("log_pdf", "error"))

def cdf(self, x):
    """Cumulative distribution function, approximated by Monte Carlo sampling.

    Draws ``self.APPROX_SPL`` samples via ``self.sample`` and returns,
    entry-wise, the fraction of samples that are less than or equal to the
    matching entry of ``x``, i.e., the empirical estimate of P(X <= x).
    A warning is emitted because the result is only an approximation.

    Parameters
    ----------
    x : pd.DataFrame
        Values at which to evaluate the cdf.
        NOTE(review): assumed to carry the same index and columns as the
        frames returned by ``self.sample`` - confirm against callers.

    Returns
    -------
    pd.DataFrame
        Entry-wise fraction of samples not exceeding ``x``, obtained by
        averaging the indicator over the second index level of the
        stacked samples.
    """
    N = self.APPROX_SPL
    approx_method = (
        "by approximating the expected value by the indicator function on "
        f"{N} samples"
    )
    # report the approximation under the name of this method
    warn(self._method_error_msg("cdf", fill_in=approx_method))

    # stack N copies of x under keys 0..N-1 so that it is labelled in the
    # same way as the N stacked samples it is compared against
    splx = pd.concat([x] * N, keys=range(N))
    spl = self.sample(N)
    # cdf(x) = P(X <= x) = E[1(X <= x)]: mark the samples at or below x
    ind = spl <= splx

    return ind.groupby(level=1).mean()

def ppf(self, p):
    """Quantile function = percent point function = inverse cdf.

    This implementation computes nothing and always raises.

    Parameters
    ----------
    p : object
        Probabilities at which the quantile function would be evaluated;
        not used by this implementation.

    Raises
    ------
    NotImplementedError
        Always, with a message generated for the method name "ppf".
    """
    # NOTE(review): warnings elsewhere in this file are built with
    # ``_method_error_msg``; confirm that ``_method_err_msg`` exists as well
    raise NotImplementedError(self._method_err_msg("ppf", "error"))

def energy(self, x=None):
r"""Energy of self, w.r.t. self or a constant frame x.
Expand Down Expand Up @@ -303,7 +319,7 @@ def mean(self):
warn(self._method_error_msg("mean", fill_in=approx_method))

spl = self.sample(approx_spl_size)
return spl.groupby(level=0).mean()
return spl.groupby(level=1).mean()

def var(self):
r"""Return element/entry-wise variance of the distribution.
Expand All @@ -326,7 +342,7 @@ def var(self):
spl1 = self.sample(approx_spl_size)
spl2 = self.sample(approx_spl_size)
spl = (spl1 - spl2) ** 2
return spl.groupby(level=0).mean()
return spl.groupby(level=1).mean()

def pdfnorm(self, a=2):
r"""a-norm of pdf, defaults to 2-norm.
Expand Down Expand Up @@ -358,7 +374,7 @@ def pdfnorm(self, a=2):

# uses formula int p(x)^a dx = E[p(X)^{a-1}], and MC approximates the RHS
spl = [self.pdf(self.sample()) ** (a - 1) for _ in range(approx_spl_size)]
return pd.concat(spl, axis=0).groupby(level=0).mean()
return pd.concat(spl, axis=0).groupby(level=1).mean()

def _coerce_to_self_index_df(self, x):
x = np.array(x)
Expand Down Expand Up @@ -427,6 +443,20 @@ def sample(self, n_samples=None):
in `pd-multiindex` mtype format convention, with same `columns` as `self`,
and `MultiIndex` that is product of `RangeIndex(n_samples)` and `self.index`
"""

def gen_unif():
np_unif = np.random.uniform(size=self.shape)
return pd.DataFrame(np_unif, index=self.index, columns=self.columns)

# if ppf is implemented, we use inverse transform sampling
if self._has_implementation_of("ppf"):
if n_samples is None:
return self.ppf(gen_unif())
else:
pd_smpl = [self.ppf(gen_unif()) for _ in range(n_samples)]
df_spl = pd.concat(pd_smpl, keys=range(n_samples))
return df_spl

raise NotImplementedError(self._method_err_msg("sample", "error"))


Expand Down
61 changes: 61 additions & 0 deletions sktime/proba/tests/test_base_default_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
"""Test class for default methods.
This is not for direct use, but for testing whether the defaulting in various
methods works.
Testing works via TestAllDistributions which discovers the classes in
here, executes the public methods in interface conformance tests,
which in turn triggers the fallback defaults.
"""

__author__ = ["fkiraly"]

import numpy as np
import pandas as pd
from scipy.special import erfinv

from sktime.proba.base import BaseDistribution


# normal distribution with exact implementations removed
class _DistrDefaultMethodTester(BaseDistribution):
    """Distribution used to exercise the default methods of the base class.

    Only ``pdf`` and ``ppf`` are implemented here, using the normal
    distribution formulas with location ``mu`` and scale ``sigma``. All other
    methods are inherited from ``BaseDistribution``.

    Parameters
    ----------
    mu : location parameter, stored unchanged as ``self.mu``
    sigma : scale parameter, stored unchanged as ``self.sigma``
    index : optional row labels; if None, a ``pd.RangeIndex`` over the first
        dimension of the broadcast parameters is used
    columns : optional column labels; if None, a ``pd.RangeIndex`` over the
        second dimension of the broadcast parameters is used
    """

    _tags = {
        "capabilities:approx": ["pdfnorm", "mean", "var", "energy", "log_pdf", "cdf"],
        "capabilities:exact": ["pdf", "ppf"],
        "distr:measuretype": "continuous",
    }

    def __init__(self, mu, sigma, index=None, columns=None):
        self.mu = mu
        self.sigma = sigma
        self.index = index
        self.columns = columns

        # the shape of the broadcast parameters determines the default labels
        self._mu, self._sigma = self._get_bc_params()
        bc_shape = self._mu.shape

        index = pd.RangeIndex(bc_shape[0]) if index is None else index
        columns = pd.RangeIndex(bc_shape[1]) if columns is None else columns

        super().__init__(index=index, columns=columns)

    def ppf(self, p):
        """Return the quantiles of the distribution at probabilities ``p``.

        The result is a ``pd.DataFrame`` with the index and columns of ``p``.
        """
        # restrict the distribution to the entries that p refers to
        sub = self.loc[p.index, p.columns]
        scale = sub.sigma * np.sqrt(2)
        quantiles = sub.mu + scale * erfinv(2 * p.values - 1)
        return pd.DataFrame(quantiles, index=p.index, columns=p.columns)

    def pdf(self, x):
        """Return the probability density evaluated at ``x``.

        The result is a ``pd.DataFrame`` with the index and columns of ``x``.
        """
        # restrict the distribution to the entries that x refers to
        sub = self.loc[x.index, x.columns]
        standardized = (x.values - sub.mu) / sub.sigma
        density = np.exp(-0.5 * standardized ** 2)
        density = density / (sub.sigma * np.sqrt(2 * np.pi))
        return pd.DataFrame(density, index=x.index, columns=x.columns)
2 changes: 2 additions & 0 deletions sktime/tests/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
"ResNetClassifier", # known ResNetClassifier sporadic failures, see #3954
"LSTMFCNClassifier", # unknown cause, see bug report #4033
"TimeSeriesLloyds", # an abstract class, but does not follow naming convention
# DL classifier suspected to cause hangs and out-of-memory failures, see #4610
"FCNClassifier",
]


Expand Down

0 comments on commit 755afc7

Please sign in to comment.