Skip to content

Commit

Permalink
Merge branch 'main' into sklearn-nightly
Browse files Browse the repository at this point in the history
  • Loading branch information
fkiraly committed May 21, 2023
2 parents 8f5382f + df5a005 commit 1bfc9ac
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 34 deletions.
12 changes: 11 additions & 1 deletion .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -2157,6 +2157,16 @@
"doc"
]
},
{
"login": "luca-miniati",
"name": "Luca Miniati",
"avatar_url": "https://avatars.githubusercontent.com/u/87467600?v=4",
"profile": "https://github.com/luca-miniati",
"contributions": [
"code",
"doc"
]
},
{
"login": "marrov",
"name": "Marc Rovira",
Expand All @@ -2165,6 +2175,6 @@
"contributions": [
"doc"
]
}
}
]
}
27 changes: 14 additions & 13 deletions CONTRIBUTORS.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/source/api_reference/dists_kernels.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Composition

DistFromAligner

.. currentmodule:: sktime.dists_to_kern
.. currentmodule:: sktime.dists_kernels.dist_to_kern

.. autosummary::
:toctree: auto_generated/
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ all_extras = [
"filterpy>=1.4.5; python_version < '3.11'",
"h5py",
"hmmlearn>=0.2.7; python_version < '3.11'",
"holidays<0.25",
"gluonts>=0.9.0",
"keras-self-attention; python_version < '3.11'",
"kotsu>=0.3.1",
Expand Down
27 changes: 22 additions & 5 deletions sktime/forecasting/ets.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,14 @@ class AutoETS(_StatsModelsAdapter):
Parameters
----------
error : str, default="add"
The error model. Takes one of "add" or "mul".
The error model. Takes one of "add" or "mul". Ignored if auto=True.
trend : str or None, default=None
The trend component model. Takes one of "add", "mul", or None.
The trend component model. Takes one of "add", "mul", or None. Ignored if
auto=True.
damped_trend : bool, default=False
Whether or not an included trend component is damped.
Whether or not an included trend component is damped. Ignored if auto=True.
seasonal : str or None, default=None
The seasonality model. Takes one of "add", "mul", or None.
The seasonality model. Takes one of "add", "mul", or None. Ignored if auto=True.
sp : int, default=1
The number of periods in a complete seasonal cycle for seasonal
(Holt-Winters) models. For example, 4 for quarterly data with an
Expand Down Expand Up @@ -119,7 +120,8 @@ class AutoETS(_StatsModelsAdapter):
return_params : bool, default=False
Whether or not to return only the array of maximizing parameters.
auto : bool, default=False
Set True to enable automatic model selection.
Set True to enable automatic model selection. If auto=True, then error,
trend, seasonal and damped_trend are ignored.
information_criterion : str, default="aic"
Information criterion to be used in model selection. One of:
Expand Down Expand Up @@ -239,6 +241,21 @@ def __init__(

super(AutoETS, self).__init__(random_state=random_state)

if self.auto:
# If auto=True, check if trend, damped_trend, seasonal, or error are not set
# to default values
if any([trend, damped_trend, seasonal]) or error != "add":
warnings.warn(
"The user-specified parameters provided alongside auto=True in "
"AutoETS may not be respected. The AutoETS function "
"automatically selects the best model based on the "
"information criterion, ignoring the error, trend, "
"seasonal, and damped_trend parameters when auto=True"
" is set. Please ensure that your intended behavior"
" aligns with the automatic model selection.",
stacklevel=2,
)

def _fit_forecaster(self, y, X=None):
from statsmodels.tsa.exponential_smoothing.ets import ETSModel as _ETSModel

Expand Down
56 changes: 43 additions & 13 deletions sktime/proba/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,17 +185,15 @@ def pdf(self, x):
`DataFrame` with same columns and index as `self`
containing :math:`p_{X_{ij}}(x_{ij})`, as above
"""
try:
self.pdf(x=x).applymap(np.log)

if self._has_implementation_of("log_pdf"):
approx_method = (
"by exponentiating the output returned by the log_pdf method, "
"this may be numerically unstable"
)
warn(self._method_error_msg("pdf", fill_in=approx_method))
return self.log_pdf(x=x).applymap(np.exp)

except NotImplementedError:
raise NotImplementedError(self._method_err_msg("pdf", "error"))
raise NotImplementedError(self._method_err_msg("pdf", "error"))

def log_pdf(self, x):
r"""Logarithmic probability density function.
Expand Down Expand Up @@ -226,17 +224,35 @@ def log_pdf(self, x):
`DataFrame` with same columns and index as `self`
containing :math:`\log p_{X_{ij}}(x_{ij})`, as above
"""
try:
self.pdf(x=x).applymap(np.log)

if self._has_implementation_of("pdf"):
approx_method = (
"by taking the logarithm of the output returned by the pdf method, "
"this may be numerically unstable"
)
warn(self._method_error_msg("log_pdf", fill_in=approx_method))

except NotImplementedError:
raise NotImplementedError(self._method_err_msg("log_pdf", "error"))
return self.pdf(x=x).applymap(np.log)

raise NotImplementedError(self._method_err_msg("log_pdf", "error"))

def cdf(self, x):
    """Cumulative distribution function, approximated by Monte Carlo sampling.

    Default fallback: draws ``self.APPROX_SPL`` samples via ``self.sample``
    and returns, per entry, the fraction of samples that are less than or
    equal to the corresponding entry of ``x``, i.e., an empirical estimate
    of :math:`P(X_{ij} \\leq x_{ij})`.

    A warning is emitted on every call, as the result is only approximate.

    Parameters
    ----------
    x : pd.DataFrame
        points at which to evaluate the cdf.
        NOTE(review): the element-wise comparison below presumably requires
        ``x`` to carry the same index and columns as ``self`` - confirm.

    Returns
    -------
    pd.DataFrame
        entry-wise fraction of samples not exceeding ``x``, grouped by the
        second index level of the stacked samples
    """
    N = self.APPROX_SPL
    approx_method = (
        "by approximating the expected value by the indicator function on "
        f"{N} samples"
    )
    # the warning must name the method that is being approximated, i.e., cdf
    warn(self._method_error_msg("cdf", fill_in=approx_method))

    # stack N copies of x so it lines up with the N stacked samples
    splx = pd.concat([x] * N, keys=range(N))
    spl = self.sample(N)
    # cdf is P(X <= x): count samples at or below x
    # (the reverse comparison would estimate the survival function instead)
    ind = spl <= splx

    # level 0 is the sample number, level 1 the original row index
    return ind.groupby(level=1).mean()

def ppf(self, p):
    """Quantile function = percent point function = inverse cdf.

    No default approximation is provided here; this base implementation
    always raises.

    Parameters
    ----------
    p : probabilities at which to evaluate the quantile function (unused here)

    Raises
    ------
    NotImplementedError
        always, with a message built for the method name "ppf"
    """
    # NOTE(review): warnings elsewhere in this file are built with
    # `_method_error_msg`, errors with `_method_err_msg` - confirm both exist.
    # The message must name ppf, the method that is actually missing.
    raise NotImplementedError(self._method_err_msg("ppf", "error"))

def energy(self, x=None):
r"""Energy of self, w.r.t. self or a constant frame x.
Expand Down Expand Up @@ -303,7 +319,7 @@ def mean(self):
warn(self._method_error_msg("mean", fill_in=approx_method))

spl = self.sample(approx_spl_size)
return spl.groupby(level=0).mean()
return spl.groupby(level=1).mean()

def var(self):
r"""Return element/entry-wise variance of the distribution.
Expand All @@ -326,7 +342,7 @@ def var(self):
spl1 = self.sample(approx_spl_size)
spl2 = self.sample(approx_spl_size)
spl = (spl1 - spl2) ** 2
return spl.groupby(level=0).mean()
return spl.groupby(level=1).mean()

def pdfnorm(self, a=2):
r"""a-norm of pdf, defaults to 2-norm.
Expand Down Expand Up @@ -358,7 +374,7 @@ def pdfnorm(self, a=2):

# uses formula int p(x)^a dx = E[p(X)^{a-1}], and MC approximates the RHS
spl = [self.pdf(self.sample()) ** (a - 1) for _ in range(approx_spl_size)]
return pd.concat(spl, axis=0).groupby(level=0).mean()
return pd.concat(spl, axis=0).groupby(level=1).mean()

def _coerce_to_self_index_df(self, x):
x = np.array(x)
Expand Down Expand Up @@ -427,6 +443,20 @@ def sample(self, n_samples=None):
in `pd-multiindex` mtype format convention, with same `columns` as `self`,
and `MultiIndex` that is product of `RangeIndex(n_samples)` and `self.index`
"""

def gen_unif():
np_unif = np.random.uniform(size=self.shape)
return pd.DataFrame(np_unif, index=self.index, columns=self.columns)

# if ppf is implemented, we use inverse transform sampling
if self._has_implementation_of("ppf"):
if n_samples is None:
return self.ppf(gen_unif())
else:
pd_smpl = [self.ppf(gen_unif()) for _ in range(n_samples)]
df_spl = pd.concat(pd_smpl, keys=range(n_samples))
return df_spl

raise NotImplementedError(self._method_err_msg("sample", "error"))


Expand Down
61 changes: 61 additions & 0 deletions sktime/proba/tests/test_base_default_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
"""Test class for default methods.
This is not for direct use, but for testing whether the defaulting in various
methods works.
Testing works via TestAllDistributions which discovers the classes in
here, executes the public methods in interface conformance tests,
which in turn triggers the fallback defaults.
"""

__author__ = ["fkiraly"]

import numpy as np
import pandas as pd
from scipy.special import erfinv

from sktime.proba.base import BaseDistribution


# normal distribution with exact implementations removed
class _DistrDefaultMethodTester(BaseDistribution):
    """Normal distribution that implements only ``pdf`` and ``ppf`` exactly.

    All other methods listed under the ``capabilities:approx`` tag are left
    unimplemented here, so calling them exercises the base class defaults.

    Parameters
    ----------
    mu : mean parameter of the normal distribution
    sigma : standard deviation parameter of the normal distribution
    index : optional row index; a ``RangeIndex`` is used if None
    columns : optional column index; a ``RangeIndex`` is used if None
    """

    _tags = {
        "capabilities:approx": ["pdfnorm", "mean", "var", "energy", "log_pdf", "cdf"],
        "capabilities:exact": ["pdf", "ppf"],
        "distr:measuretype": "continuous",
    }

    def __init__(self, mu, sigma, index=None, columns=None):
        # store constructor arguments unchanged
        self.mu = mu
        self.sigma = sigma
        self.index = index
        self.columns = columns

        # the shape of the parameters determines the default index/columns
        self._mu, self._sigma = self._get_bc_params()
        bc_shape = self._mu.shape

        index = pd.RangeIndex(bc_shape[0]) if index is None else index
        columns = pd.RangeIndex(bc_shape[1]) if columns is None else columns

        super().__init__(index=index, columns=columns)

    def ppf(self, p):
        """Return the normal quantile at the probabilities in ``p``.

        Computed as ``mu + sigma * sqrt(2) * erfinv(2 * p - 1)``, on the
        sub-distribution selected by the index and columns of ``p``.
        """
        sub = self.loc[p.index, p.columns]
        z = erfinv(2 * p.values - 1)
        quantiles = sub.mu + sub.sigma * np.sqrt(2) * z
        return pd.DataFrame(quantiles, index=p.index, columns=p.columns)

    def pdf(self, x):
        """Return the normal density at the points in ``x``.

        Evaluated on the sub-distribution selected by the index and columns
        of ``x``.
        """
        sub = self.loc[x.index, x.columns]
        standardized = (x.values - sub.mu) / sub.sigma
        density = np.exp(-0.5 * standardized**2)
        density = density / (sub.sigma * np.sqrt(2 * np.pi))
        return pd.DataFrame(density, index=x.index, columns=x.columns)
2 changes: 2 additions & 0 deletions sktime/tests/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
"ResNetClassifier", # known ResNetClassifier sporafic failures, see #3954
"LSTMFCNClassifier", # unknown cause, see bug report #4033
"TimeSeriesLloyds", # an abstract class, but does not follow naming convention
# DL classifier suspected to cause hangs and memouts, see #4610
"FCNClassifier",
]


Expand Down

0 comments on commit 1bfc9ac

Please sign in to comment.