Merge branch 'main' into sklearn-nightly

sktime · May 21, 2023 · 1bfc9ac · 1bfc9ac
2 parents 8f5382f + df5a005
commit 1bfc9ac
Show file tree

Hide file tree

Showing 8 changed files with 154 additions and 34 deletions.
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -2157,6 +2157,16 @@
         "doc"
       ]
     },
+    {
+      "login": "luca-miniati",
+      "name": "Luca Miniati",
+      "avatar_url": "https://avatars.githubusercontent.com/u/87467600?v=4",
+      "profile": "https://github.com/luca-miniati",
+      "contributions": [
+        "code",
+        "doc"
+      ]
+    },
     {
       "login": "marrov",
       "name": "Marc Rovira",
@@ -2165,6 +2175,6 @@
       "contributions": [
         "doc"
       ]
-    }    
+    }
   ]
 }
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
diff --git a/docs/source/api_reference/dists_kernels.rst b/docs/source/api_reference/dists_kernels.rst
@@ -78,7 +78,7 @@ Composition
 
     DistFromAligner
 
-.. currentmodule:: sktime.dists_to_kern
+.. currentmodule:: sktime.dists_kernels.dist_to_kern
 
 .. autosummary::
     :toctree: auto_generated/

diff --git a/pyproject.toml b/pyproject.toml
@@ -64,7 +64,6 @@ all_extras = [
     "filterpy>=1.4.5; python_version < '3.11'",
     "h5py",
     "hmmlearn>=0.2.7; python_version < '3.11'",
-    "holidays<0.25",
     "gluonts>=0.9.0",
     "keras-self-attention; python_version < '3.11'",
     "kotsu>=0.3.1",

diff --git a/sktime/forecasting/ets.py b/sktime/forecasting/ets.py
@@ -35,13 +35,14 @@ class AutoETS(_StatsModelsAdapter):
     Parameters
     ----------
     error : str, default="add"
-        The error model. Takes one of "add" or "mul".
+        The error model. Takes one of "add" or "mul". Ignored if auto=True.
     trend : str or None, default=None
-        The trend component model. Takes one of "add", "mul", or None.
+        The trend component model. Takes one of "add", "mul", or None. Ignored if
+        auto=True.
     damped_trend : bool, default=False
-        Whether or not an included trend component is damped.
+        Whether or not an included trend component is damped. Ignored if auto=True.
     seasonal : str or None, default=None
-        The seasonality model. Takes one of "add", "mul", or None.
+        The seasonality model. Takes one of "add", "mul", or None. Ignored if auto=True.
     sp : int, default=1
         The number of periods in a complete seasonal cycle for seasonal
         (Holt-Winters) models. For example, 4 for quarterly data with an
@@ -119,7 +120,8 @@ class AutoETS(_StatsModelsAdapter):
     return_params : bool, default=False
         Whether or not to return only the array of maximizing parameters.
     auto : bool, default=False
-        Set True to enable automatic model selection.
+        Set True to enable automatic model selection. If auto=True, then error,
+        trend, seasonal and damped_trend are ignored.
     information_criterion : str, default="aic"
         Information criterion to be used in model selection. One of:
 
@@ -239,6 +241,21 @@ def __init__(
 
         super(AutoETS, self).__init__(random_state=random_state)
 
+        if self.auto:
+            # If auto=True, check if trend, damped_trend, seasonal, or error are not set
+            # to default values
+            if any([trend, damped_trend, seasonal]) or error != "add":
+                warnings.warn(
+                    "The user-specified parameters provided alongside auto=True in "
+                    "AutoETS may not be respected. The AutoETS function "
+                    "automatically selects the best model based on the "
+                    "information criterion, ignoring the error, trend, "
+                    "seasonal, and damped_trend parameters when auto=True"
+                    " is set. Please ensure that your intended behavior"
+                    " aligns with the automatic model selection.",
+                    stacklevel=2,
+                )
+
     def _fit_forecaster(self, y, X=None):
         from statsmodels.tsa.exponential_smoothing.ets import ETSModel as _ETSModel
 

diff --git a/sktime/proba/base.py b/sktime/proba/base.py
@@ -185,17 +185,15 @@ def pdf(self, x):
         `DataFrame` with same columns and index as `self`
             containing :math:`p_{X_{ij}}(x_{ij})`, as above
         """
-        try:
-            self.pdf(x=x).applymap(np.log)
-
+        if self._has_implementation_of("log_pdf"):
             approx_method = (
                 "by exponentiating the output returned by the log_pdf method, "
                 "this may be numerically unstable"
             )
             warn(self._method_error_msg("pdf", fill_in=approx_method))
+            return self.log_pdf(x=x).applymap(np.exp)
 
-        except NotImplementedError:
-            raise NotImplementedError(self._method_err_msg("pdf", "error"))
+        raise NotImplementedError(self._method_err_msg("pdf", "error"))
 
     def log_pdf(self, x):
         r"""Logarithmic probability density function.
@@ -226,17 +224,35 @@ def log_pdf(self, x):
         `DataFrame` with same columns and index as `self`
             containing :math:`\log p_{X_{ij}}(x_{ij})`, as above
         """
-        try:
-            self.pdf(x=x).applymap(np.log)
-
+        if self._has_implementation_of("pdf"):
             approx_method = (
                 "by taking the logarithm of the output returned by the pdf method, "
                 "this may be numerically unstable"
             )
             warn(self._method_error_msg("log_pdf", fill_in=approx_method))
 
-        except NotImplementedError:
-            raise NotImplementedError(self._method_err_msg("log_pdf", "error"))
+            return self.pdf(x=x).applymap(np.log)
+
+        raise NotImplementedError(self._method_err_msg("log_pdf", "error"))
+
+    def cdf(self, x):
+        """Cumulative distribution function, Monte Carlo default P(X <= x)."""
+        N = self.APPROX_SPL  # number of samples used for the approximation
+        approx_method = (
+            "by approximating the expected value by the indicator function on "
+            f"{N} samples"
+        )
+        warn(self._method_error_msg("cdf", fill_in=approx_method))
+
+        splx = pd.concat([x] * N, keys=range(N))  # x repeated once per sample
+        spl = self.sample(N)
+        ind = spl <= splx  # indicator of the event X <= x, per sample
+
+        return ind.groupby(level=1).mean()  # level 0 is the sample key
+
+    def ppf(self, p):
+        """Quantile function = percent point function = inverse cdf."""
+        raise NotImplementedError(self._method_err_msg("ppf", "error"))
 
     def energy(self, x=None):
         r"""Energy of self, w.r.t. self or a constant frame x.
@@ -303,7 +319,7 @@ def mean(self):
         warn(self._method_error_msg("mean", fill_in=approx_method))
 
         spl = self.sample(approx_spl_size)
-        return spl.groupby(level=0).mean()
+        return spl.groupby(level=1).mean()
 
     def var(self):
         r"""Return element/entry-wise variance of the distribution.
@@ -326,7 +342,7 @@ def var(self):
         spl1 = self.sample(approx_spl_size)
         spl2 = self.sample(approx_spl_size)
         spl = (spl1 - spl2) ** 2
-        return spl.groupby(level=0).mean()
+        return spl.groupby(level=1).mean()
 
     def pdfnorm(self, a=2):
         r"""a-norm of pdf, defaults to 2-norm.
@@ -358,7 +374,7 @@ def pdfnorm(self, a=2):
 
         # uses formula int p(x)^a dx = E[p(X)^{a-1}], and MC approximates the RHS
         spl = [self.pdf(self.sample()) ** (a - 1) for _ in range(approx_spl_size)]
-        return pd.concat(spl, axis=0).groupby(level=0).mean()
+        return pd.concat(spl, axis=0).groupby(level=1).mean()
 
     def _coerce_to_self_index_df(self, x):
         x = np.array(x)
@@ -427,6 +443,20 @@ def sample(self, n_samples=None):
         in `pd-multiindex` mtype format convention, with same `columns` as `self`,
         and `MultiIndex` that is product of `RangeIndex(n_samples)` and `self.index`
         """
+
+        def gen_unif():
+            np_unif = np.random.uniform(size=self.shape)
+            return pd.DataFrame(np_unif, index=self.index, columns=self.columns)
+
+        # if ppf is implemented, we use inverse transform sampling
+        if self._has_implementation_of("ppf"):
+            if n_samples is None:
+                return self.ppf(gen_unif())
+            else:
+                pd_smpl = [self.ppf(gen_unif()) for _ in range(n_samples)]
+                df_spl = pd.concat(pd_smpl, keys=range(n_samples))
+                return df_spl
+
         raise NotImplementedError(self._method_err_msg("sample", "error"))
 
 

diff --git a/sktime/proba/tests/test_base_default_methods.py b/sktime/proba/tests/test_base_default_methods.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
+"""Test class for default methods.
+
+This is not for direct use, but for testing whether the defaulting in various
+methods works.
+
+Testing works via TestAllDistributions which discovers the classes in
+here, executes the public methods in interface conformance tests,
+which in turn triggers the fallback defaults.
+"""
+
+__author__ = ["fkiraly"]
+
+import numpy as np
+import pandas as pd
+from scipy.special import erfinv
+
+from sktime.proba.base import BaseDistribution
+
+
+# normal distribution with exact implementations removed
+class _DistrDefaultMethodTester(BaseDistribution):
+    """Normal distribution defining only pdf and ppf, to test base defaults."""
+
+    _tags = {
+        "capabilities:approx": ["pdfnorm", "mean", "var", "energy", "log_pdf", "cdf"],
+        "capabilities:exact": ["pdf", "ppf"],  # the two methods defined below
+        "distr:measuretype": "continuous",
+    }
+
+    def __init__(self, mu, sigma, index=None, columns=None):
+
+        self.mu = mu
+        self.sigma = sigma
+        self.index = index
+        self.columns = columns
+
+        # NOTE(review): presumably broadcasts mu/sigma to a common 2D shape - confirm
+        self._mu, self._sigma = self._get_bc_params()
+        shape = self._mu.shape  # shape[0] / shape[1] size the default labels
+
+        if index is None:
+            index = pd.RangeIndex(shape[0])
+
+        if columns is None:
+            columns = pd.RangeIndex(shape[1])
+
+        super(_DistrDefaultMethodTester, self).__init__(index=index, columns=columns)
+
+    def ppf(self, p):
+        """Quantile function (inverse cdf): mu + sigma * sqrt(2) * erfinv(2p - 1)."""
+        d = self.loc[p.index, p.columns]  # presumably subsets to p's labels
+        icdf_arr = d.mu + d.sigma * np.sqrt(2) * erfinv(2 * p.values - 1)
+        return pd.DataFrame(icdf_arr, index=p.index, columns=p.columns)
+
+    def pdf(self, x):
+        """Probability density function of the normal with parameters mu, sigma."""
+        d = self.loc[x.index, x.columns]  # presumably subsets to x's labels
+        pdf_arr = np.exp(-0.5 * ((x.values - d.mu) / d.sigma) ** 2)
+        pdf_arr = pdf_arr / (d.sigma * np.sqrt(2 * np.pi))  # normalising constant
+        return pd.DataFrame(pdf_arr, index=x.index, columns=x.columns)
diff --git a/sktime/tests/_config.py b/sktime/tests/_config.py
@@ -33,6 +33,8 @@
     "ResNetClassifier",  # known ResNetClassifier sporafic failures, see #3954
     "LSTMFCNClassifier",  # unknown cause, see bug report #4033
     "TimeSeriesLloyds",  # an abstract class, but does not follow naming convention
+    # DL classifier suspected to cause hangs and memouts, see #4610
+    "FCNClassifier",
 ]