Merge commit '12e0284e8eee9610f35a471749b6ea5c79e49a52' into backport-0.14.1
bashtage committed Dec 14, 2023
2 parents 203558b + 12e0284 commit 44c1020
Showing 4 changed files with 88 additions and 25 deletions.
11 changes: 11 additions & 0 deletions statsmodels/graphics/tests/test_tsaplots.py
@@ -375,3 +375,14 @@ def test_predict_plot(use_pandas, model_and_args, alpha):
     res = model(y, **kwargs).fit()
     fig = plot_predict(res, start, end, alpha=alpha)
     assert isinstance(fig, plt.Figure)
+
+
+@pytest.mark.matplotlib
+def test_plot_pacf_small_sample():
+    idx = [pd.Timestamp.now() + pd.Timedelta(seconds=i) for i in range(10)]
+    df = pd.DataFrame(
+        index=idx,
+        columns=["a"],
+        data=list(range(10))
+    )
+    plot_pacf(df)
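For context, this test exercises the regression fixed below in `_prepare_data_corr_plot`: with only 10 observations, the old default lag limit exceeded the 50%-of-sample cap that `pacf` enforces, so `plot_pacf` raised a `ValueError`. A minimal sketch of the scenario (assuming matplotlib is installed):

```python
import numpy as np
from statsmodels.graphics.tsaplots import plot_pacf

# 10 observations: the old default was min(ceil(10 * log10(10)), nobs - 1) = 9
# lags, which pacf rejects as more than 50% of the sample. The new default
# caps the limit at nobs // 2 = 5, so this call now succeeds.
y = np.arange(10.0)
fig = plot_pacf(y)  # raised ValueError before this fix
```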
2 changes: 1 addition & 1 deletion statsmodels/graphics/tsaplots.py
@@ -17,7 +17,7 @@ def _prepare_data_corr_plot(x, lags, zero):
     if lags is None:
         # GH 4663 - use a sensible default value
         nobs = x.shape[0]
-        lim = min(int(np.ceil(10 * np.log10(nobs))), nobs - 1)
+        lim = min(int(np.ceil(10 * np.log10(nobs))), nobs // 2)
         lags = np.arange(not zero, lim + 1)
     elif np.isscalar(lags):
         lags = np.arange(not zero, int(lags) + 1)  # +1 for zero lag
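A quick worked example of the new default bound (the helper name `default_lim` is illustrative, not part of statsmodels):

```python
import numpy as np

def default_lim(nobs: int) -> int:
    # Mirrors the new rule in _prepare_data_corr_plot.
    return min(int(np.ceil(10 * np.log10(nobs))), nobs // 2)

print(default_lim(10))   # 5  -- the old rule gave min(10, nobs - 1) = 9
print(default_lim(100))  # 20 -- unchanged for larger samples: min(20, 50)
```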
74 changes: 51 additions & 23 deletions statsmodels/tsa/stattools.py
@@ -8,7 +8,7 @@
 from statsmodels.compat.python import Literal, lzip
 from statsmodels.compat.scipy import _next_regular

-from typing import Tuple
+from typing import Union, List
 import warnings

 import numpy as np
@@ -40,6 +40,8 @@
 from statsmodels.tsa.adfvalues import mackinnoncrit, mackinnonp
 from statsmodels.tsa.tsatools import add_trend, lagmat, lagmat2ds

+ArrayLike1D = Union[np.ndarray, pd.Series, List[float]]
+
 __all__ = [
     "acovf",
     "acf",
@@ -709,8 +711,11 @@ def acf(
     else:
         return acf, qstat, pvalue


-def pacf_yw(x, nlags=None, method="adjusted"):
+def pacf_yw(
+    x: ArrayLike1D,
+    nlags: int | None = None,
+    method: Literal["adjusted", "mle"] = "adjusted",
+) -> np.ndarray:
     """
     Partial autocorrelation estimated with non-recursive yule_walker.
@@ -747,8 +752,7 @@ def pacf_yw(x, nlags=None, method="adjusted"):
     nlags = int_like(nlags, "nlags", optional=True)
     nobs = x.shape[0]
     if nlags is None:
-        nlags = min(int(10 * np.log10(nobs)), nobs - 1)
-
+        nlags = max(min(int(10 * np.log10(nobs)), nobs - 1), 1)
     method = string_like(method, "method", options=("adjusted", "mle"))
     pacf = [1.0]
     with warnings.catch_warnings():
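The added `max(..., 1)` floor only matters for very short series, where `int(10 * np.log10(nobs))` can reach zero. A sketch of the arithmetic (`default_nlags` is an illustrative name, not statsmodels API):

```python
import numpy as np

def default_nlags(nobs: int) -> int:
    # Mirrors the new pacf_yw default: at least one lag is always requested.
    return max(min(int(10 * np.log10(nobs)), nobs - 1), 1)

print(default_nlags(5))  # 4: min(int(6.99), 4) = 4, the floor has no effect
print(default_nlags(1))  # 1: min(0, 0) = 0 is floored to 1
```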
@@ -758,7 +762,9 @@
     return np.array(pacf)


-def pacf_burg(x, nlags=None, demean=True):
+def pacf_burg(
+    x: ArrayLike1D, nlags: int | None = None, demean: bool = True
+) -> tuple[np.ndarray, np.ndarray]:
     """
     Calculate Burg's partial autocorrelation estimator.
@@ -800,6 +806,7 @@ def pacf_burg(x, nlags=None, demean=True):
         x = x - x.mean()
     nobs = x.shape[0]
     p = nlags if nlags is not None else min(int(10 * np.log10(nobs)), nobs - 1)
+    p = max(p, 1)
     if p > nobs - 1:
         raise ValueError("nlags must be smaller than nobs - 1")
     d = np.zeros(p + 1)
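The same floor in `pacf_burg` also makes the single-observation case fail loudly: the default becomes 1, which then trips the explicit `p > nobs - 1` check (see `test_pacf_1_obs` below). A sketch:

```python
import numpy as np
from statsmodels.tsa.stattools import pacf_burg

y = np.random.standard_normal(1)
try:
    pacf_burg(y)  # default p is floored to 1, and 1 > nobs - 1 = 0
except ValueError as err:
    print(err)  # nlags must be smaller than nobs - 1
```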
@@ -818,14 +825,19 @@
         v[1:] = last_v[1:] - pacf[i] * last_u[:-1]
         d[i + 1] = (1 - pacf[i] ** 2) * d[i] - v[i] ** 2 - u[-1] ** 2
         pacf[i + 1] = 2 / d[i + 1] * v[i + 1 :].dot(u[i:-1])
-    sigma2 = (1 - pacf ** 2) * d / (2.0 * (nobs - np.arange(0, p + 1)))
+    sigma2 = (1 - pacf**2) * d / (2.0 * (nobs - np.arange(0, p + 1)))
     pacf[0] = 1  # Insert the 0 lag partial autocorrel

     return pacf, sigma2


 @deprecate_kwarg("unbiased", "adjusted")
-def pacf_ols(x, nlags=None, efficient=True, adjusted=False):
+def pacf_ols(
+    x: ArrayLike1D,
+    nlags: int | None = None,
+    efficient: bool = True,
+    adjusted: bool = False,
+) -> np.ndarray:
     """
     Calculate partial autocorrelations via OLS.
@@ -885,8 +897,9 @@ def pacf_ols(x, nlags=None, efficient=True, adjusted=False):
     adjusted = bool_like(adjusted, "adjusted")
     nobs = x.shape[0]
     if nlags is None:
-        nlags = min(int(10 * np.log10(nobs)), nobs - 1)
-
+        nlags = max(min(int(10 * np.log10(nobs)), nobs // 2), 1)
+    if nlags > nobs // 2:
+        raise ValueError(f"nlags must be smaller than nobs // 2 ({nobs // 2})")
     pacf = np.empty(nlags + 1)
     pacf[0] = 1.0
     if efficient:
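`pacf_ols` now both caps the default at half the sample and validates explicit values against the same bound. A sketch of the boundary behaviour (values assume the rules shown in this diff):

```python
import numpy as np
from statsmodels.tsa.stattools import pacf_ols

y = np.random.standard_normal(9)
# Default: max(min(int(10 * log10(9)), 9 // 2), 1) = max(min(9, 4), 1) = 4,
# so the returned array holds lags 0 through 4.
print(pacf_ols(y).shape)  # (5,)

try:
    pacf_ols(y, nlags=5)  # 5 > 9 // 2 = 4
except ValueError as err:
    print(err)  # nlags must be smaller than nobs // 2 (4)
```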
@@ -903,14 +916,30 @@
             params = lstsq(xlags[:, :k], x0, rcond=None)[0]
             # Last coefficient corresponds to PACF value (see [1])
             pacf[k] = np.squeeze(params[-1])
-
     if adjusted:
         pacf *= nobs / (nobs - np.arange(nlags + 1))
-
     return pacf


-def pacf(x, nlags=None, method="ywadjusted", alpha=None):
+def pacf(
+    x: ArrayLike1D,
+    nlags: int | None = None,
+    method: Literal[
+        "yw",
+        "ywadjusted",
+        "ols",
+        "ols-inefficient",
+        "ols-adjusted",
+        "ywm",
+        "ywmle",
+        "ld",
+        "ldadjusted",
+        "ldb",
+        "ldbiased",
+        "burg",
+    ] = "ywadjusted",
+    alpha: float | None = None,
+) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
     """
     Partial autocorrelation estimate.
@@ -996,7 +1025,7 @@ def pacf(x, nlags=None, method="ywadjusted", alpha=None):
         "ldb",
         "ldbiased",
         "ld_biased",
-        "burg"
+        "burg",
     )
     x = array_like(x, "x", maxdim=2)
     method = string_like(method, "method", options=methods)
@@ -1005,13 +1034,13 @@
     nobs = x.shape[0]
     if nlags is None:
         nlags = min(int(10 * np.log10(nobs)), nobs // 2 - 1)
-    if nlags >= x.shape[0] // 2:
+        nlags = max(nlags, 1)
+    if nlags > x.shape[0] // 2:
         raise ValueError(
             "Can only compute partial correlations for lags up to 50% of the "
             f"sample size. The requested nlags {nlags} must be < "
             f"{x.shape[0] // 2}."
         )
-
     if method in ("ols", "ols-inefficient", "ols-adjusted"):
         efficient = "inefficient" not in method
         adjusted = "adjusted" in method
@@ -1031,7 +1060,6 @@ def pacf(x, nlags=None, method="ywadjusted", alpha=None):
         acv = acovf(x, adjusted=False, fft=False)
         ld_ = levinson_durbin(acv, nlags=nlags, isacov=True)
         ret = ld_[2]
-
     if alpha is not None:
         varacf = 1.0 / len(x)  # for all lags >=1
         interval = stats.norm.ppf(1.0 - alpha / 2.0) * np.sqrt(varacf)
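Because the top-level check changed from `>=` to `>`, requesting exactly `nobs // 2` lags is now allowed; that is also why `test_pacf_nlags_error` below shrinks its sample from 100 to 99 observations to keep triggering the error. A sketch:

```python
import numpy as np
from statsmodels.tsa.stattools import pacf

y = np.random.standard_normal(100)
print(pacf(y, nlags=50).shape)  # (51,): nlags == nobs // 2 = 50 is now accepted

try:
    pacf(np.random.standard_normal(99), nlags=50)  # 50 > 99 // 2 = 49
except ValueError as err:
    print(err)  # Can only compute partial correlations for lags up to 50% ...
```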
@@ -1538,7 +1566,7 @@ def grangercausalitytests(x, maxlag, addconst=True, verbose=None):
     if x.shape[0] <= 3 * maxlag + int(addconst):
         raise ValueError(
             "Insufficient observations. Maximum allowable "
-            "lag is {0}".format(int((x.shape[0] - int(addconst)) / 3) - 1)
+            "lag is {}".format(int((x.shape[0] - int(addconst)) / 3) - 1)
         )

     resli = {}
@@ -1945,7 +1973,7 @@ def kpss(
     regression: Literal["c", "ct"] = "c",
     nlags: Literal["auto", "legacy"] | int = "auto",
     store: bool = False,
-) -> Tuple[float, float, int, dict[str, float]]:
+) -> tuple[float, float, int, dict[str, float]]:
     """
     Kwiatkowski-Phillips-Schmidt-Shin test for stationarity.
@@ -2032,7 +2060,7 @@ def kpss(

     # if m is not one, n != m * n
     if nobs != x.size:
-        raise ValueError("x of shape {0} not understood".format(x.shape))
+        raise ValueError(f"x of shape {x.shape} not understood")

     if hypo == "ct":
         # p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t,
@@ -2106,8 +2134,8 @@ def kpss(
         rstore.nobs = nobs

         stationary_type = "level" if hypo == "c" else "trend"
-        rstore.H0 = "The series is {0} stationary".format(stationary_type)
-        rstore.HA = "The series is not {0} stationary".format(stationary_type)
+        rstore.H0 = f"The series is {stationary_type} stationary"
+        rstore.HA = f"The series is not {stationary_type} stationary"

         return kpss_stat, p_value, crit_dict, rstore
     else:
@@ -2200,7 +2228,7 @@ def range_unit_root_test(x, store=False):

     # if m is not one, n != m * n
     if nobs != x.size:
-        raise ValueError("x of shape {0} not understood".format(x.shape))
+        raise ValueError(f"x of shape {x.shape} not understood")

     # Table from [1] has been replicated using 200,000 samples
     # Critical values for new n_obs values have been identified
26 changes: 25 additions & 1 deletion statsmodels/tsa/tests/test_stattools.py
@@ -1533,7 +1533,7 @@ def test_acf_conservate_nanops(reset_randomstate):


 def test_pacf_nlags_error(reset_randomstate):
-    e = np.random.standard_normal(100)
+    e = np.random.standard_normal(99)
     with pytest.raises(ValueError, match="Can only compute partial"):
         pacf(e, 50)
@@ -1584,3 +1584,27 @@ def test_granger_causality_exception_maxlag(gc_data):
 def test_granger_causality_verbose(gc_data):
     with pytest.warns(FutureWarning, match="verbose"):
         grangercausalitytests(gc_data, 3, verbose=True)
+
+
+@pytest.mark.parametrize("size", [3, 5, 7, 9])
+def test_pacf_small_sample(size, reset_randomstate):
+    y = np.random.standard_normal(size)
+    a = pacf(y)
+    assert isinstance(a, np.ndarray)
+    a, b = pacf_burg(y)
+    assert isinstance(a, np.ndarray)
+    assert isinstance(b, np.ndarray)
+    a = pacf_ols(y)
+    assert isinstance(a, np.ndarray)
+    a = pacf_yw(y)
+    assert isinstance(a, np.ndarray)
+
+
+def test_pacf_1_obs(reset_randomstate):
+    y = np.random.standard_normal(1)
+    with pytest.raises(ValueError):
+        pacf(y)
+    with pytest.raises(ValueError):
+        pacf_burg(y)
+    with pytest.raises(ValueError):
+        pacf_ols(y)
+    pacf_yw(y)
