From 2a65e8eaca19fa04041289fcc6112620b9a09c80 Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 10:42:16 +0100
Subject: [PATCH 1/5] ENH: Check that there are sufficient dof for estimation.

---
 statsmodels/tsa/arima_model.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index a41ff4525d6..f7b83d2dbd1 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -349,6 +349,10 @@ def _make_arma_exog(endog, exog, trend):
         k_trend = 0
     return k_trend, exog
 
+def _check_estimable(nobs, n_params):
+    if nobs <= n_params:
+        raise ValueError("Insufficient degrees of freedom to estimate")
+
 
 class ARMA(tsbase.TimeSeriesModel):
     __doc__ = tsbase._tsa_doc % {"model" : _arma_model,
@@ -364,6 +368,7 @@ def __init__(self, endog, order=None, exog=None, dates=None, freq=None,
             warnings.warn("In the next release order will not be optional "
                           "in the model constructor.", FutureWarning)
         else:
+            _check_estimable(len(self.endog), sum(order))
             self.k_ar = k_ar = order[0]
             self.k_ma = k_ma = order[1]
             self.k_lags = k_lags = max(k_ar,k_ma+1)
@@ -794,6 +799,7 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
                           "This will overwrite any order given in the model "
                           "constructor.", FutureWarning)
 
+            _check_estimable(len(self.endog), sum(order))
             # get model order and constants
             self.k_ar = k_ar = int(order[0])
             self.k_ma = k_ma = int(order[1])
@@ -820,6 +826,8 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
         # (re)set trend and handle exogenous variables
         # always pass original exog
         k_trend, exog = _make_arma_exog(endog, self.exog, trend)
+        # check again now that we know the trend
+        _check_estimable(len(endog), k_ar + k_ma + k_exog + k_trend)
         self.k_trend = k_trend
         self.exog = exog  # overwrites original exog from __init__
 
@@ -899,6 +907,8 @@ def __init__(self, endog, order, exog=None, dates=None, freq=None,
         super(ARIMA, self).__init__(endog, (p,q), exog, dates, freq, missing)
         self.k_diff = d
         self.endog = np.diff(self.endog, n=d)
+        #NOTE: will check in ARMA but check again since differenced now
+        _check_estimable(len(self.endog), p+q)
         if exog is not None:
             self.exog = self.exog[d:]
         self.data.ynames = 'D.' + self.endog_names

From 8d27f2a3a45a034bf98d348ce16283769d9cce2c Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 10:59:46 +0100
Subject: [PATCH 2/5] BUG: Never squeeze to a scalar.

---
 statsmodels/base/data.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/statsmodels/base/data.py b/statsmodels/base/data.py
index d72e2b708d6..6becf371326 100644
--- a/statsmodels/base/data.py
+++ b/statsmodels/base/data.py
@@ -234,7 +234,14 @@ def _get_names(self, arr):
     def _get_yarr(self, endog):
         if data_util._is_structured_ndarray(endog):
             endog = data_util.struct_to_ndarray(endog)
-        return np.asarray(endog).squeeze()
+        endog = np.asarray(endog)
+        if len(endog) == 1:  # never squeeze to a scalar
+            if endog.ndim == 1:
+                return endog
+            elif endog.ndim > 1:
+                return np.asarray([endog.squeeze()])
+
+        return endog.squeeze()
 
     def _get_xarr(self, exog):
         if data_util._is_structured_ndarray(exog):

From 826be00df0996f6b7f1b5bb8d8a9acfde437efad Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 11:05:49 +0100
Subject: [PATCH 3/5] ENH: Handle rare case of nobs == 1.

---
 statsmodels/regression/linear_model.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py
index e8acb9806e0..b4e06b1d335 100644
--- a/statsmodels/regression/linear_model.py
+++ b/statsmodels/regression/linear_model.py
@@ -412,7 +412,11 @@ def __init__(self, endog, exog, weights=1., missing='none', hasconst=None):
         weights = np.array(weights)
         if weights.shape == ():
             weights = np.repeat(weights, len(endog))
-        weights = weights.squeeze()
+        # handle case that endog might be of len == 1
+        if len(weights) == 1:
+            weights = np.array([weights.squeeze()])
+        else:
+            weights = weights.squeeze()
         super(WLS, self).__init__(endog, exog, missing=missing,
                                   weights=weights, hasconst=hasconst)
         nobs = self.exog.shape[0]

From 6cc6c6a27e8fd30523df704e29086c4b569c925e Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 11:16:38 +0100
Subject: [PATCH 4/5] BUG: Don't let maxlag > nobs. Closes #1146.

---
 statsmodels/tsa/arima_model.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index f7b83d2dbd1..4e6f8aff276 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -417,7 +417,12 @@ def _fit_start_params_hr(self, order):
                 endog -= np.dot(exog, ols_params).squeeze()
             if q != 0:
                 if p != 0:
-                    armod = AR(endog).fit(ic='bic', trend='nc')
+                    # make sure we don't run into small data problems in AR fit
+                    nobs = len(endog)
+                    maxlag = int(round(12*(nobs/100.)**(1/4.)))
+                    if maxlag >= nobs:
+                        maxlag = nobs - 1
+                    armod = AR(endog).fit(ic='bic', trend='nc', maxlag=maxlag)
                     arcoefs_tmp = armod.params
                     p_tmp = armod.k_ar
                     # it's possible in small samples that optimal lag-order

From b70da1b69d9ba340af5ad7d4c705873405e6aa0c Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 11:30:38 +0100
Subject: [PATCH 5/5] TST: Add test for #1146.

---
 statsmodels/tsa/tests/test_arima.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/statsmodels/tsa/tests/test_arima.py b/statsmodels/tsa/tests/test_arima.py
index 11b9a829762..ceeed2af294 100644
--- a/statsmodels/tsa/tests/test_arima.py
+++ b/statsmodels/tsa/tests/test_arima.py
@@ -1873,6 +1873,24 @@ def test_arima_1123():
     assert_almost_equal(fc[1], 0.968759, 6)
     assert_almost_equal(fc[2][0], [0.582485, 4.379952], 6)
 
+def test_small_data():
+    # 1146
+    y = [-1214.360173, -1848.209905, -2100.918158, -3647.483678,
+         -4711.186773]
+
+    # refuse to estimate these
+    assert_raises(ValueError, ARIMA, y, (2, 0, 3))
+    assert_raises(ValueError, ARIMA, y, (1, 1, 3))
+    mod = ARIMA(y, (1, 0, 3))
+    assert_raises(ValueError, mod.fit, trend="c")
+
+    # try to estimate these...leave it up to the user to check for garbage
+    # and be clear, these are garbage parameters.
+    # X-12 arima will estimate, gretl refuses to estimate likely a problem
+    # in start params regression.
+    res = mod.fit(trend="nc", disp=0, start_params=[.1,.1,.1,.1])
+    mod = ARIMA(y, (1, 0, 2))
+    res = mod.fit(disp=0, start_params=[.1, .1, .1, .1])
 
 if __name__ == "__main__":
     import nose
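
For reference, a minimal usage sketch of the behavior this series introduces, assuming a statsmodels build with these patches applied. The data and orders mirror test_small_data above; the snippet itself is illustrative and not part of the test suite.

    from statsmodels.tsa.arima_model import ARIMA

    y = [-1214.360173, -1848.209905, -2100.918158, -3647.483678, -4711.186773]

    # Five observations cannot support p + q = 5 parameters, so the
    # constructor now raises instead of attempting a degenerate fit.
    try:
        ARIMA(y, (2, 0, 3))
    except ValueError as err:
        print(err)  # Insufficient degrees of freedom to estimate

    # A smaller order leaves at least one degree of freedom and still fits.
    # start_params are passed explicitly because the start-parameter
    # regression is fragile on samples this small.
    mod = ARIMA(y, (1, 0, 2))
    res = mod.fit(disp=0, start_params=[.1, .1, .1, .1])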