From 2a65e8eaca19fa04041289fcc6112620b9a09c80 Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 10:42:16 +0100
Subject: [PATCH 1/5] ENH: Check that there are sufficient dof for estimation.

---
 statsmodels/tsa/arima_model.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index a41ff4525d6..f7b83d2dbd1 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -349,6 +349,10 @@ def _make_arma_exog(endog, exog, trend):
         k_trend = 0
     return k_trend, exog
 
+def _check_estimable(nobs, n_params):
+    if nobs <= n_params:
+        raise ValueError("Insufficient degrees of freedom to estimate")
+
 
 class ARMA(tsbase.TimeSeriesModel):
     __doc__ = tsbase._tsa_doc % {"model" : _arma_model,
@@ -364,6 +368,7 @@ def __init__(self, endog, order=None, exog=None, dates=None, freq=None,
             warnings.warn("In the next release order will not be optional "
                           "in the model constructor.", FutureWarning)
         else:
+            _check_estimable(len(self.endog), sum(order))
             self.k_ar = k_ar = order[0]
             self.k_ma = k_ma = order[1]
             self.k_lags = k_lags = max(k_ar,k_ma+1)
@@ -794,6 +799,7 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
                           "This will overwrite any order given in the model "
                           "constructor.", FutureWarning)
 
+            _check_estimable(len(self.endog), sum(order))
             # get model order and constants
             self.k_ar = k_ar = int(order[0])
             self.k_ma = k_ma = int(order[1])
@@ -820,6 +826,8 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
         # (re)set trend and handle exogenous variables
         # always pass original exog
         k_trend, exog = _make_arma_exog(endog, self.exog, trend)
+        # check again now that we know the trend
+        _check_estimable(len(endog), k_ar + k_ma + k_exog + k_trend)
         self.k_trend = k_trend
         self.exog = exog  # overwrites original exog from __init__
 
@@ -899,6 +907,8 @@ def __init__(self, endog, order, exog=None, dates=None, freq=None,
         super(ARIMA, self).__init__(endog, (p,q), exog, dates, freq, missing)
         self.k_diff = d
         self.endog = np.diff(self.endog, n=d)
+        #NOTE: will check in ARMA but check again since differenced now
+        _check_estimable(len(self.endog), p+q)
         if exog is not None:
             self.exog = self.exog[d:]
         self.data.ynames = 'D.' + self.endog_names

From 8d27f2a3a45a034bf98d348ce16283769d9cce2c Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 10:59:46 +0100
Subject: [PATCH 2/5] BUG: Never squeeze to a scalar.

---
 statsmodels/base/data.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/statsmodels/base/data.py b/statsmodels/base/data.py
index d72e2b708d6..6becf371326 100644
--- a/statsmodels/base/data.py
+++ b/statsmodels/base/data.py
@@ -234,7 +234,14 @@ def _get_names(self, arr):
     def _get_yarr(self, endog):
         if data_util._is_structured_ndarray(endog):
             endog = data_util.struct_to_ndarray(endog)
-        return np.asarray(endog).squeeze()
+        endog = np.asarray(endog)
+        if len(endog) == 1:  # never squeeze to a scalar
+            if endog.ndim == 1:
+                return endog
+            elif endog.ndim > 1:
+                return np.asarray([endog.squeeze()])
+
+        return endog.squeeze()
 
     def _get_xarr(self, exog):
         if data_util._is_structured_ndarray(exog):

From 826be00df0996f6b7f1b5bb8d8a9acfde437efad Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 11:05:49 +0100
Subject: [PATCH 3/5] ENH: Handle rare case of nobs == 1.

---
 statsmodels/regression/linear_model.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py
index e8acb9806e0..b4e06b1d335 100644
--- a/statsmodels/regression/linear_model.py
+++ b/statsmodels/regression/linear_model.py
@@ -412,7 +412,11 @@ def __init__(self, endog, exog, weights=1., missing='none', hasconst=None):
         weights = np.array(weights)
         if weights.shape == ():
             weights = np.repeat(weights, len(endog))
-        weights = weights.squeeze()
+        # handle case that endog might be of len == 1
+        if len(weights) == 1:
+            weights = np.array([weights.squeeze()])
+        else:
+            weights = weights.squeeze()
         super(WLS, self).__init__(endog, exog, missing=missing,
                                   weights=weights, hasconst=hasconst)
         nobs = self.exog.shape[0]

From 6cc6c6a27e8fd30523df704e29086c4b569c925e Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 11:16:38 +0100
Subject: [PATCH 4/5] BUG: Don't let maxlag > nobs. Closes #1146.

---
 statsmodels/tsa/arima_model.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index f7b83d2dbd1..4e6f8aff276 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -417,7 +417,12 @@ def _fit_start_params_hr(self, order):
                 endog -= np.dot(exog, ols_params).squeeze()
             if q != 0:
                 if p != 0:
-                    armod = AR(endog).fit(ic='bic', trend='nc')
+                    # make sure we don't run into small data problems in AR fit
+                    nobs = len(endog)
+                    maxlag = int(round(12*(nobs/100.)**(1/4.)))
+                    if maxlag >= nobs:
+                        maxlag = nobs - 1
+                    armod = AR(endog).fit(ic='bic', trend='nc', maxlag=maxlag)
                     arcoefs_tmp = armod.params
                     p_tmp = armod.k_ar
                     # it's possible in small samples that optimal lag-order

From b70da1b69d9ba340af5ad7d4c705873405e6aa0c Mon Sep 17 00:00:00 2001
From: Skipper Seabold
Date: Fri, 25 Oct 2013 11:30:38 +0100
Subject: [PATCH 5/5] TST: Add test for #1146.

---
 statsmodels/tsa/tests/test_arima.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/statsmodels/tsa/tests/test_arima.py b/statsmodels/tsa/tests/test_arima.py
index 11b9a829762..ceeed2af294 100644
--- a/statsmodels/tsa/tests/test_arima.py
+++ b/statsmodels/tsa/tests/test_arima.py
@@ -1873,6 +1873,24 @@ def test_arima_1123():
     assert_almost_equal(fc[1], 0.968759, 6)
     assert_almost_equal(fc[2][0], [0.582485, 4.379952], 6)
 
+def test_small_data():
+    # 1146
+    y = [-1214.360173, -1848.209905, -2100.918158, -3647.483678,
+         -4711.186773]
+
+    # refuse to estimate these
+    assert_raises(ValueError, ARIMA, y, (2, 0, 3))
+    assert_raises(ValueError, ARIMA, y, (1, 1, 3))
+    mod = ARIMA(y, (1, 0, 3))
+    assert_raises(ValueError, mod.fit, trend="c")
+
+    # try to estimate these...leave it up to the user to check for garbage
+    # and be clear, these are garbage parameters.
+    # X-12 arima will estimate, gretl refuses to estimate likely a problem
+    # in start params regression.
+    res = mod.fit(trend="nc", disp=0, start_params=[.1,.1,.1,.1])
+    mod = ARIMA(y, (1, 0, 2))
+    res = mod.fit(disp=0, start_params=[.1, .1, .1, .1])
 
 if __name__ == "__main__":
     import nose
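
For reference, a minimal usage sketch of the behavior this series introduces, assuming a statsmodels build with these patches applied. The data and orders mirror test_small_data above; the snippet itself is illustrative and not part of the test suite.

    from statsmodels.tsa.arima_model import ARIMA

    y = [-1214.360173, -1848.209905, -2100.918158, -3647.483678, -4711.186773]

    # Five observations cannot support p + q = 5 parameters, so the
    # constructor now raises instead of attempting a degenerate fit.
    try:
        ARIMA(y, (2, 0, 3))
    except ValueError as err:
        print(err)  # Insufficient degrees of freedom to estimate

    # A smaller order leaves at least one degree of freedom and still fits.
    # start_params are passed explicitly because the start-parameter
    # regression is fragile on samples this small.
    mod = ARIMA(y, (1, 0, 2))
    res = mod.fit(disp=0, start_params=[.1, .1, .1, .1])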