BUG: fix df_resid, unit test compared to discrete Logit
josef-pkt committed Sep 21, 2020
1 parent 97101b3 commit ce0899b
Showing 3 changed files with 73 additions and 6 deletions.
7 changes: 7 additions & 0 deletions statsmodels/base/model.py
@@ -2492,6 +2492,13 @@ def __init__(self, model, mlefit):
         self._cache = {}
         self.__dict__.update(mlefit.__dict__)

+        k_params = len(mlefit.params)
+        # checks mainly for adding new models or subclassing
+        if self.df_model + self.model.k_constant != k_params:
+            warnings.warn("df_model + k_constant differs from nparams")
+        if self.df_resid != self.nobs - k_params:
+            warnings.warn("df_resid differs from nobs - nparams")
+
     def summary(self, yname=None, xname=None, title=None, alpha=.05):
         """Summarize the Regression Results
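
For reference, the invariant these warnings enforce can be checked on any fitted results object; a minimal sketch against discrete Logit using the public statsmodels API (simulated data, not part of this commit):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = sm.add_constant(rng.normal(size=(100, 3)))
y = (rng.uniform(size=100) < 0.5).astype(int)

res = sm.Logit(y, x).fit(disp=False)
k_params = len(res.params)
# df_model excludes the constant; df_resid subtracts all estimated parameters
assert res.df_model + res.model.k_constant == k_params
assert res.df_resid == res.nobs - k_params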
23 changes: 17 additions & 6 deletions statsmodels/miscmodels/ordinal_model.py
@@ -90,7 +90,7 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
         endog, labels, is_pandas = self._check_inputs(endog, exog)

         super(OrderedModel, self).__init__(endog, exog, **kwds)
-
+        k_levels = None  # initialize
         if not is_pandas:
             if self.endog.ndim == 1:
                 unique, index = np.unique(self.endog, return_inverse=True)
@@ -100,16 +100,21 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
                 if not hasattr(self, "design_info"):
                     raise ValueError("2-dim endog not supported")
                 # this branch is currently only in support of from_formula
-                # labels here are only needed to choose k_levels in initialize
-                labels = [str(i) for i in range(self.endog.shape[1])]
+                # we need to initialize k_levels correctly for df_resid
+                k_levels = self.endog.shape[1]
+                labels = []
                 # Note: Doing the following here would break from_formula
                 # self.endog = self.endog.argmax(1)

         if self.k_constant > 0:
             raise ValueError("there should not be a constant in the model")

-        self._initialize_labels(labels)
+        self._initialize_labels(labels, k_levels=k_levels)
+
+        # adjust df
+        self.k_extra = self.k_levels - 1
+        self.df_model = self.k_vars + self.k_extra
+        self.df_resid = self.nobs - self.df_model

         self.results_class = OrderedResults
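
The df bookkeeping above counts the threshold parameters: a model with k_levels ordered categories estimates k_levels - 1 cut points in addition to the k_vars slopes, and no constant. A small sketch of the same arithmetic (the helper function is illustrative, not part of the codebase):

def ordinal_df(nobs, k_vars, k_levels):
    # k_levels - 1 threshold (cut point) parameters, no constant
    k_extra = k_levels - 1
    df_model = k_vars + k_extra
    df_resid = nobs - df_model
    return df_model, df_resid

# e.g. 400 observations, 3 regressors, 3 ordered categories:
print(ordinal_df(400, 3, 3))  # (5, 395)

This is also what the new unit test below asserts: with 3 outcome levels, df_resid == nobs - (k_vars + 2).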

@@ -148,9 +153,12 @@ def _check_inputs(self, endog, exog):

         return endog, labels, is_pandas

-    def _initialize_labels(self, labels):
+    def _initialize_labels(self, labels, k_levels=None):
         self.labels = labels
-        self.k_levels = len(labels)
+        if k_levels is None:
+            self.k_levels = len(labels)
+        else:
+            self.k_levels = k_levels

         if self.exog is not None:
             self.nobs, self.k_vars = self.exog.shape
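
Passing k_levels explicitly matters for the 2-dim endog branch in __init__ above, where the label list is now empty; a hypothetical illustration of why it cannot be recovered from labels there:

import numpy as np

# one-hot (2-dim) endog: 5 observations, 3 ordered categories
endog_onehot = np.eye(3)[[0, 1, 2, 1, 0]]
labels = []                           # no labels in this branch
k_levels = endog_onehot.shape[1]      # 3, must be passed explicitly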
@@ -364,6 +372,9 @@ def fit(self, start_params=None, method='nm', maxiter=500, full_output=1,
         # use the proper result class
         ordmlefit = OrderedResults(self, mlefit)

+        # TODO: temporary, needs better fix, modelwc adds 1 by default
+        ordmlefit.hasconst = 0
+
         return ordmlefit
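
Setting hasconst = 0 keeps a spurious constant out of the parameter count. One place this shows up is the AIC, which counts every estimated parameter; a generic sketch of the standard definition (the helper is illustrative, not statsmodels code):

def aic(llf, k_params):
    # counting a constant that is not in the model inflates this by 2
    return 2 * k_params - 2 * llf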
49 changes: 49 additions & 0 deletions statsmodels/miscmodels/tests/test_ordinal_model.py
@@ -13,6 +13,9 @@
 from .results.results_ordinal_model import data_store as ds
 from statsmodels.miscmodels.ordinal_model import OrderedModel

+from statsmodels.discrete.discrete_model import Logit
+from statsmodels.tools.tools import add_constant
+

 class CheckOrdinalModelMixin(object):

@@ -116,6 +119,9 @@ def test_results_other(self):
         fitted = self.resf.predict()
         assert_allclose(pred, fitted[-5:], rtol=1e-13)

+        n, k = res1.model.exog.shape
+        assert_equal(self.resf.df_resid, n - (k + 2))
+

 class TestLogitModel(CheckOrdinalModelMixin):

@@ -393,3 +399,46 @@ def _cdf(self, x):
         cls.resp = resp
         cls.resf = resf
         cls.resu = resu
+
+
+class TestLogitBinary():
+    # compare OrderedModel with discrete Logit for binary case
+    def test_attributes(self):
+        data = ds.df
+
+        mask_drop = data['apply'] == "somewhat likely"
+        data2 = data.loc[~mask_drop, :]
+        # we need to remove the category also from the Categorical Index
+        data2['apply'].cat.remove_categories("somewhat likely", inplace=True)
+
+        # standard fit with pandas input
+        modp = OrderedModel(data2['apply'],
+                            data2[['pared', 'public', 'gpa']],
+                            distr='logit')
+        resp = modp.fit(method='bfgs', disp=False)
+
+        exog = add_constant(data2[['pared', 'public', 'gpa']], prepend=False)
+        mod_logit = Logit(data2['apply'].cat.codes, exog)
+        res_logit = mod_logit.fit()
+
+        attributes = "bse df_resid llf aic bic".split()
+        assert_allclose(resp.params[:3], res_logit.params[:3], rtol=1e-5)
+        assert_allclose(resp.params[3], -res_logit.params[3], rtol=1e-5)
+        for attr in attributes:
+            assert_allclose(getattr(resp, attr), getattr(res_logit, attr),
+                            rtol=1e-4)
+
+        resp = modp.fit(method='bfgs', disp=False,
+                        cov_type="hac", cov_kwds={"maxlags": 2})
+        res_logit = mod_logit.fit(method='bfgs', disp=False,
+                                  cov_type="hac", cov_kwds={"maxlags": 2})
+        for attr in attributes:
+            assert_allclose(getattr(resp, attr), getattr(res_logit, attr),
+                            rtol=1e-4)
+
+        resp = modp.fit(method='bfgs', disp=False, cov_type="hc1")
+        res_logit = mod_logit.fit(method='bfgs', disp=False,
+                                  cov_type="hc1")
+        for attr in attributes:
+            assert_allclose(getattr(resp, attr), getattr(res_logit, attr),
+                            rtol=1e-4)
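
The sign flip on the last coefficient reflects the parameterization: OrderedModel evaluates cdf(threshold - x'b), so in the binary case the single threshold equals minus the Logit intercept. A sketch of the same comparison on simulated data (data and seed here are illustrative, not from the test suite):

import numpy as np
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

rng = np.random.default_rng(1)
x = rng.normal(size=(500, 2))
y = (x @ np.array([1.0, -0.5]) + rng.logistic(size=500) > 0.25).astype(int)

res_ord = OrderedModel(y, x, distr='logit').fit(method='bfgs', disp=False)
res_log = sm.Logit(y, sm.add_constant(x, prepend=False)).fit(disp=False)

print(res_ord.params[:2], res_log.params[:2])   # slopes agree
print(res_ord.params[2], -res_log.params[2])    # threshold == -intercept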
