From c05ceadb5e5de8d4480e3df938a447f43869c98e Mon Sep 17 00:00:00 2001
From: Josef Perktold <josef.pktd@gmail.com>
Date: Sat, 5 Sep 2020 16:19:44 -0400
Subject: [PATCH] ENH: properly connect param_names, give pandas codes endog to
 super

---
 statsmodels/miscmodels/ordinal_model.py       | 58 ++++++++++---------
 .../miscmodels/tests/test_ordinal_model.py    | 17 +++++-
 2 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py
index c0922dad0bd..7a146521055 100644
--- a/statsmodels/miscmodels/ordinal_model.py
+++ b/statsmodels/miscmodels/ordinal_model.py
@@ -85,19 +85,32 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
         # TODO: check if super can handle offset
         self.offset = offset
 
-        self.names, endog, exog = self._check_inputs(endog, exog)
+        endog, labels, is_pandas = self._check_inputs(endog, exog)
 
         super(OrderedModel, self).__init__(endog, exog, **kwds)
 
-        unique, index = np.unique(self.endog, return_inverse=True)
-        self.k_levels = len(unique)
-        self.endog = index
-        self.labels = unique
+        if not is_pandas:
+            unique, index = np.unique(self.endog, return_inverse=True)
+            self.endog = index
+            labels = unique
+
+        self.labels = labels
+        self.k_levels = len(labels)
 
         if self.exog is not None:
             self.nobs, self.k_vars = self.exog.shape
         else:  # no exog in model
             self.nobs, self.k_vars = self.endog.shape[0], 0
+
+        threshold_names = [str(x) + '/' + str(y)
+                           for x, y in zip(labels[:-1], labels[1:])]
+
+        # register threshold parameter names, cf. GenericLikelihoodModel.fit
+        if self.exog is not None:
+            self.exog_names.extend(threshold_names)
+        else:
+            self.data.xnames = threshold_names
+
         self.results_class = OrderedResults
 
     def _check_inputs(self, endog, exog):
@@ -106,20 +119,14 @@ def _check_inputs(self, endog, exog):
         support for endog and exog. Also retrieves columns & categories
         names for .summary() of the results class.
         """
-        names = {}
         if not isinstance(self.distr, stats.rv_continuous):
             msg = (
                 f"{self.distr.name} must be a scipy.stats distribution."
             )
             raise ValueError(msg)
 
-        # Pandas' support
-        if (isinstance(exog, pd.DataFrame)) or (isinstance(exog, pd.Series)):
-            exog_name = ([exog.name] if isinstance(exog, pd.Series)
-                         else exog.columns.tolist())
-            names['xname'] = exog_name
-            # exog = np.asarray(exog)
-
+        labels = None
+        is_pandas = False
         if isinstance(endog, pd.Series):
             if isinstance(endog.dtypes, CategoricalDtype):
                 if not endog.dtype.ordered:
@@ -129,20 +136,19 @@ def _check_inputs(self, endog, exog):
                                   "categories. ordered == True preferred.",
                                   Warning)
                 endog_name = endog.name
-                threshold_name = [str(x) + '/' + str(y)
-                                  for x, y in zip(endog.values.categories[:-1],
-                                                  endog.values.categories[1:])]
-                names['yname'] = endog_name
-                names['xname'] = names['xname'] + threshold_name
-                endog = np.asarray(endog.values.codes)
+                labels = endog.values.categories
+                endog = endog.cat.codes
+                if endog.min() == -1:  # means there is a missing value
+                    raise ValueError("missing values in categorical endog are "
+                                     "not supported")
+                endog.name = endog_name
+                is_pandas = True
             else:
-                msg = (
-                    "If the endog is a pandas.Serie "
-                    "it must be of categoricalDtype."
-                )
+                msg = ("If endog is a pandas.Series, "
+                       "it must be of CategoricalDtype.")
                 raise ValueError(msg)
 
-        return names, endog, exog
+        return endog, labels, is_pandas
 
     def cdf(self, x):
         """cdf evaluated at x
@@ -331,7 +337,3 @@ def pred_table(self):
         table = pd.crosstab(observed, predicted, margins=True, dropna=False)
         return table
 
-    @Appender(GenericLikelihoodModelResults.summary.__doc__)
-    def summary(self, yname=None, xname=None, title=None, alpha=.05):
-        names = self.model.names
-        return super(OrderedResults, self).summary(**names)
diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py
index 28e14403660..9fe9612b86d 100644
--- a/statsmodels/miscmodels/tests/test_ordinal_model.py
+++ b/statsmodels/miscmodels/tests/test_ordinal_model.py
@@ -72,7 +72,16 @@ def test_unordered(self):
 
     def test_results_other(self):
 
-        res1 = self.res1
+        res1 = self.res1  # numpy
+        resp = self.resp  # pandas
+
+        param_names_np = ['x1', 'x2', 'x3', '0/1', '1/2']
+        param_names_pd = ['pared', 'public', 'gpa', 'unlikely/somewhat likely',
+                          'somewhat likely/very likely']
+
+        assert res1.model.data.param_names == param_names_np
+        assert self.resp.model.data.param_names == param_names_pd
+        assert self.resp.model.endog_names == "apply"
 
         # results
         if hasattr(self, "pred_table"):
@@ -85,7 +94,9 @@ def test_results_other(self):
         # inherited
         tt = res1.t_test(np.eye(len(res1.params)))
         assert_allclose(tt.pvalue, res1.pvalues, rtol=1e-13)
-        # TODO: test using string definition of constraints
+
+        tt = resp.t_test(['pared', 'public', 'gpa'])  # pandas names
+        assert_allclose(tt.pvalue, res1.pvalues[:3], rtol=1e-13)
 
         pred = res1.predict(exog=res1.model.exog[-5:])
         fitted = res1.predict()
@@ -194,7 +205,7 @@ def test_loglikerelated(self):
         # null model
         mod_null = OrderedModel(mod.endog, None,
                                 offset=np.zeros(mod.nobs),
-                                distr='probit')
+                                distr=mod.distr)
         null_params = mod.start_params
         res_null = mod_null.fit(method='bfgs', disp=False)
         assert_allclose(res_null.params, null_params[mod.k_vars:], rtol=1e-8)