From d1bef78911e5a325393fed66adb1beb5d1cdb85b Mon Sep 17 00:00:00 2001
From: Josef Perktold
Date: Mon, 24 Aug 2015 11:23:58 -0400
Subject: [PATCH 01/24] ENH: OrderedModel initial version

---
 statsmodels/miscmodels/ordinal_model.py | 174 ++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 statsmodels/miscmodels/ordinal_model.py

diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py
new file mode 100644
index 00000000000..934ea212f94
--- /dev/null
+++ b/statsmodels/miscmodels/ordinal_model.py
@@ -0,0 +1,174 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Aug 22 20:24:42 2015
+
+Author: Josef Perktold
+License: BSD-3
+"""
+
+import numpy as np
+from scipy import stats
+from statsmodels.base.model import GenericLikelihoodModel, GenericLikelihoodModelResults
+
+
+class OrderedModel(GenericLikelihoodModel):
+    """Ordinal Model based on logistic or normal distribution
+
+    The parameterization corresponds to the proportional odds model.
+
+    The model assumes that the endogenous variable is ordered but that the
+    labels have no numeric interpretation besides the ordering.
+
+    The model is based on a latent linear variable, where we observe only
+
+    y_latent = X beta + u
+
+    The observed variable is defined by the interval
+
+    y = {0 if y_latent <= cut_0
+         1 if cut_0 < y_latent <= cut_1
+         ...
+         K if cut_K < y_latent
+
+    The probability of observing y=k conditional on the explanatory variables
+    X is given by
+
+    prob(y = k | x) = Prob(cut_k < y_latent <= cut_k+1)
+                    = Prob(cut_k - x beta < u <= cut_k+1 - x beta)
+                    = F(cut_k+1 - x beta) - F(cut_k - x beta)
+
+    where F is the cumulative distribution of u, which is either the normal
+    or the logistic distribution, but can be set to any other continuous
+    distribution. We use standardized distributions to avoid identifiability
+    problems.
+
+
+    Parameters
+    ----------
+    endog : array_like
+        endogenous or dependent ordered categorical variable with k levels.
+        Labels or values of endog will be internally transformed to
+        consecutive integers, 0, 1, 2, ...
+    exog : array_like
+        exogenous explanatory variables. This should not include an intercept.
+        (TODO: verify)
+    distr : string 'probit' or 'logit', or a distribution instance
+        The default is currently 'probit' which uses the normal distribution
+        and corresponds to an ordered Probit model. The distribution is
+        assumed to have the main methods of scipy.stats distributions, mainly
+        cdf, pdf and ppf. The inverse cdf, ppf, is only used to calculate
+        starting values.
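+
+    Examples
+    --------
+    A minimal sketch on simulated data (integer-coded endog, no intercept
+    in exog); only the shape of the parameter vector is checked here, so
+    the snippet does not depend on the random draws:
+
+    >>> import numpy as np
+    >>> from statsmodels.miscmodels.ordinal_model import OrderedModel
+    >>> np.random.seed(123)
+    >>> y = np.repeat([0, 1, 2], 30)  # three ordered levels
+    >>> x = np.random.randn(90, 2)
+    >>> res = OrderedModel(y, x, distr='logit').fit(disp=False)
+    >>> res.params.shape  # 2 slopes plus k_levels - 1 = 2 thresholds
+    (4,)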
+
+    Status: initial version, subclasses `GenericLikelihoodModel`
+
+    """
+
+    def __init__(self, endog, exog, distr='probit', **kwds):
+        super(OrderedModel, self).__init__(endog, exog, **kwds)
+        unique, index = np.unique(self.endog, return_inverse=True)
+        self.k_levels = len(unique)
+        self.endog = index
+        self.labels = unique
+        if distr == 'probit':
+            self.distr = stats.norm
+        elif distr == 'logit':
+            self.distr = stats.logistic
+        else:
+            self.distr = distr
+
+        self.k_vars = self.exog.shape[1]
+        self.results_class = OrderedResults #TODO: doesn't work
+
+
+    def cdf(self, x):
+        """cdf evaluated at x
+        """
+        return self.distr.cdf(x)
+
+    def prob(self, low, upp):
+        """interval probability
+        """
+        return np.maximum(self.cdf(upp) - self.cdf(low), 0)
+
+    def transform_threshold_params(self, params):
+        """transformation of the parameters in the optimization
+
+        Parameters
+        ----------
+        params : nd_array
+            contains (exog_coef, transformed_thresholds) where exog_coef are
+            the coefficients for the explanatory variables in the linear term
+            and transformed_thresholds are the transformed threshold or
+            cutoff points. The first, lowest threshold is unchanged; all
+            other thresholds are in terms of exponentiated increments.
+
+        Returns
+        -------
+        thresh : nd_array
+            thresh are the thresholds or cutoff constants for the intervals.
+
+        """
+        th_params = params[-(self.k_levels - 1):]
+        thresh = np.concatenate((th_params[:1], np.exp(th_params[1:]))).cumsum()
+        thresh = np.concatenate(([-np.inf], thresh, [np.inf]))
+        return thresh
+
+    def transform_reverse_threshold_params(self, params):
+        """obtain transformed thresholds from original thresholds, cutoff
+        constants.
+
+        """
+        start_ppf = params
+        thresh_params = np.concatenate((start_ppf[:1], np.log(np.diff(start_ppf[:-1]))))
+        return thresh_params
+
+
+    def predict(self, params, exog=None):
+        """predicted probabilities for each level of the ordinal endog.
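+
+        Returns
+        -------
+        prob : ndarray
+            2-dim array of interval probabilities with shape
+            (nobs, k_levels), one column per level. The `exog` keyword is
+            not used yet; probabilities are computed for `self.exog`.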
+ + + """ + #structure of params = [beta, constants_or_thresholds] + + # explicit in several steps to avoid bugs + th_params = params[-(self.k_levels - 1):] + thresh = np.concatenate((th_params[:1], np.exp(th_params[1:]))).cumsum() + thresh = np.concatenate(([-np.inf], thresh, [np.inf])) + xb = self.exog.dot(params[:-(self.k_levels - 1)])[:,None] + low = thresh[:-1] - xb + upp = thresh[1:] - xb + prob = self.prob(low, upp) + return prob + + + def loglike(self, params): + + #structure of params = [beta, constants_or_thresholds] + + thresh = np.concatenate(([-np.inf], params[-(self.k_levels - 1):], [np.inf])) + + # explicit in several steps to avoid bugs + th_params = params[-(self.k_levels - 1):] + thresh = np.concatenate((th_params[:1], np.exp(th_params[1:]))).cumsum() + thresh = np.concatenate(([-np.inf], thresh, [np.inf])) + thresh_i_low = thresh[self.endog] + thresh_i_upp = thresh[self.endog + 1] + xb = self.exog.dot(params[:-(self.k_levels - 1)]) + low = thresh_i_low - xb + upp = thresh_i_upp - xb + prob = self.prob(low, upp) + return np.log(prob + 1e-20).sum() + + @property + def start_params(self): + # start params based on model without exog + freq = np.bincount(self.endog) / len(self.endog) + start_ppf = self.distr.ppf(np.clip(freq.cumsum(), 0, 1)) + start_threshold = self.transform_reverse_threshold_params(start_ppf) + start_params = np.concatenate((np.zeros(self.k_vars), start_threshold)) + return start_params + + +class OrderedResults(GenericLikelihoodModelResults): + + pass From ac0abebbbe48c7ecc4d03968ef2bc80dcd9b0f04 Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Mon, 24 Aug 2015 11:24:38 -0400 Subject: [PATCH 02/24] Example script for OrderedModel --- statsmodels/examples/ex_ordered_model.py | 72 ++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 statsmodels/examples/ex_ordered_model.py diff --git a/statsmodels/examples/ex_ordered_model.py b/statsmodels/examples/ex_ordered_model.py new file mode 100644 index 00000000000..b6afcec34d9 --- /dev/null +++ b/statsmodels/examples/ex_ordered_model.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Aug 24 11:17:06 2015 + +Author: Josef Perktold +License: BSD-3 +""" + +import numpy as np +from scipy import stats + +from statsmodels.miscmodels.ordinal_model import OrderedModel + +nobs, k_vars = 1000, 3 +x = np.random.randn(nobs, k_vars) +#x = np.column_stack((np.ones(nobs), x)) #constant will be in integration limits +xb = x.dot(np.ones(k_vars)) +y_latent = xb + np.random.randn(nobs) +y = np.round(np.clip(y_latent, -2.4, 2.4)).astype(int) + 2 + +print(np.unique(y)) +print(np.bincount(y)) + +mod = OrderedModel(y, x) +start_params = np.ones(k_vars + 4) +start_params = np.concatenate((np.ones(k_vars), np.arange(4))) +start_ppf = stats.norm.ppf((np.bincount(y) / len(y)).cumsum()) +start_threshold = np.concatenate((start_ppf[:1], np.log(np.diff(start_ppf[:-1])))) +start_params = np.concatenate((np.zeros(k_vars), start_threshold)) +res = mod.fit(start_params=start_params, maxiter=5000, maxfun=5000) +print(res.params) +#res = mod.fit(start_params=res.params, method='bfgs') +res = mod.fit(start_params=start_params, method='bfgs') + +print(res.params) +print(np.exp(res.params[-(mod.k_levels - 1):]).cumsum()) +#print(res.summary()) + +predicted = res.model.predict(res.params) +pred_choice = predicted.argmax(1) +print('Fraction of correct choice predictions') +print((y == pred_choice).mean()) + +print('\ncomparing bincount') +print(np.bincount(res.model.predict(res.params).argmax(1))) 
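+print(np.bincount(res.model.endog))
+
+# as a rough check (sketch), recover the estimated cutpoints from the
+# transformed parameters; for this simulated y, which rounds y_latent,
+# they should lie near [-1.5, -0.5, 0.5, 1.5]
+print(res.model.transform_threshold_params(res.params)[1:-1])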
+
+res_log = OrderedModel(y, x, distr='logit').fit(method='bfgs')
+pred_choice_log = res_log.predict().argmax(1)
+print((y == pred_choice_log).mean())
+
+
+# example from UCLA Stats pages http://www.ats.ucla.edu/stat/stata/dae/ologit.htm
+# requires downloaded dataset ologit.dta
+
+import pandas
+dataf = pandas.read_stata(r"M:\josef_new\scripts\ologit_ucla.dta")
+
+# this works but sorts category levels alphabetically
+res_log2 = OrderedModel(np.asarray(dataf['apply']),
+                        np.asarray(dataf[['pared', 'public', 'gpa']], float),
+                        distr='logit').fit(method='bfgs')
+
+# this replicates the UCLA example except for different parameterization of par2
+res_log3 = OrderedModel(dataf['apply'].values.codes,
+                        np.asarray(dataf[['pared', 'public', 'gpa']], float),
+                        distr='logit').fit(method='bfgs')
+
+print(res_log3.summary())
+
+# with ordered probit - not on UCLA page
+print(OrderedModel(dataf['apply'].values.codes, np.asarray(dataf[['pared', 'public', 'gpa']], float), distr='probit').fit(method='bfgs').summary())

From ac0abebbbe48c7ecc4d03968ef2bc80dcd9b0f04 Mon Sep 17 00:00:00 2001
From: Bolzano-Weierstrass
Date: Sat, 22 Aug 2020 13:42:29 +0200
Subject: [PATCH 03/24] ENH: added Pandas support for exog & endog

---
 statsmodels/miscmodels/ordinal_model.py | 60 ++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py
index 934ea212f94..c48c030e191 100644
--- a/statsmodels/miscmodels/ordinal_model.py
+++ b/statsmodels/miscmodels/ordinal_model.py
@@ -7,6 +7,7 @@
 """
 
 import numpy as np
+import pandas as pd
 from scipy import stats
 from statsmodels.base.model import GenericLikelihoodModel, GenericLikelihoodModelResults
 
@@ -49,9 +50,12 @@ class OrderedModel(GenericLikelihoodModel):
         endogenous or dependent ordered categorical variable with k levels.
         Labels or values of endog will be internally transformed to
         consecutive integers, 0, 1, 2, ...
+        pd.Series with Categorical as dtype should be preferred as it gives
+        the order relation between the levels.
     exog : array_like
         exogenous explanatory variables. This should not include an intercept.
         (TODO: verify)
+        pd.DataFrame is also accepted.
     distr : string 'probit' or 'logit', or a distribution instance
         The default is currently 'probit' which uses the normal distribution
         and corresponds to an ordered Probit model. The distribution is
@@ -64,11 +68,7 @@ class OrderedModel(GenericLikelihoodModel):
     """
 
     def __init__(self, endog, exog, distr='probit', **kwds):
-        super(OrderedModel, self).__init__(endog, exog, **kwds)
-        unique, index = np.unique(self.endog, return_inverse=True)
-        self.k_levels = len(unique)
-        self.endog = index
-        self.labels = unique
+
         if distr == 'probit':
             self.distr = stats.norm
         elif distr == 'logit':
@@ -76,8 +76,56 @@ def __init__(self, endog, exog, distr='probit', **kwds):
         else:
             self.distr = distr
 
+        self.names, endog, exog = self._check_inputs(endog, exog)
+
+        super(OrderedModel, self).__init__(endog, exog, **kwds)
+
+        unique, index = np.unique(self.endog, return_inverse=True)
+        self.k_levels = len(unique)
+        self.endog = index
+        self.labels = unique
+
         self.k_vars = self.exog.shape[1]
-        self.results_class = OrderedResults #TODO: doesn't work
+        self.results_class = OrderedResults
+
+    def _check_inputs(self, endog, exog):
+        """
+        checks if self.distrib is legal and does the Pandas support for endog and exog.
+        Also retrieves columns & categories names for .summary() of the results class.
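+
+        Returns
+        -------
+        names : dict
+            may contain 'yname' and 'xname' entries for use in the
+            results.summary() method
+        endog : array_like
+            endog, replaced by the category codes if it is a categorical
+            pd.Series
+        exog : array_like
+            exog, converted to a ndarray if it is a pd.DataFrame or pd.Series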
+ """ + names = {} + if not isinstance(self.distr, stats.rv_continuous): + msg = ( + f"{self.distr.name} must be a scipy.stats distribution." + ) + raise ValueError(msg) + + # Pandas' support + if (isinstance(exog, pd.DataFrame)) or (isinstance(exog, pd.Series)): + exog_name = (exog.name if isinstance(exog, pd.Series) else exog.columns.tolist()) + names['xname'] = exog_name + exog = np.asarray(exog) + + if isinstance(endog, pd.Series): + if isinstance(endog.dtypes, pd.CategoricalDtype): + if not endog.dtype.ordered: + import warnings + warnings.warn("the endog has ordered == False, risk of capturing a wrong order" + " for the categories. ordered == True preferred.", Warning) + endog_name = endog.name + thresholds_name = [str(x) + '/' + str(y) + for x, y in zip(endog.values.categories[:-1], endog.values.categories[1:])] + names['yname'] = endog_name + names['xname'] = names['xname'] + thresholds_name + endog = np.asarray(endog.values.codes) + else: + msg = ( + f"If the endog is a pandas.Serie it must be of categoricalDtype." + ) + raise ValueError(msg) + + return names, endog, exog + def cdf(self, x): From 774a12df4830288eadd9662092db0279b04d0cf3 Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sat, 22 Aug 2020 15:30:03 +0200 Subject: [PATCH 04/24] FIX: pd.CategoricalDtype changed for pandas < 0.24 --- statsmodels/miscmodels/ordinal_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index c48c030e191..cb4d3525ceb 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd +from pandas.api.types import CategoricalDtype from scipy import stats from statsmodels.base.model import GenericLikelihoodModel, GenericLikelihoodModelResults @@ -107,7 +108,7 @@ def _check_inputs(self, endog, exog): exog = np.asarray(exog) if isinstance(endog, pd.Series): - if isinstance(endog.dtypes, pd.CategoricalDtype): + if isinstance(endog.dtypes, CategoricalDtype): if not endog.dtype.ordered: import warnings warnings.warn("the endog has ordered == False, risk of capturing a wrong order" From a02b691765e41ab8a6687d7dd61e2ac8809c3e45 Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sat, 22 Aug 2020 15:32:16 +0200 Subject: [PATCH 05/24] ENH: added OrderedResults --- statsmodels/miscmodels/ordinal_model.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index cb4d3525ceb..39aa240e1fb 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -11,7 +11,7 @@ from pandas.api.types import CategoricalDtype from scipy import stats from statsmodels.base.model import GenericLikelihoodModel, GenericLikelihoodModelResults - +from statsmodels.compat.pandas import Appender class OrderedModel(GenericLikelihoodModel): """Ordinal Model based on logistic or normal distribution @@ -217,7 +217,23 @@ def start_params(self): start_params = np.concatenate((np.zeros(self.k_vars), start_threshold)) return start_params + @Appender(GenericLikelihoodModel.fit.__doc__) + def fit(self, start_params=None, method='nm', maxiter=500, full_output=1, + disp=1, callback=None, retall=0, **kwargs): -class OrderedResults(GenericLikelihoodModelResults): + fit_method = super(OrderedModel, self).fit + mlefit = fit_method(start_params=start_params, + method=method, maxiter=maxiter, 
+ full_output=full_output, + disp=disp, callback=callback, **kwargs) + # use the proper result class + ordmlefit = OrderedResults(self, mlefit) - pass + return ordmlefit + +class OrderedResults(GenericLikelihoodModelResults): + @Appender(GenericLikelihoodModelResults.summary.__doc__) + def summary(self, yname=None, xname=None, title=None, alpha=.05, **kwargs): + names = self.model.names + print(names) + return super(OrderedResults, self).summary(**names, **kwargs) From cf43203e61e46f839ce658e7010ebd5e8952b167 Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sat, 22 Aug 2020 15:41:10 +0200 Subject: [PATCH 06/24] TST: added raw data & R results Holders --- .../miscmodels/tests/results/ologit_ucla.csv | 401 ++++++++++++++++++ .../tests/results/results_ordinal_method.py | 101 +++++ 2 files changed, 502 insertions(+) create mode 100644 statsmodels/miscmodels/tests/results/ologit_ucla.csv create mode 100644 statsmodels/miscmodels/tests/results/results_ordinal_method.py diff --git a/statsmodels/miscmodels/tests/results/ologit_ucla.csv b/statsmodels/miscmodels/tests/results/ologit_ucla.csv new file mode 100644 index 00000000000..0c77e0ea8b5 --- /dev/null +++ b/statsmodels/miscmodels/tests/results/ologit_ucla.csv @@ -0,0 +1,401 @@ +apply,pared,public,gpa +very likely,0,0,3.26 +somewhat likely,1,0,3.21 +unlikely,1,1,3.94 +somewhat likely,0,0,2.81 +somewhat likely,0,0,2.53 +unlikely,0,1,2.59 +somewhat likely,0,0,2.56 +somewhat likely,0,0,2.73 +unlikely,0,0,3.0 +somewhat likely,1,0,3.5 +unlikely,1,1,3.65 +somewhat likely,0,0,2.84 +very likely,0,1,3.9 +somewhat likely,0,0,2.68 +unlikely,1,0,3.57 +unlikely,0,0,3.09 +unlikely,0,1,3.5 +unlikely,0,0,2.17 +very likely,0,1,3.36 +somewhat likely,0,0,3.4 +very likely,0,0,2.75 +somewhat likely,1,0,3.2 +unlikely,0,0,2.44 +unlikely,0,0,2.83 +unlikely,0,1,3.0 +somewhat likely,0,1,3.27 +somewhat likely,1,0,3.14 +somewhat likely,0,0,3.37 +very likely,0,1,2.79 +unlikely,0,0,2.9 +somewhat likely,0,0,3.38 +unlikely,0,1,2.95 +unlikely,0,0,2.98 +unlikely,1,1,3.81 +unlikely,0,0,2.74 +unlikely,0,0,2.62 +unlikely,0,0,2.85 +somewhat likely,0,0,2.5 +somewhat likely,0,0,2.75 +unlikely,0,0,2.26 +unlikely,0,0,2.03 +somewhat likely,1,0,2.85 +unlikely,0,0,2.72 +somewhat likely,0,0,2.89 +somewhat likely,1,0,2.47 +unlikely,0,0,3.04 +unlikely,0,0,3.1 +unlikely,0,0,2.57 +unlikely,0,0,2.09 +somewhat likely,0,0,2.94 +unlikely,0,0,3.45 +unlikely,0,0,2.76 +very likely,0,1,2.96 +unlikely,0,0,2.89 +somewhat likely,0,0,2.97 +somewhat likely,0,1,3.91 +somewhat likely,0,0,2.77 +unlikely,0,0,2.51 +unlikely,0,0,3.24 +unlikely,0,0,2.44 +somewhat likely,0,0,2.78 +unlikely,0,0,2.94 +unlikely,1,0,3.22 +unlikely,0,0,3.5 +very likely,1,0,3.57 +somewhat likely,0,0,3.17 +unlikely,0,1,3.23 +unlikely,0,0,2.91 +unlikely,0,0,3.28 +unlikely,0,1,3.32 +unlikely,0,0,3.62 +unlikely,0,0,2.55 +unlikely,0,0,2.97 +very likely,1,0,3.63 +unlikely,0,1,3.02 +unlikely,0,1,3.6 +unlikely,0,0,2.95 +very likely,1,1,3.81 +somewhat likely,1,0,2.68 +unlikely,1,0,3.72 +unlikely,0,0,2.49 +unlikely,0,0,2.72 +unlikely,1,0,2.25 +unlikely,0,0,2.53 +unlikely,0,0,3.26 +very likely,0,0,2.57 +unlikely,0,0,2.9 +unlikely,0,0,3.05 +somewhat likely,0,0,2.85 +somewhat likely,0,0,4.0 +very likely,1,0,3.4 +somewhat likely,0,1,3.24 +unlikely,0,1,3.51 +very likely,0,0,2.55 +unlikely,0,0,3.06 +very likely,0,0,3.0 +somewhat likely,0,0,3.13 +unlikely,0,0,3.16 +somewhat likely,0,0,2.47 +somewhat likely,0,0,2.89 +unlikely,0,0,2.74 +unlikely,0,0,3.16 +unlikely,0,0,2.92 +somewhat likely,0,0,3.57 +somewhat likely,1,0,3.49 +somewhat 
likely,0,1,2.56 +somewhat likely,1,0,3.12 +somewhat likely,1,0,2.65 +somewhat likely,0,0,2.51 +somewhat likely,0,0,3.08 +somewhat likely,0,0,2.65 +unlikely,0,0,2.99 +unlikely,0,0,2.63 +somewhat likely,0,0,2.8 +somewhat likely,0,1,3.46 +unlikely,0,0,3.61 +unlikely,0,0,3.08 +unlikely,1,0,3.28 +unlikely,0,1,3.31 +unlikely,0,0,2.57 +somewhat likely,0,0,3.39 +unlikely,0,0,2.26 +unlikely,0,0,2.42 +unlikely,0,0,2.6 +somewhat likely,1,1,3.44 +somewhat likely,0,0,2.98 +somewhat likely,1,0,3.09 +unlikely,0,1,3.34 +unlikely,0,0,2.84 +unlikely,0,0,2.81 +unlikely,0,0,2.79 +unlikely,0,0,2.4 +unlikely,0,0,3.02 +somewhat likely,0,0,2.65 +somewhat likely,0,0,2.55 +somewhat likely,1,0,3.12 +unlikely,0,0,2.98 +very likely,1,0,3.61 +unlikely,0,0,2.98 +unlikely,0,0,3.19 +unlikely,0,0,3.51 +unlikely,0,0,3.1 +unlikely,0,0,3.55 +unlikely,0,0,2.98 +very likely,0,0,2.99 +unlikely,0,0,3.05 +unlikely,0,0,2.99 +unlikely,0,0,2.32 +unlikely,0,0,2.5 +unlikely,0,1,2.9 +very likely,1,1,3.28 +somewhat likely,0,0,2.95 +somewhat likely,0,0,3.54 +somewhat likely,0,0,3.11 +unlikely,1,0,3.25 +unlikely,0,0,2.44 +unlikely,0,0,2.13 +somewhat likely,0,0,3.22 +somewhat likely,0,0,3.16 +unlikely,0,0,3.39 +somewhat likely,0,0,2.7 +somewhat likely,0,0,3.09 +somewhat likely,0,0,3.16 +unlikely,0,0,2.28 +unlikely,0,0,2.91 +unlikely,0,0,3.65 +unlikely,0,0,2.86 +unlikely,0,1,3.39 +unlikely,0,0,3.71 +unlikely,0,0,3.25 +unlikely,0,0,3.14 +unlikely,0,0,2.41 +somewhat likely,0,0,3.08 +unlikely,0,1,3.02 +somewhat likely,0,0,3.15 +very likely,0,0,2.95 +unlikely,0,0,2.22 +somewhat likely,0,0,2.86 +somewhat likely,1,0,2.88 +very likely,0,0,2.62 +unlikely,0,0,3.37 +somewhat likely,0,0,3.51 +somewhat likely,0,0,3.65 +very likely,1,0,3.42 +very likely,0,0,2.41 +very likely,0,1,3.21 +unlikely,0,0,3.22 +unlikely,0,0,2.53 +somewhat likely,0,0,2.64 +unlikely,0,0,2.94 +somewhat likely,0,0,2.56 +unlikely,0,1,3.12 +somewhat likely,0,0,3.34 +unlikely,0,0,3.22 +somewhat likely,0,0,3.05 +somewhat likely,0,0,3.29 +somewhat likely,0,0,2.71 +unlikely,0,0,2.87 +unlikely,0,0,3.29 +unlikely,0,0,3.36 +unlikely,1,1,2.85 +unlikely,0,0,2.79 +somewhat likely,0,0,3.69 +very likely,0,0,3.56 +unlikely,0,0,3.52 +unlikely,1,1,3.38 +very likely,0,1,3.11 +somewhat likely,1,0,3.2 +unlikely,0,0,2.83 +unlikely,0,0,3.08 +very likely,0,1,2.97 +unlikely,0,0,2.64 +unlikely,0,0,2.47 +very likely,0,0,3.07 +somewhat likely,0,0,3.2 +unlikely,0,0,2.55 +unlikely,0,0,2.48 +unlikely,0,0,3.29 +somewhat likely,0,0,2.64 +somewhat likely,0,0,3.22 +somewhat likely,1,0,2.8 +somewhat likely,0,0,3.62 +unlikely,0,0,2.62 +somewhat likely,0,1,3.04 +unlikely,0,1,2.49 +unlikely,0,1,3.1 +very likely,1,0,3.15 +somewhat likely,0,0,2.65 +unlikely,0,0,3.04 +unlikely,0,1,3.05 +unlikely,0,0,2.88 +somewhat likely,0,0,2.86 +somewhat likely,0,0,3.0 +unlikely,0,0,2.23 +unlikely,0,1,3.14 +unlikely,0,0,2.67 +unlikely,0,0,3.11 +somewhat likely,0,0,3.73 +somewhat likely,0,0,2.45 +unlikely,0,0,3.04 +unlikely,0,0,2.4 +somewhat likely,0,0,3.43 +somewhat likely,0,0,2.57 +unlikely,0,0,2.84 +unlikely,0,0,2.67 +very likely,1,1,3.45 +somewhat likely,1,0,2.88 +somewhat likely,0,0,2.78 +unlikely,0,0,3.22 +unlikely,1,0,3.3 +somewhat likely,1,0,2.86 +somewhat likely,0,0,2.83 +somewhat likely,0,0,3.7 +somewhat likely,0,0,3.15 +somewhat likely,1,1,3.08 +unlikely,0,0,2.92 +unlikely,0,0,2.89 +very likely,1,0,3.53 +unlikely,0,0,3.1 +somewhat likely,1,0,3.36 +somewhat likely,0,0,2.74 +somewhat likely,0,0,2.73 +unlikely,1,0,2.72 +unlikely,0,0,3.53 +unlikely,1,0,3.45 +unlikely,0,0,2.98 +somewhat likely,0,0,2.88 +very likely,1,0,3.21 
+unlikely,0,0,3.06 +unlikely,0,0,2.4 +somewhat likely,0,0,3.57 +unlikely,1,0,2.88 +unlikely,0,0,3.27 +unlikely,0,0,2.92 +very likely,0,0,2.7 +somewhat likely,1,0,2.53 +somewhat likely,0,0,3.38 +unlikely,0,0,3.16 +somewhat likely,0,0,2.6 +unlikely,0,0,2.76 +unlikely,0,0,3.56 +unlikely,0,0,2.87 +somewhat likely,1,0,2.99 +unlikely,0,0,2.68 +unlikely,0,0,2.99 +somewhat likely,0,0,2.96 +unlikely,1,1,2.77 +somewhat likely,0,0,3.28 +unlikely,0,1,2.74 +unlikely,0,0,1.9 +unlikely,0,0,3.05 +somewhat likely,0,1,2.44 +somewhat likely,0,0,3.33 +somewhat likely,0,1,3.56 +unlikely,0,0,2.15 +unlikely,0,0,2.83 +somewhat likely,0,0,2.79 +somewhat likely,0,0,2.72 +very likely,1,0,3.04 +unlikely,0,1,3.03 +unlikely,0,0,2.88 +somewhat likely,0,0,3.21 +unlikely,0,0,2.74 +unlikely,0,0,3.57 +somewhat likely,0,0,2.43 +unlikely,0,0,3.11 +unlikely,0,0,2.76 +somewhat likely,0,0,3.13 +unlikely,0,0,2.75 +unlikely,0,0,2.6 +unlikely,0,0,3.67 +unlikely,0,0,3.26 +unlikely,0,0,3.41 +very likely,0,0,3.24 +unlikely,0,0,3.05 +somewhat likely,1,0,2.38 +somewhat likely,0,0,2.61 +somewhat likely,1,1,3.18 +unlikely,0,0,3.34 +somewhat likely,0,0,2.84 +unlikely,0,0,2.8 +unlikely,0,0,3.21 +unlikely,0,0,2.63 +unlikely,0,0,2.31 +unlikely,0,0,2.29 +unlikely,0,0,3.51 +somewhat likely,0,0,2.87 +unlikely,0,0,3.09 +somewhat likely,1,0,2.88 +unlikely,1,0,2.47 +somewhat likely,0,1,3.72 +unlikely,0,0,3.2 +somewhat likely,0,0,2.57 +unlikely,0,0,3.05 +unlikely,0,0,3.27 +unlikely,0,0,3.3 +unlikely,0,0,2.73 +unlikely,1,0,2.63 +unlikely,0,0,2.77 +unlikely,0,0,3.15 +unlikely,0,0,2.85 +unlikely,0,0,2.25 +somewhat likely,0,0,3.56 +unlikely,0,0,2.7 +very likely,0,0,2.94 +somewhat likely,1,0,2.58 +somewhat likely,0,0,2.97 +very likely,0,0,3.38 +very likely,0,0,2.96 +unlikely,0,0,2.44 +somewhat likely,0,1,3.75 +somewhat likely,0,0,3.07 +somewhat likely,0,0,2.72 +unlikely,0,0,3.13 +unlikely,0,0,3.39 +unlikely,0,1,2.38 +somewhat likely,0,0,2.87 +unlikely,0,1,2.91 +very likely,1,0,3.25 +somewhat likely,1,1,3.51 +unlikely,1,0,3.65 +somewhat likely,0,0,3.11 +very likely,0,0,2.71 +unlikely,0,0,3.05 +unlikely,0,0,2.93 +somewhat likely,0,0,2.35 +somewhat likely,0,0,2.49 +unlikely,0,0,3.26 +unlikely,0,0,3.77 +unlikely,0,1,3.49 +unlikely,0,0,3.39 +unlikely,0,0,3.37 +unlikely,0,0,1.98 +unlikely,1,0,2.93 +unlikely,0,0,3.36 +unlikely,0,0,3.12 +somewhat likely,0,0,3.23 +somewhat likely,0,1,3.9 +unlikely,0,0,2.7 +unlikely,0,0,2.34 +unlikely,0,0,3.3 +somewhat likely,0,0,3.11 +unlikely,0,0,3.14 +somewhat likely,1,0,3.68 +somewhat likely,0,0,2.22 +unlikely,0,0,2.61 +somewhat likely,0,1,3.48 +very likely,0,0,3.08 +unlikely,0,0,2.79 +unlikely,0,0,3.12 +unlikely,0,1,2.67 +somewhat likely,0,0,3.54 +very likely,0,0,2.98 +somewhat likely,1,0,3.32 +somewhat likely,1,0,3.54 +unlikely,0,0,3.7 +unlikely,0,0,2.63 +somewhat likely,0,0,2.25 +somewhat likely,0,0,3.26 +very likely,0,0,3.52 diff --git a/statsmodels/miscmodels/tests/results/results_ordinal_method.py b/statsmodels/miscmodels/tests/results/results_ordinal_method.py new file mode 100644 index 00000000000..25497b0c2d8 --- /dev/null +++ b/statsmodels/miscmodels/tests/results/results_ordinal_method.py @@ -0,0 +1,101 @@ +""" +Test Results for ordinal models from R MASS lib +""" + +import numpy as np +import os +import pandas as pd +from statsmodels.tools.testing import Holder + +# R (v3.4.4) code inspired from https://stats.idre.ucla.edu/r/dae/ordinal-logistic-regression/ +# library(readr) # to open the file +# library(MASS) # to perform ordinal regression +# +# ## load the data, 400 rows with 3 exogs(2 binaries, 1 float) and 
target 3-ordinal variable +# ologit_ucla <- read_csv("ologit_ucla.csv") +# ologit_ucla$apply <- as.factor(ologit_ucla$apply) +# ologit_ucla$apply <- factor(ologit_ucla$apply, levels=c("unlikely", "somewhat likely", "very likely")) +# +# ## fit ordered logit model +# r_logit <- polr(apply ~ pared + public + gpa, +# data = ologit_ucla, +# method = 'logit', # or 'probit' +# Hess=TRUE) +# +# ## fit ordered probit model +# r_probit <- polr(apply ~ pared + public + gpa, +# data = ologit_ucla, +# method = 'probit', +# Hess=TRUE) +# +# ## fit ordered cloglog model +# r_cloglog <- polr(apply ~ pared + public + gpa, +# data = ologit_ucla, +# method = 'cloglog', +# Hess=TRUE) +# +# ## with r = r_logit or r_probit or r_cloglog +# ## we add p-values +# (ctable <- coef(summary(r))) +# p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 +# (ctable <- cbind(ctable, "p value" = p)) +# ## show 7 first predictions +# head(predict(r, subset(ologit_ucla, select=c("pared", "public","gpa")), type='prob'),7) + +data_store = Holder() +cur_dir = os.path.dirname(os.path.abspath(__file__)) +df = pd.read_csv(os.path.join(cur_dir, "ologit_ucla.csv")) +# df_unordered['apply'] is pd.Categorical with ordered = False +df_unordered = df.copy() +df_unordered['apply'] = pd.Categorical(df['apply'], ordered=False) +# but categories are set in order +df_unordered['apply'].cat.set_categories(['unlikely', 'somewhat likely', 'very likely'], inplace=True) +# df['apply'] is pd.Categorical with ordered = True +df['apply'] = pd.Categorical(df['apply'], ordered=True) +df['apply'].cat.set_categories(['unlikely', 'somewhat likely', 'very likely'], inplace=True) +data_store.df_unordered = df_unordered +data_store.df = df +data_store.nobs = 400 +data_store.n_ordinal_cat = 3 + +res_ord_logit = Holder() +res_ord_logit.coefficients_val = np.array([1.04769011, -0.05878572, 0.61594057]) +res_ord_logit.coefficients_stdE = np.array([0.2658, 0.2979, 0.2606]) +res_ord_logit.coefficients_tval = np.array([3.9418, -0.1974, 2.3632]) +res_ord_logit.coefficients_pval = np.array([8.087070e-05, 8.435464e-01, 1.811594e-02]) +res_ord_logit.thresholds = np.array([2.203915, 4.299363]) +res_ord_logit.prob_pred = np.array([[0.5488310, 0.3593310, 0.09183798], + [0.3055632, 0.4759496, 0.21848725], + [0.2293835, 0.4781951, 0.29242138], + [0.6161224, 0.3126888, 0.07118879], + [0.6560149, 0.2833901, 0.06059505], + [0.6609240, 0.2797117, 0.05936431], + [0.6518332, 0.2865114, 0.06165547]]) + +res_ord_probit = Holder() +res_ord_probit.coefficients_val = np.array([0.59811, 0.01016, 0.35815]) +res_ord_probit.coefficients_stdE = np.array([0.1579, 0.1728, 0.1568]) +res_ord_probit.coefficients_tval = np.array([3.78881, 0.05878, 2.28479]) +res_ord_probit.coefficients_pval = np.array([1.513681e-04, 9.531256e-01, 2.232519e-02]) +res_ord_probit.thresholds = np.array([1.2968, 2.5028]) +res_ord_probit.prob_pred = np.array([[0.5514181, 0.3576848, 0.09089707], + [0.3260107, 0.4488799, 0.22510933], + [0.2349733, 0.4506351, 0.31439162], + [0.6142501, 0.3184778, 0.06727214], + [0.6519891, 0.2928449, 0.05516602], + [0.6402204, 0.3009945, 0.05878509], + [0.6480094, 0.2956162, 0.05637442]]) + +res_ord_cloglog = Holder() +res_ord_cloglog.coefficients_val = np.array([0.5166455, 0.1081131, 0.3343895]) +res_ord_cloglog.coefficients_stdE = np.array([0.1613525, 0.1680675, 0.1542065]) +res_ord_cloglog.coefficients_tval = np.array([3.2019668, 0.6432721, 2.1684534]) +res_ord_cloglog.coefficients_pval = np.array([1.364927e-03, 5.200475e-01, 3.012421e-02]) +res_ord_cloglog.thresholds = 
np.array([0.8705304, 1.9744660]) +res_ord_cloglog.prob_pred = np.array([[0.5519526, 0.3592524, 0.08879500], + [0.3855287, 0.3842645, 0.23020682], + [0.2899487, 0.3540202, 0.35603111], + [0.6067184, 0.3333548, 0.05992678], + [0.6411418, 0.3133969, 0.04546127], + [0.5940557, 0.3400072, 0.06593710], + [0.6374521, 0.3156622, 0.04688570]]) From 35aa720cadd95a9d803652771bf6b1899fed10c3 Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sat, 22 Aug 2020 15:45:36 +0200 Subject: [PATCH 07/24] TST: added logit & probit tests --- .../miscmodels/tests/test_ordinal_model.py | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 statsmodels/miscmodels/tests/test_ordinal_model.py diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py new file mode 100644 index 00000000000..b4c85fdbb00 --- /dev/null +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -0,0 +1,138 @@ +""" +Test for ordinal models +""" + +import numpy as np + +from numpy.testing import assert_allclose +from .results.results_ordinal_model import data_store as ds +from statsmodels.miscmodels.ordinal_model import OrderedModel + + +class CheckOrdinalModelMixin(object): + + def test_basic(self): + n_cat = ds.n_ordinal_cat + res1 = self.res1 + res2 = self.res2 + # coefficients values, standard errors, t & p values + assert_allclose(res1.params[:-n_cat + 1], res2.coefficients_val, atol=2e-4) + assert_allclose(res1.bse[:-n_cat + 1], res2.coefficients_stdE, rtol=0.003, atol=1e-5) + assert_allclose(res1.tvalues[:-n_cat + 1], res2.coefficients_tval, rtol=0.003, atol=7e-4) + assert_allclose(res1.pvalues[:-n_cat + 1], res2.coefficients_pval, rtol=0.009, atol=1e-5) + # thresholds are given with exponentiated increments from the first threshold + assert_allclose(res1.model.transform_threshold_params(res1.params)[1:-1], res2.thresholds, atol=4e-4) + + # probabilities + assert_allclose(res1.predict()[:7, :], + res2.prob_pred, atol=5e-5) + + def test_Pandas(self): + # makes sure that the Pandas ecosystem is supported + res1 = self.res1 + resp = self.resp + # converges slightly differently why? + assert_allclose(res1.params, resp.params, atol=1e-10) + assert_allclose(res1.bse, resp.bse, atol=1e-10) + + assert_allclose(res1.model.endog, resp.model.endog, rtol=1e-10) + assert_allclose(res1.model.exog, resp.model.exog, rtol=1e-10) + + def test_formula(self): + res1 = self.res1 + resf = self.resf + # converges slightly differently why? yet e-5 is ok + assert_allclose(res1.params, resf.params, atol=5e-5) + assert_allclose(res1.bse, resf.bse, atol=5e-5) + + assert_allclose(res1.model.endog, resf.model.endog, rtol=1e-10) + assert_allclose(res1.model.exog, resf.model.exog, rtol=1e-10) + + def test_unordered(self): + # makes sure that ordered = True is optional for the endog Serie + # et categories have to be set in the right order + res1 = self.res1 + resf = self.resu + # converges slightly differently why? 
+ assert_allclose(res1.params, resf.params, atol=1e-10) + assert_allclose(res1.bse, resf.bse, atol=1e-10) + + assert_allclose(res1.model.endog, resf.model.endog, rtol=1e-10) + assert_allclose(res1.model.exog, resf.model.exog, rtol=1e-10) + + +class TestLogitModel(CheckOrdinalModelMixin): + + @classmethod + def setup_class(cls): + data = ds.df + data_unordered = ds.df_unordered + + # standard fit + mod = OrderedModel(data['apply'].values.codes, + np.asarray(data[['pared', 'public', 'gpa']], float), + distr='logit') + res = mod.fit(method='bfgs', disp=False) + # standard fit with pandas input + modp = OrderedModel(data['apply'], + data[['pared', 'public', 'gpa']], + distr='logit') + resp = modp.fit(method='bfgs', disp=False) + # fit with formula + modf = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='logit') + resf = modf.fit(method='bfgs', disp=False) + # fit on data with ordered=False + modu = OrderedModel(data_unordered['apply'].values.codes, + np.asarray(data_unordered[['pared', 'public', 'gpa']], float), + distr='logit') + resu = modu.fit(method='bfgs', disp=False) + + from .results.results_ordinal_model import res_ord_logit as res2 + cls.res2 = res2 + cls.res1 = res + cls.resp = resp + cls.resf = resf + cls.resu = resu + + +class TestProbitModel(CheckOrdinalModelMixin): + + @classmethod + def setup_class(cls): + data = ds.df + data_unordered = ds.df_unordered + + mod = OrderedModel(data['apply'].values.codes, + np.asarray(data[['pared', 'public', 'gpa']], float), + distr='probit') + res = mod.fit(method='bfgs', disp=False) + + modp = OrderedModel(data['apply'], + data[['pared', 'public', 'gpa']], + distr='probit') + resp = modp.fit(method='bfgs', disp=False) + + modf = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='probit') + resf = modf.fit(method='bfgs', disp=False) + + modu = OrderedModel(data_unordered['apply'].values.codes, + np.asarray(data_unordered[['pared', 'public', 'gpa']], float), + distr='probit') + resu = modu.fit(method='bfgs', disp=False) + + from .results.results_ordinal_model import res_ord_probit as res2 + cls.res2 = res2 + cls.res1 = res + cls.resp = resp + cls.resf = resf + cls.resu = resu From 4b2034177e18254383a3b080731863f68374d44b Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sat, 22 Aug 2020 15:49:43 +0200 Subject: [PATCH 08/24] TST: added custom distribution tests (with cLogLog) --- ...nal_method.py => results_ordinal_model.py} | 3 ++ .../miscmodels/tests/test_ordinal_model.py | 48 +++++++++++++++++++ 2 files changed, 51 insertions(+) rename statsmodels/miscmodels/tests/results/{results_ordinal_method.py => results_ordinal_model.py} (99%) diff --git a/statsmodels/miscmodels/tests/results/results_ordinal_method.py b/statsmodels/miscmodels/tests/results/results_ordinal_model.py similarity index 99% rename from statsmodels/miscmodels/tests/results/results_ordinal_method.py rename to statsmodels/miscmodels/tests/results/results_ordinal_model.py index 25497b0c2d8..ab6bb277800 100644 --- a/statsmodels/miscmodels/tests/results/results_ordinal_method.py +++ b/statsmodels/miscmodels/tests/results/results_ordinal_model.py @@ -45,14 +45,17 @@ data_store = Holder() cur_dir = os.path.dirname(os.path.abspath(__file__)) df = pd.read_csv(os.path.join(cur_dir, 
"ologit_ucla.csv")) + # df_unordered['apply'] is pd.Categorical with ordered = False df_unordered = df.copy() df_unordered['apply'] = pd.Categorical(df['apply'], ordered=False) # but categories are set in order df_unordered['apply'].cat.set_categories(['unlikely', 'somewhat likely', 'very likely'], inplace=True) + # df['apply'] is pd.Categorical with ordered = True df['apply'] = pd.Categorical(df['apply'], ordered=True) df['apply'].cat.set_categories(['unlikely', 'somewhat likely', 'very likely'], inplace=True) + data_store.df_unordered = df_unordered data_store.df = df data_store.nobs = 400 diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index b4c85fdbb00..e7e7efd3967 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -3,6 +3,7 @@ """ import numpy as np +import scipy.stats as stats from numpy.testing import assert_allclose from .results.results_ordinal_model import data_store as ds @@ -136,3 +137,50 @@ def setup_class(cls): cls.resp = resp cls.resf = resf cls.resu = resu + +class TestCLogLogModel(CheckOrdinalModelMixin): + + @classmethod + def setup_class(cls): + data = ds.df + data_unordered = ds.df_unordered + + # a Scipy distribution defined minimally + class CLogLog(stats.rv_continuous): + def _ppf(self, q): + return np.log(-np.log(1 - q)) + + def _cdf(self, x): + return 1 - np.exp(-np.exp(x)) + + cloglog = CLogLog() + + mod = OrderedModel(data['apply'].values.codes, + np.asarray(data[['pared', 'public', 'gpa']], float), + distr=cloglog) + res = mod.fit(method='bfgs', disp=False) + + modp = OrderedModel(data['apply'], + data[['pared', 'public', 'gpa']], + distr=cloglog) + resp = modp.fit(method='bfgs', disp=False) + + modf = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr=cloglog) + resf = modf.fit(method='bfgs', disp=False) + + modu = OrderedModel(data_unordered['apply'].values.codes, + np.asarray(data_unordered[['pared', 'public', 'gpa']], float), + distr=cloglog) + resu = modu.fit(method='bfgs', disp=False) + + from .results.results_ordinal_model import res_ord_cloglog as res2 + cls.res2 = res2 + cls.res1 = res + cls.resp = resp + cls.resf = resf + cls.resu = resu From 23edfd8cf813898205c8bd62e705f9a62b00b71e Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sat, 22 Aug 2020 16:03:11 +0200 Subject: [PATCH 09/24] ENH: add custom distribution ex with Pandas inputs --- statsmodels/examples/ex_ordered_model.py | 26 +++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/statsmodels/examples/ex_ordered_model.py b/statsmodels/examples/ex_ordered_model.py index b6afcec34d9..3edd270eabe 100644 --- a/statsmodels/examples/ex_ordered_model.py +++ b/statsmodels/examples/ex_ordered_model.py @@ -8,6 +8,7 @@ import numpy as np from scipy import stats +import pandas from statsmodels.miscmodels.ordinal_model import OrderedModel @@ -48,12 +49,11 @@ res_log = OrderedModel(y, x, distr='logit').fit(method='bfgs') pred_choice_log = res_log.predict().argmax(1) print((y == pred_choice_log).mean()) - +print(res_log.summary()) # example form UCLA Stats pages http://www.ats.ucla.edu/stat/stata/dae/ologit.htm # requires downloaded dataset ologit.dta -import pandas dataf = pandas.read_stata(r"M:\josef_new\scripts\ologit_ucla.dta") # this works but sorts category levels alphabetically @@ -69,4 
+69,24 @@ print(res_log3.summary()) # with ordered probit - not on UCLA page -print(OrderedModel(dataf['apply'].values.codes, np.asarray(dataf[['pared', 'public', 'gpa']], float), distr='probit').fit(method='bfgs').summary()) +print( + OrderedModel(dataf['apply'].values.codes, np.asarray(dataf[['pared', 'public', 'gpa']], float), distr='probit').fit( + method='bfgs').summary()) + + +# example with a custom distribution - not on UCLA page +# definition of the SciPy dist +class CLogLog(stats.rv_continuous): + def _ppf(self, q): + return np.log(-np.log(1 - q)) + + def _cdf(self, x): + return 1 - np.exp(-np.exp(x)) + + +cloglog = CLogLog() + +res_cloglog = OrderedModel(dataf['apply'], + dataf[['pared', 'public', 'gpa']], + distr=cloglog).fit(method='bfgs', disp=False) +print(res_cloglog.summary()) From b88f2be5f513c3d31f65e3a3139b85fa3bf2704a Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sat, 22 Aug 2020 16:44:36 +0200 Subject: [PATCH 10/24] ENH: small typos --- statsmodels/miscmodels/tests/test_ordinal_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index e7e7efd3967..8fbba09973f 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -13,6 +13,7 @@ class CheckOrdinalModelMixin(object): def test_basic(self): + # checks basic results againt R MASS package n_cat = ds.n_ordinal_cat res1 = self.res1 res2 = self.res2 @@ -28,7 +29,7 @@ def test_basic(self): assert_allclose(res1.predict()[:7, :], res2.prob_pred, atol=5e-5) - def test_Pandas(self): + def test_pandas(self): # makes sure that the Pandas ecosystem is supported res1 = self.res1 resp = self.resp @@ -40,6 +41,7 @@ def test_Pandas(self): assert_allclose(res1.model.exog, resp.model.exog, rtol=1e-10) def test_formula(self): + # makes sure the "R-way" of writing models is supported res1 = self.res1 resf = self.resf # converges slightly differently why? 
yet e-5 is ok @@ -138,6 +140,7 @@ def setup_class(cls): cls.resf = resf cls.resu = resu + class TestCLogLogModel(CheckOrdinalModelMixin): @classmethod From cb25187b840a5c51fedb1327467b4595a6212b96 Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Sun, 23 Aug 2020 14:25:58 +0200 Subject: [PATCH 11/24] STY: flake8 friendly --- statsmodels/examples/ex_ordered_model.py | 21 +++-- statsmodels/miscmodels/ordinal_model.py | 54 +++++++------ .../tests/results/results_ordinal_model.py | 30 ++++--- .../miscmodels/tests/test_ordinal_model.py | 79 +++++++++++-------- 4 files changed, 111 insertions(+), 73 deletions(-) diff --git a/statsmodels/examples/ex_ordered_model.py b/statsmodels/examples/ex_ordered_model.py index 3edd270eabe..b06c3c569ae 100644 --- a/statsmodels/examples/ex_ordered_model.py +++ b/statsmodels/examples/ex_ordered_model.py @@ -14,7 +14,8 @@ nobs, k_vars = 1000, 3 x = np.random.randn(nobs, k_vars) -#x = np.column_stack((np.ones(nobs), x)) #constant will be in integration limits +# x = np.column_stack((np.ones(nobs), x)) +# #constant will be in integration limits xb = x.dot(np.ones(k_vars)) y_latent = xb + np.random.randn(nobs) y = np.round(np.clip(y_latent, -2.4, 2.4)).astype(int) + 2 @@ -26,16 +27,17 @@ start_params = np.ones(k_vars + 4) start_params = np.concatenate((np.ones(k_vars), np.arange(4))) start_ppf = stats.norm.ppf((np.bincount(y) / len(y)).cumsum()) -start_threshold = np.concatenate((start_ppf[:1], np.log(np.diff(start_ppf[:-1])))) +start_threshold = np.concatenate((start_ppf[:1], + np.log(np.diff(start_ppf[:-1])))) start_params = np.concatenate((np.zeros(k_vars), start_threshold)) res = mod.fit(start_params=start_params, maxiter=5000, maxfun=5000) print(res.params) -#res = mod.fit(start_params=res.params, method='bfgs') +# res = mod.fit(start_params=res.params, method='bfgs') res = mod.fit(start_params=start_params, method='bfgs') print(res.params) print(np.exp(res.params[-(mod.k_levels - 1):]).cumsum()) -#print(res.summary()) +# print(res.summary()) predicted = res.model.predict(res.params) pred_choice = predicted.argmax(1) @@ -51,7 +53,8 @@ print((y == pred_choice_log).mean()) print(res_log.summary()) -# example form UCLA Stats pages http://www.ats.ucla.edu/stat/stata/dae/ologit.htm +# example form UCLA Stats pages +# http://www.ats.ucla.edu/stat/stata/dae/ologit.htm # requires downloaded dataset ologit.dta dataf = pandas.read_stata(r"M:\josef_new\scripts\ologit_ucla.dta") @@ -61,7 +64,8 @@ np.asarray(dataf[['pared', 'public', 'gpa']], float), distr='logit').fit(method='bfgs') -# this replicates the UCLA example except for different parameterization of par2 +# this replicates the UCLA example except +# for different parameterization of par2 res_log3 = OrderedModel(dataf['apply'].values.codes, np.asarray(dataf[['pared', 'public', 'gpa']], float), distr='logit').fit(method='bfgs') @@ -70,8 +74,9 @@ # with ordered probit - not on UCLA page print( - OrderedModel(dataf['apply'].values.codes, np.asarray(dataf[['pared', 'public', 'gpa']], float), distr='probit').fit( - method='bfgs').summary()) + OrderedModel(dataf['apply'].values.codes, + np.asarray(dataf[['pared', 'public', 'gpa']], float), + distr='probit').fit(method='bfgs').summary()) # example with a custom distribution - not on UCLA page diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index 39aa240e1fb..679a60e53a1 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -10,9 +10,11 @@ import pandas as pd from 
pandas.api.types import CategoricalDtype from scipy import stats -from statsmodels.base.model import GenericLikelihoodModel, GenericLikelihoodModelResults +from statsmodels.base.model import GenericLikelihoodModel, \ + GenericLikelihoodModelResults from statsmodels.compat.pandas import Appender + class OrderedModel(GenericLikelihoodModel): """Ordinal Model based on logistic or normal distribution @@ -91,8 +93,9 @@ def __init__(self, endog, exog, distr='probit', **kwds): def _check_inputs(self, endog, exog): """ - checks if self.distrib is legal and does the Pandas support for endog and exog. - Also retrieves columns & categories names for .summary() of the results class. + checks if self.distrib is legal and does the Pandas + support for endog and exog. Also retrieves columns & categories + names for .summary() of the results class. """ names = {} if not isinstance(self.distr, stats.rv_continuous): @@ -103,7 +106,8 @@ def _check_inputs(self, endog, exog): # Pandas' support if (isinstance(exog, pd.DataFrame)) or (isinstance(exog, pd.Series)): - exog_name = (exog.name if isinstance(exog, pd.Series) else exog.columns.tolist()) + exog_name = (exog.name if isinstance(exog, pd.Series) + else exog.columns.tolist()) names['xname'] = exog_name exog = np.asarray(exog) @@ -111,24 +115,26 @@ def _check_inputs(self, endog, exog): if isinstance(endog.dtypes, CategoricalDtype): if not endog.dtype.ordered: import warnings - warnings.warn("the endog has ordered == False, risk of capturing a wrong order" - " for the categories. ordered == True preferred.", Warning) + warnings.warn("the endog has ordered == False, " + "risk of capturing a wrong order for the " + "categories. ordered == True preferred.", + Warning) endog_name = endog.name - thresholds_name = [str(x) + '/' + str(y) - for x, y in zip(endog.values.categories[:-1], endog.values.categories[1:])] + threshold_name = [str(x) + '/' + str(y) + for x, y in zip(endog.values.categories[:-1], + endog.values.categories[1:])] names['yname'] = endog_name - names['xname'] = names['xname'] + thresholds_name + names['xname'] = names['xname'] + threshold_name endog = np.asarray(endog.values.codes) else: msg = ( - f"If the endog is a pandas.Serie it must be of categoricalDtype." + "If the endog is a pandas.Serie " + "it must be of categoricalDtype." ) raise ValueError(msg) return names, endog, exog - - def cdf(self, x): """cdf evaluated at x """ @@ -158,7 +164,8 @@ def transform_threshold_params(self, params): """ th_params = params[-(self.k_levels - 1):] - thresh = np.concatenate((th_params[:1], np.exp(th_params[1:]))).cumsum() + thresh = np.concatenate((th_params[:1], + np.exp(th_params[1:]))).cumsum() thresh = np.concatenate(([-np.inf], thresh, [np.inf])) return thresh @@ -168,37 +175,39 @@ def transform_reverse_threshold_params(self, params): """ start_ppf = params - thresh_params = np.concatenate((start_ppf[:1], np.log(np.diff(start_ppf[:-1])))) + thresh_params = np.concatenate((start_ppf[:1], + np.log(np.diff(start_ppf[:-1])))) return thresh_params - def predict(self, params, exog=None): """predicted probabilities for each level of the ordinal endog. 
""" - #structure of params = [beta, constants_or_thresholds] + # structure of params = [beta, constants_or_thresholds] # explicit in several steps to avoid bugs th_params = params[-(self.k_levels - 1):] - thresh = np.concatenate((th_params[:1], np.exp(th_params[1:]))).cumsum() + thresh = np.concatenate((th_params[:1], + np.exp(th_params[1:]))).cumsum() thresh = np.concatenate(([-np.inf], thresh, [np.inf])) - xb = self.exog.dot(params[:-(self.k_levels - 1)])[:,None] + xb = self.exog.dot(params[:-(self.k_levels - 1)])[:, None] low = thresh[:-1] - xb upp = thresh[1:] - xb prob = self.prob(low, upp) return prob - def loglike(self, params): - #structure of params = [beta, constants_or_thresholds] + # structure of params = [beta, constants_or_thresholds] - thresh = np.concatenate(([-np.inf], params[-(self.k_levels - 1):], [np.inf])) + thresh = np.concatenate(([-np.inf], + params[-(self.k_levels - 1):], [np.inf])) # explicit in several steps to avoid bugs th_params = params[-(self.k_levels - 1):] - thresh = np.concatenate((th_params[:1], np.exp(th_params[1:]))).cumsum() + thresh = np.concatenate((th_params[:1], + np.exp(th_params[1:]))).cumsum() thresh = np.concatenate(([-np.inf], thresh, [np.inf])) thresh_i_low = thresh[self.endog] thresh_i_upp = thresh[self.endog + 1] @@ -231,6 +240,7 @@ def fit(self, start_params=None, method='nm', maxiter=500, full_output=1, return ordmlefit + class OrderedResults(GenericLikelihoodModelResults): @Appender(GenericLikelihoodModelResults.summary.__doc__) def summary(self, yname=None, xname=None, title=None, alpha=.05, **kwargs): diff --git a/statsmodels/miscmodels/tests/results/results_ordinal_model.py b/statsmodels/miscmodels/tests/results/results_ordinal_model.py index ab6bb277800..d4fb122796c 100644 --- a/statsmodels/miscmodels/tests/results/results_ordinal_model.py +++ b/statsmodels/miscmodels/tests/results/results_ordinal_model.py @@ -7,14 +7,17 @@ import pandas as pd from statsmodels.tools.testing import Holder -# R (v3.4.4) code inspired from https://stats.idre.ucla.edu/r/dae/ordinal-logistic-regression/ +# R (v3.4.4) code inspired from +# https://stats.idre.ucla.edu/r/dae/ordinal-logistic-regression/ # library(readr) # to open the file # library(MASS) # to perform ordinal regression # -# ## load the data, 400 rows with 3 exogs(2 binaries, 1 float) and target 3-ordinal variable +# ## load the data, 400 rows with 3 exogs(2 binaries, 1 float) +# ##and target 3-ordinal variable # ologit_ucla <- read_csv("ologit_ucla.csv") # ologit_ucla$apply <- as.factor(ologit_ucla$apply) -# ologit_ucla$apply <- factor(ologit_ucla$apply, levels=c("unlikely", "somewhat likely", "very likely")) +# ologit_ucla$apply <- factor(ologit_ucla$apply, +# levels=c("unlikely", "somewhat likely", "very likely")) # # ## fit ordered logit model # r_logit <- polr(apply ~ pared + public + gpa, @@ -40,7 +43,8 @@ # p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 # (ctable <- cbind(ctable, "p value" = p)) # ## show 7 first predictions -# head(predict(r, subset(ologit_ucla, select=c("pared", "public","gpa")), type='prob'),7) +# head(predict(r, subset(ologit_ucla, +# select=c("pared", "public","gpa")), type='prob'),7) data_store = Holder() cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -50,11 +54,13 @@ df_unordered = df.copy() df_unordered['apply'] = pd.Categorical(df['apply'], ordered=False) # but categories are set in order -df_unordered['apply'].cat.set_categories(['unlikely', 'somewhat likely', 'very likely'], inplace=True) +df_unordered['apply'].cat.set_categories( + 
['unlikely', 'somewhat likely', 'very likely'], inplace=True) # df['apply'] is pd.Categorical with ordered = True df['apply'] = pd.Categorical(df['apply'], ordered=True) -df['apply'].cat.set_categories(['unlikely', 'somewhat likely', 'very likely'], inplace=True) +df['apply'].cat.set_categories( + ['unlikely', 'somewhat likely', 'very likely'], inplace=True) data_store.df_unordered = df_unordered data_store.df = df @@ -62,10 +68,12 @@ data_store.n_ordinal_cat = 3 res_ord_logit = Holder() -res_ord_logit.coefficients_val = np.array([1.04769011, -0.05878572, 0.61594057]) +res_ord_logit.coefficients_val = \ + np.array([1.04769011, -0.05878572, 0.61594057]) res_ord_logit.coefficients_stdE = np.array([0.2658, 0.2979, 0.2606]) res_ord_logit.coefficients_tval = np.array([3.9418, -0.1974, 2.3632]) -res_ord_logit.coefficients_pval = np.array([8.087070e-05, 8.435464e-01, 1.811594e-02]) +res_ord_logit.coefficients_pval = \ + np.array([8.087070e-05, 8.435464e-01, 1.811594e-02]) res_ord_logit.thresholds = np.array([2.203915, 4.299363]) res_ord_logit.prob_pred = np.array([[0.5488310, 0.3593310, 0.09183798], [0.3055632, 0.4759496, 0.21848725], @@ -79,7 +87,8 @@ res_ord_probit.coefficients_val = np.array([0.59811, 0.01016, 0.35815]) res_ord_probit.coefficients_stdE = np.array([0.1579, 0.1728, 0.1568]) res_ord_probit.coefficients_tval = np.array([3.78881, 0.05878, 2.28479]) -res_ord_probit.coefficients_pval = np.array([1.513681e-04, 9.531256e-01, 2.232519e-02]) +res_ord_probit.coefficients_pval = \ + np.array([1.513681e-04, 9.531256e-01, 2.232519e-02]) res_ord_probit.thresholds = np.array([1.2968, 2.5028]) res_ord_probit.prob_pred = np.array([[0.5514181, 0.3576848, 0.09089707], [0.3260107, 0.4488799, 0.22510933], @@ -93,7 +102,8 @@ res_ord_cloglog.coefficients_val = np.array([0.5166455, 0.1081131, 0.3343895]) res_ord_cloglog.coefficients_stdE = np.array([0.1613525, 0.1680675, 0.1542065]) res_ord_cloglog.coefficients_tval = np.array([3.2019668, 0.6432721, 2.1684534]) -res_ord_cloglog.coefficients_pval = np.array([1.364927e-03, 5.200475e-01, 3.012421e-02]) +res_ord_cloglog.coefficients_pval = \ + np.array([1.364927e-03, 5.200475e-01, 3.012421e-02]) res_ord_cloglog.thresholds = np.array([0.8705304, 1.9744660]) res_ord_cloglog.prob_pred = np.array([[0.5519526, 0.3592524, 0.08879500], [0.3855287, 0.3842645, 0.23020682], diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 8fbba09973f..4bb86392afa 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -18,12 +18,19 @@ def test_basic(self): res1 = self.res1 res2 = self.res2 # coefficients values, standard errors, t & p values - assert_allclose(res1.params[:-n_cat + 1], res2.coefficients_val, atol=2e-4) - assert_allclose(res1.bse[:-n_cat + 1], res2.coefficients_stdE, rtol=0.003, atol=1e-5) - assert_allclose(res1.tvalues[:-n_cat + 1], res2.coefficients_tval, rtol=0.003, atol=7e-4) - assert_allclose(res1.pvalues[:-n_cat + 1], res2.coefficients_pval, rtol=0.009, atol=1e-5) - # thresholds are given with exponentiated increments from the first threshold - assert_allclose(res1.model.transform_threshold_params(res1.params)[1:-1], res2.thresholds, atol=4e-4) + assert_allclose(res1.params[:-n_cat + 1], + res2.coefficients_val, atol=2e-4) + assert_allclose(res1.bse[:-n_cat + 1], + res2.coefficients_stdE, rtol=0.003, atol=1e-5) + assert_allclose(res1.tvalues[:-n_cat + 1], + res2.coefficients_tval, rtol=0.003, atol=7e-4) + 
assert_allclose(res1.pvalues[:-n_cat + 1], + res2.coefficients_pval, rtol=0.009, atol=1e-5) + # thresholds are given with exponentiated increments + # from the first threshold + assert_allclose( + res1.model.transform_threshold_params(res1.params)[1:-1], + res2.thresholds, atol=4e-4) # probabilities assert_allclose(res1.predict()[:7, :], @@ -82,17 +89,19 @@ def setup_class(cls): distr='logit') resp = modp.fit(method='bfgs', disp=False) # fit with formula - modf = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='logit') + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='logit') resf = modf.fit(method='bfgs', disp=False) # fit on data with ordered=False - modu = OrderedModel(data_unordered['apply'].values.codes, - np.asarray(data_unordered[['pared', 'public', 'gpa']], float), - distr='logit') + modu = OrderedModel( + data_unordered['apply'].values.codes, + np.asarray(data_unordered[['pared', 'public', 'gpa']], float), + distr='logit') resu = modu.fit(method='bfgs', disp=False) from .results.results_ordinal_model import res_ord_logit as res2 @@ -120,17 +129,19 @@ def setup_class(cls): distr='probit') resp = modp.fit(method='bfgs', disp=False) - modf = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='probit') + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='probit') resf = modf.fit(method='bfgs', disp=False) - modu = OrderedModel(data_unordered['apply'].values.codes, - np.asarray(data_unordered[['pared', 'public', 'gpa']], float), - distr='probit') + modu = OrderedModel( + data_unordered['apply'].values.codes, + np.asarray(data_unordered[['pared', 'public', 'gpa']], float), + distr='probit') resu = modu.fit(method='bfgs', disp=False) from .results.results_ordinal_model import res_ord_probit as res2 @@ -168,17 +179,19 @@ def _cdf(self, x): distr=cloglog) resp = modp.fit(method='bfgs', disp=False) - modf = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr=cloglog) + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr=cloglog) resf = modf.fit(method='bfgs', disp=False) - modu = OrderedModel(data_unordered['apply'].values.codes, - np.asarray(data_unordered[['pared', 'public', 'gpa']], float), - distr=cloglog) + modu = OrderedModel( + data_unordered['apply'].values.codes, + np.asarray(data_unordered[['pared', 'public', 'gpa']], float), + distr=cloglog) resu = modu.fit(method='bfgs', disp=False) from .results.results_ordinal_model import res_ord_cloglog as res2 From e00c2d583d095a917f7f28bfaf04a48cf867fdf1 Mon Sep 17 00:00:00 2001 From: Bolzano-Weierstrass Date: Tue, 25 Aug 2020 12:19:35 +0200 Subject: [PATCH 12/24] ENH: small fixes + example notebook --- examples/notebooks/ordinal_regression.ipynb | 247 ++++++++++++++++++++ 
statsmodels/miscmodels/ordinal_model.py     |   7 +-
 2 files changed, 250 insertions(+), 4 deletions(-)
 create mode 100644 examples/notebooks/ordinal_regression.ipynb

diff --git a/examples/notebooks/ordinal_regression.ipynb b/examples/notebooks/ordinal_regression.ipynb
new file mode 100644
index 00000000000..818b93080aa
--- /dev/null
+++ b/examples/notebooks/ordinal_regression.ipynb
@@ -0,0 +1,247 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Ordinal Regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import scipy.stats as stats\n",
+    "\n",
+    "from statsmodels.miscmodels.ordinal_model import OrderedModel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Loading a Stata data file from the UCLA website. This notebook is inspired by https://stats.idre.ucla.edu/r/dae/ordinal-logistic-regression/, which is an R notebook from UCLA."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = \"https://stats.idre.ucla.edu/stat/data/ologit.dta\"\n",
+    "data_student = pd.read_stata(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_student.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_student.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_student['apply'].dtype"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This dataset is about the probability for undergraduate students to apply to graduate school given three exogenous variables:\n",
+    "- their grade point average (`gpa`), a float between 0 and 4.\n",
+    "- `pared`, a binary that indicates if at least one parent went to graduate school.\n",
+    "- `public`, a binary that indicates if the student's current undergraduate institution is public or private.\n",
+    "\n",
+    "`apply`, the target variable, is categorical with ordered categories: `unlikely` < `somewhat likely` < `very likely`. It is a `pd.Series` of categorical type; this is preferred over NumPy arrays."
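For readers preparing their own data, a minimal sketch (with hypothetical labels and values) of encoding string responses as an ordered `pd.Categorical`, the endog type the model prefers:

```python
import pandas as pd

# hypothetical responses; the category order must be declared explicitly
ser = pd.Series(["unlikely", "very likely", "somewhat likely", "unlikely"])
cat_type = pd.CategoricalDtype(
    categories=["unlikely", "somewhat likely", "very likely"], ordered=True)
ser = ser.astype(cat_type)
# integer codes follow the declared order: [0, 2, 1, 0]
print(ser.cat.codes.tolist())
```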
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The model is based on a numerical latent variable $y_{latent}$ that we cannot observe but that we can model through the exogenous variables.\n",
+    "Moreover, we can use this $y_{latent}$ to define the $y$ that we do observe.\n",
+    "\n",
+    "For more details see the documentation of OrderedModel, [the UCLA webpage](https://stats.idre.ucla.edu/r/dae/ordinal-logistic-regression/) or this [book](https://onlinelibrary.wiley.com/doi/book/10.1002/9780470594001).\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Probit ordinal regression:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mod_prob = OrderedModel(data_student['apply'],\n",
+    "                        data_student[['pared', 'public', 'gpa']],\n",
+    "                        distr='probit')\n",
+    "\n",
+    "res_prob = mod_prob.fit(method='bfgs')\n",
+    "res_prob.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In our model, we have 3 exogenous variables (the $\\beta$s if we keep the documentation's notation) so we have 3 coefficients that need to be estimated.\n",
+    "\n",
+    "These 3 estimates and their standard errors can be retrieved in the summary table.\n",
+    "\n",
+    "Since there are 3 categories in the target variable (`unlikely`, `somewhat likely`, `very likely`), we have two thresholds to estimate. \n",
+    "As explained in the documentation of the method `OrderedModel.transform_threshold_params`, the first estimated threshold is the actual value and all the other thresholds are in terms of cumulative exponentiated increments. Actual threshold values can be computed as follows:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_of_thresholds = 2\n",
+    "mod_prob.transform_threshold_params(res_prob.params[-num_of_thresholds:])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Logit ordinal regression:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mod_log = OrderedModel(data_student['apply'],\n",
+    "                        data_student[['pared', 'public', 'gpa']],\n",
+    "                        distr='logit')\n",
+    "\n",
+    "res_log = mod_log.fit(method='bfgs', disp=False)\n",
+    "res_log.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predicted = res_log.model.predict(res_log.params, exog=data_student[['pared', 'public', 'gpa']])\n",
+    "predicted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_choice = predicted.argmax(1)\n",
+    "print('Fraction of correct choice predictions')\n",
+    "print((np.asarray(data_student['apply'].values.codes) == pred_choice).mean())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Ordinal regression with a custom cumulative cLogLog distribution:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In addition to `logit` and `probit` regression, any continuous distribution from the `scipy.stats` package can be used for the `distr` argument. Alternatively, one can define one's own distribution simply by creating a subclass of `rv_continuous` and implementing a few methods."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# using a SciPy distribution\n", + "res_exp = OrderedModel(data_student['apply'],\n", + " data_student[['pared', 'public', 'gpa']],\n", + " distr=stats.expon).fit(method='bfgs', disp=False)\n", + "res_exp.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# minimal definition of a custom scipy distribution.\n", + "class CLogLog(stats.rv_continuous):\n", + " def _ppf(self, q):\n", + " return np.log(-np.log(1 - q))\n", + "\n", + " def _cdf(self, x):\n", + " return 1 - np.exp(-np.exp(x))\n", + "\n", + "\n", + "cloglog = CLogLog()\n", + "\n", + "# definition of the model and fitting\n", + "res_cloglog = OrderedModel(data_student['apply'],\n", + " data_student[['pared', 'public', 'gpa']],\n", + " distr=cloglog).fit(method='bfgs', disp=False)\n", + "res_cloglog.summary()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index 679a60e53a1..e6a5a1d19fc 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -106,7 +106,7 @@ def _check_inputs(self, endog, exog): # Pandas' support if (isinstance(exog, pd.DataFrame)) or (isinstance(exog, pd.Series)): - exog_name = (exog.name if isinstance(exog, pd.Series) + exog_name = ([exog.name] if isinstance(exog, pd.Series) else exog.columns.tolist()) names['xname'] = exog_name exog = np.asarray(exog) @@ -243,7 +243,6 @@ def fit(self, start_params=None, method='nm', maxiter=500, full_output=1, class OrderedResults(GenericLikelihoodModelResults): @Appender(GenericLikelihoodModelResults.summary.__doc__) - def summary(self, yname=None, xname=None, title=None, alpha=.05, **kwargs): + def summary(self, yname=None, xname=None, title=None, alpha=.05): names = self.model.names - print(names) - return super(OrderedResults, self).summary(**names, **kwargs) + return super(OrderedResults, self).summary(**names) From 7a5f16ada5abe3a18d4b6c087b24ade85bde44cf Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Thu, 3 Sep 2020 19:22:21 -0400 Subject: [PATCH 13/24] ENH/REF: add offset, scoreobs, DRY loglike and related --- statsmodels/miscmodels/ordinal_model.py | 99 ++++++++++++++++--- .../miscmodels/tests/test_ordinal_model.py | 21 ++++ 2 files changed, 107 insertions(+), 13 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index e6a5a1d19fc..94ea42da8f4 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -70,7 +70,7 @@ class OrderedModel(GenericLikelihoodModel): """ - def __init__(self, endog, exog, distr='probit', **kwds): + def __init__(self, endog, exog, offset=None, distr='probit', **kwds): if distr == 'probit': self.distr = stats.norm @@ -79,6 +79,12 @@ def __init__(self, endog, exog, distr='probit', **kwds): else: self.distr = distr + if offset is not None: + offset = np.asarray(offset) + + # TODO: check if super can handle offset + self.offset = offset + self.names, 
endog, exog = self._check_inputs(endog, exog)
 
         super(OrderedModel, self).__init__(endog, exog, **kwds)
 
@@ -88,7 +94,10 @@ def __init__(self, endog, exog, distr='probit', **kwds):
         self.endog = index
         self.labels = unique
 
-        self.k_vars = self.exog.shape[1]
+        if self.exog is not None:
+            self.nobs, self.k_vars = self.exog.shape
+        else:  # no exog in model
+            self.nobs, self.k_vars = self.endog.shape[0], 0
         self.results_class = OrderedResults
 
     def _check_inputs(self, endog, exog):
@@ -140,6 +149,11 @@ def cdf(self, x):
         """
         return self.distr.cdf(x)
 
+    def pdf(self, x):
+        """pdf evaluated at x
+        """
+        return self.distr.pdf(x)
+
     def prob(self, low, upp):
         """interval probability
         """
@@ -184,6 +198,8 @@ def predict(self, params, exog=None):
 
         """
 
+        if exog is None:
+            exog = self.exog
         # structure of params = [beta, constants_or_thresholds]
 
         # explicit in several steps to avoid bugs
@@ -191,32 +207,89 @@ def predict(self, params, exog=None):
         thresh = np.concatenate((th_params[:1],
                                  np.exp(th_params[1:]))).cumsum()
         thresh = np.concatenate(([-np.inf], thresh, [np.inf]))
-        xb = self.exog.dot(params[:-(self.k_levels - 1)])[:, None]
+        xb = exog.dot(params[:-(self.k_levels - 1)])[:, None]
         low = thresh[:-1] - xb
         upp = thresh[1:] - xb
         prob = self.prob(low, upp)
         return prob
 
-    def loglike(self, params):
+    def _linpred(self, params, exog=None, offset=None):
+        """linear prediction of latent variable `x b`
 
-        # structure of params = [beta, constants_or_thresholds]
+        currently only for exog from estimation sample (in-sample)
+        """
+        if exog is None:
+            exog = self.exog
+        if offset is None:
+            offset = self.offset
+        if exog is not None:
+            linpred = self.exog.dot(params[:-(self.k_levels - 1)])
+        else:  # means self.exog is also None
+            linpred = np.zeros(self.nobs)
+        if offset is not None:
+            linpred += offset
+        return linpred
+
+    def _bounds(self, params):
+        thresh = self.transform_threshold_params(params)
 
-        thresh = np.concatenate(([-np.inf],
-                                 params[-(self.k_levels - 1):], [np.inf]))
+        thresh_i_low = thresh[self.endog]
+        thresh_i_upp = thresh[self.endog + 1]
+        xb = self._linpred(params)
+        low = thresh_i_low - xb
+        upp = thresh_i_upp - xb
+        return low, upp
+
+    def loglike(self, params):
+
+        thresh = self.transform_threshold_params(params)
 
-        # explicit in several steps to avoid bugs
-        th_params = params[-(self.k_levels - 1):]
-        thresh = np.concatenate((th_params[:1],
-                                 np.exp(th_params[1:]))).cumsum()
-        thresh = np.concatenate(([-np.inf], thresh, [np.inf]))
         thresh_i_low = thresh[self.endog]
         thresh_i_upp = thresh[self.endog + 1]
-        xb = self.exog.dot(params[:-(self.k_levels - 1)])
+        xb = self._linpred(params)
         low = thresh_i_low - xb
         upp = thresh_i_upp - xb
         prob = self.prob(low, upp)
         return np.log(prob + 1e-20).sum()
 
+    def loglikeobs(self, params):
+
+        low, upp = self._bounds(params)
+        prob = self.prob(low, upp)
+        return np.log(prob + 1e-20)
+
+    def score_obs_(self, params):
+        """score, first derivative of loglike for each observations
+
+        This currently only implements the derivative with respect to the
+        exog parameters, but not with respect to threshold-constant parameters.
+
+        """
+        low, upp = self._bounds(params)
+
+        prob = self.prob(low, upp)
+        pdf_upp = self.pdf(upp)
+        pdf_low = self.pdf(low)
+
+        # TODO the following doesn't work yet because of the incremental exp
+        # parameterization. The following was written based on Greene for the
+        # simple non-incremental parameterization. 
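As a cross-check of the `transform_threshold_params`/`_bounds`/`loglike` chain above, here is a minimal self-contained sketch (toy numbers, probit case) of the same computation: cutpoints are the first threshold plus cumulated exponentiated increments, and each observation contributes the log of its interval probability F(upp) - F(low).

```python
import numpy as np
from scipy import stats

# toy data: 4 observations, 1 regressor, 3 ordered levels coded 0, 1, 2
endog = np.array([0, 1, 2, 1])
exog = np.array([[0.1], [0.3], [2.0], [0.7]])
# params = [beta, first cutoff, log of the increment to the second cutoff]
params = np.array([0.5, -0.2, 0.8])

th = params[-2:]
thresh = np.concatenate(([-np.inf],
                         np.concatenate((th[:1], np.exp(th[1:]))).cumsum(),
                         [np.inf]))
xb = exog @ params[:-2]
low = thresh[endog] - xb       # lower cutoff of each observed interval
upp = thresh[endog + 1] - xb   # upper cutoff
loglike = np.log(stats.norm.cdf(upp) - stats.norm.cdf(low) + 1e-20).sum()
print(loglike)
```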
+ # k = self.k_levels - 1 + # idx = self.endog + # score_factor = np.zeros((self.nobs, k + 1 + 2)) #+2 avoids idx bounds + # + # rows = np.arange(self.nobs) + # shift = 1 + # score_factor[rows, shift + idx-1] = -pdf_low + # score_factor[rows, shift + idx] = pdf_upp + # score_factor[:, 0] = pdf_upp - pdf_low + score_factor = (pdf_upp - pdf_low)[:, None] + score_factor /= prob[:, None] + + so = np.column_stack((-score_factor[:, :1] * self.exog, + score_factor[:, 1:])) + return so + @property def start_params(self): # start params based on model without exog diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 4bb86392afa..7e4ecf4a027 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -151,6 +151,27 @@ def setup_class(cls): cls.resf = resf cls.resu = resu + def test_loglikerelated(self): + + res1 = self.res1 + # res2 = self.res2 + + mod = res1.model + fact = 1.1 # evaluate away from optimum + score1 = mod.score(res1.params * fact) + score_obs_numdiff = mod.score_obs(res1.params * fact) + score_obs_exog = mod.score_obs_(res1.params * fact) + assert_allclose(score_obs_numdiff.sum(0), score1, rtol=1e-8) + assert_allclose(score_obs_exog.sum(0), score1[:mod.k_vars], rtol=1e-7) + + # null model + mod_null = OrderedModel(mod.endog, None, + offset=np.zeros(mod.nobs), + distr='probit') + null_params = mod.start_params + res_null = mod_null.fit(method='bfgs', disp=False) + assert_allclose(res_null.params, null_params[mod.k_vars:], rtol=1e-8) + class TestCLogLogModel(CheckOrdinalModelMixin): From 3e232ba57d194308e17b68b946e3384e115dcffb Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Fri, 4 Sep 2020 22:29:57 -0400 Subject: [PATCH 14/24] ENH/TST: add pred_table, adjust and add unit tests --- statsmodels/examples/ex_ordered_model.py | 4 +-- statsmodels/miscmodels/ordinal_model.py | 18 +++++++++- .../miscmodels/tests/results/__init__.py | 0 .../miscmodels/tests/test_ordinal_model.py | 33 +++++++++++++++++-- 4 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 statsmodels/miscmodels/tests/results/__init__.py diff --git a/statsmodels/examples/ex_ordered_model.py b/statsmodels/examples/ex_ordered_model.py index b06c3c569ae..a03f25a48c8 100644 --- a/statsmodels/examples/ex_ordered_model.py +++ b/statsmodels/examples/ex_ordered_model.py @@ -24,8 +24,8 @@ print(np.bincount(y)) mod = OrderedModel(y, x) -start_params = np.ones(k_vars + 4) -start_params = np.concatenate((np.ones(k_vars), np.arange(4))) +# start_params = np.ones(k_vars + 4) +# start_params = np.concatenate((np.ones(k_vars), np.arange(4))) start_ppf = stats.norm.ppf((np.bincount(y) / len(y)).cumsum()) start_threshold = np.concatenate((start_ppf[:1], np.log(np.diff(start_ppf[:-1])))) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index 94ea42da8f4..c0922dad0bd 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -118,7 +118,7 @@ def _check_inputs(self, endog, exog): exog_name = ([exog.name] if isinstance(exog, pd.Series) else exog.columns.tolist()) names['xname'] = exog_name - exog = np.asarray(exog) + # exog = np.asarray(exog) if isinstance(endog, pd.Series): if isinstance(endog.dtypes, CategoricalDtype): @@ -315,6 +315,22 @@ def fit(self, start_params=None, method='nm', maxiter=500, full_output=1, class OrderedResults(GenericLikelihoodModelResults): + + def pred_table(self): + """prediction 
table + + returns pandas DataFrame + + """ + # todo: add category labels + categories = np.arange(self.model.k_levels) + observed = pd.Categorical(self.model.endog, + categories=categories, ordered=True) + predicted = pd.Categorical(self.predict().argmax(1), + categories=categories, ordered=True) + table = pd.crosstab(observed, predicted, margins=True, dropna=False) + return table + @Appender(GenericLikelihoodModelResults.summary.__doc__) def summary(self, yname=None, xname=None, title=None, alpha=.05): names = self.model.names diff --git a/statsmodels/miscmodels/tests/results/__init__.py b/statsmodels/miscmodels/tests/results/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 7e4ecf4a027..28e14403660 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -5,7 +5,7 @@ import numpy as np import scipy.stats as stats -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose, assert_equal from .results.results_ordinal_model import data_store as ds from statsmodels.miscmodels.ordinal_model import OrderedModel @@ -70,6 +70,27 @@ def test_unordered(self): assert_allclose(res1.model.endog, resf.model.endog, rtol=1e-10) assert_allclose(res1.model.exog, resf.model.exog, rtol=1e-10) + def test_results_other(self): + + res1 = self.res1 + + # results + if hasattr(self, "pred_table"): + table = res1.pred_table() + assert_equal(table.values, self.pred_table) + + # smoke test + res1.summary() + + # inherited + tt = res1.t_test(np.eye(len(res1.params))) + assert_allclose(tt.pvalue, res1.pvalues, rtol=1e-13) + # TODO: test using string definition of constraints + + pred = res1.predict(exog=res1.model.exog[-5:]) + fitted = res1.predict() + assert_allclose(pred, fitted[-5:], rtol=1e-13) + class TestLogitModel(CheckOrdinalModelMixin): @@ -151,6 +172,12 @@ def setup_class(cls): cls.resf = resf cls.resu = resu + # regression numbers + cls.pred_table = np.array([[202, 18, 0, 220], + [112, 28, 0, 140], + [ 27, 13, 0, 40], # noqa + [341, 59, 0, 400]], dtype=np.int64) + def test_loglikerelated(self): res1 = self.res1 @@ -161,8 +188,8 @@ def test_loglikerelated(self): score1 = mod.score(res1.params * fact) score_obs_numdiff = mod.score_obs(res1.params * fact) score_obs_exog = mod.score_obs_(res1.params * fact) - assert_allclose(score_obs_numdiff.sum(0), score1, rtol=1e-8) - assert_allclose(score_obs_exog.sum(0), score1[:mod.k_vars], rtol=1e-7) + assert_allclose(score_obs_numdiff.sum(0), score1, atol=1e-7) + assert_allclose(score_obs_exog.sum(0), score1[:mod.k_vars], atol=1e-7) # null model mod_null = OrderedModel(mod.endog, None, From 7152d11f6242fb0c5863f60620c1adca8e316370 Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Sat, 5 Sep 2020 17:01:48 -0400 Subject: [PATCH 15/24] ENH: properly connect param_names, give pandas codes endog to super --- statsmodels/miscmodels/ordinal_model.py | 59 ++++++++++--------- .../miscmodels/tests/test_ordinal_model.py | 17 +++++- 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index c0922dad0bd..590c1000216 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -85,19 +85,32 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): # TODO: check if super can handle offset self.offset = offset - 
self.names, endog, exog = self._check_inputs(endog, exog) + endog, labels, is_pandas = self._check_inputs(endog, exog) super(OrderedModel, self).__init__(endog, exog, **kwds) - unique, index = np.unique(self.endog, return_inverse=True) - self.k_levels = len(unique) - self.endog = index - self.labels = unique + if not is_pandas: + unique, index = np.unique(self.endog, return_inverse=True) + self.endog = index + labels = unique + + self.labels = labels + self.k_levels = len(labels) if self.exog is not None: self.nobs, self.k_vars = self.exog.shape else: # no exog in model self.nobs, self.k_vars = self.endog.shape[0], 0 + + threshold_names = [str(x) + '/' + str(y) + for x, y in zip(labels[:-1], labels[1:])] + + # from GenericLikelihoodModel.fit + if self.exog is not None: + self.exog_names.extend(threshold_names) + else: + self.data.xnames = threshold_names + self.results_class = OrderedResults def _check_inputs(self, endog, exog): @@ -106,20 +119,14 @@ def _check_inputs(self, endog, exog): support for endog and exog. Also retrieves columns & categories names for .summary() of the results class. """ - names = {} if not isinstance(self.distr, stats.rv_continuous): msg = ( f"{self.distr.name} must be a scipy.stats distribution." ) raise ValueError(msg) - # Pandas' support - if (isinstance(exog, pd.DataFrame)) or (isinstance(exog, pd.Series)): - exog_name = ([exog.name] if isinstance(exog, pd.Series) - else exog.columns.tolist()) - names['xname'] = exog_name - # exog = np.asarray(exog) - + labels = None + is_pandas = False if isinstance(endog, pd.Series): if isinstance(endog.dtypes, CategoricalDtype): if not endog.dtype.ordered: @@ -129,20 +136,19 @@ def _check_inputs(self, endog, exog): "categories. ordered == True preferred.", Warning) endog_name = endog.name - threshold_name = [str(x) + '/' + str(y) - for x, y in zip(endog.values.categories[:-1], - endog.values.categories[1:])] - names['yname'] = endog_name - names['xname'] = names['xname'] + threshold_name - endog = np.asarray(endog.values.codes) + labels = endog.values.categories + endog = endog.cat.codes + if endog.min() == -1: # means there is a missing value + raise ValueError("missing values in categorical endog are " + "not supported") + endog.name = endog_name + is_pandas = True else: - msg = ( - "If the endog is a pandas.Serie " - "it must be of categoricalDtype." 
- ) + msg = ("If endog is a pandas.Series, " + "it must be of CategoricalDtype.") raise ValueError(msg) - return names, endog, exog + return endog, labels, is_pandas def cdf(self, x): """cdf evaluated at x @@ -330,8 +336,3 @@ def pred_table(self): categories=categories, ordered=True) table = pd.crosstab(observed, predicted, margins=True, dropna=False) return table - - @Appender(GenericLikelihoodModelResults.summary.__doc__) - def summary(self, yname=None, xname=None, title=None, alpha=.05): - names = self.model.names - return super(OrderedResults, self).summary(**names) diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 28e14403660..9fe9612b86d 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -72,7 +72,16 @@ def test_unordered(self): def test_results_other(self): - res1 = self.res1 + res1 = self.res1 # numpy + resp = self.resp # pandas + + param_names_np = ['x1', 'x2', 'x3', '0/1', '1/2'] + param_names_pd = ['pared', 'public', 'gpa', 'unlikely/somewhat likely', + 'somewhat likely/very likely'] + + assert res1.model.data.param_names == param_names_np + assert self.resp.model.data.param_names == param_names_pd + assert self.resp.model.endog_names == "apply" # results if hasattr(self, "pred_table"): @@ -85,7 +94,9 @@ def test_results_other(self): # inherited tt = res1.t_test(np.eye(len(res1.params))) assert_allclose(tt.pvalue, res1.pvalues, rtol=1e-13) - # TODO: test using string definition of constraints + + tt = resp.t_test(['pared', 'public', 'gpa']) # pandas names + assert_allclose(tt.pvalue, res1.pvalues[:3], rtol=1e-13) pred = res1.predict(exog=res1.model.exog[-5:]) fitted = res1.predict() @@ -194,7 +205,7 @@ def test_loglikerelated(self): # null model mod_null = OrderedModel(mod.endog, None, offset=np.zeros(mod.nobs), - distr='probit') + distr=mod.distr) null_params = mod.start_params res_null = mod_null.fit(method='bfgs', disp=False) assert_allclose(res_null.params, null_params[mod.k_vars:], rtol=1e-8) From dcff9416cad85ea50314c744f7108908a3e90280 Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Tue, 15 Sep 2020 11:14:51 -0400 Subject: [PATCH 16/24] ENH/REF: support pd ordered Categorical in formula --- statsmodels/miscmodels/ordinal_model.py | 87 +++++++++++++++---- .../miscmodels/tests/test_ordinal_model.py | 25 ++++++ 2 files changed, 93 insertions(+), 19 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index 590c1000216..10f61d986f9 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -10,8 +10,8 @@ import pandas as pd from pandas.api.types import CategoricalDtype from scipy import stats -from statsmodels.base.model import GenericLikelihoodModel, \ - GenericLikelihoodModelResults +from statsmodels.base.model import ( + GenericLikelihoodModel, GenericLikelihoodModelResults, LikelihoodModel) from statsmodels.compat.pandas import Appender @@ -23,7 +23,8 @@ class OrderedModel(GenericLikelihoodModel): The mode assumes that the endogenous variable is ordered but that the labels have no numeric interpretation besides the ordering. - The model is based on a latent linear variable, where we observe only + The model is based on a latent linear variable, where we observe only a + discretization. y_latent = X beta + u @@ -46,18 +47,18 @@ class OrderedModel(GenericLikelihoodModel): distribution. 
We use standardized distributions to avoid identifiability problems. - Parameters ---------- endog : array_like - endogenous or dependent ordered categorical variable with k levels. + Endogenous or dependent ordered categorical variable with k levels. Labels or values of endog will internally transformed to consecutive integers, 0, 1, 2, ... pd.Series with Categorical as dtype should be preferred as it gives the order relation between the levels. + If endog is not a pandas Categorical, then categories are + sorted in lexicographic order (by numpy.unique). exog : array_like - exogenous explanatory variables. This should not include an intercept. - (TODO: verify) + Exogenous, explanatory variables. This should not include an intercept. pd.DataFrame are also accepted. distr : string 'probit' or 'logit', or a distribution instance The default is currently 'probit' which uses the normal distribution @@ -69,6 +70,7 @@ class OrderedModel(GenericLikelihoodModel): Status: initial version, subclasses `GenericLikelihoodModel` """ + _formula_max_endog = np.inf def __init__(self, endog, exog, offset=None, distr='probit', **kwds): @@ -87,12 +89,24 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): endog, labels, is_pandas = self._check_inputs(endog, exog) + frame = kwds.pop("frame", None) super(OrderedModel, self).__init__(endog, exog, **kwds) + if frame is not None: + self.data.frame = frame + if not is_pandas: - unique, index = np.unique(self.endog, return_inverse=True) - self.endog = index - labels = unique + # TODO: maybe handle 2-dim endog obtained from formula + if self.endog.ndim == 1: + unique, index = np.unique(self.endog, return_inverse=True) + self.endog = index + labels = unique + elif self.endog.ndim == 2: + endog_, labels, ynames = self._handle_formula_categorical() + # replace endog with categorical + self.endog = endog_ + # fix yname + self.data.ynames = ynames self.labels = labels self.k_levels = len(labels) @@ -114,11 +128,13 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): self.results_class = OrderedResults def _check_inputs(self, endog, exog): + """handle endog that is pandas Categorical + + checks if self.distrib is legal and does the Pandas Categorical + support for endog. """ - checks if self.distrib is legal and does the Pandas - support for endog and exog. Also retrieves columns & categories - names for .summary() of the results class. - """ + + # TOCO: maybe remove this if we want to have duck distributions if not isinstance(self.distr, stats.rv_continuous): msg = ( f"{self.distr.name} must be a scipy.stats distribution." @@ -135,6 +151,7 @@ def _check_inputs(self, endog, exog): "risk of capturing a wrong order for the " "categories. 
ordered == True preferred.", Warning) + endog_name = endog.name labels = endog.values.categories endog = endog.cat.codes @@ -143,13 +160,45 @@ def _check_inputs(self, endog, exog): "not supported") endog.name = endog_name is_pandas = True - else: - msg = ("If endog is a pandas.Series, " - "it must be of CategoricalDtype.") - raise ValueError(msg) +# else: +# msg = ("If endog is a pandas.Series, " +# "it must be of CategoricalDtype.") +# raise ValueError(msg) return endog, labels, is_pandas + def _handle_formula_categorical(self): + """handle 2dim endog, + + raise if not from formula with pandas ordered Categorical endog + + """ + # get info about formula and original data + if not hasattr(self.data.orig_endog, "design_info"): + msg = "2-dim endog are not supported" + raise ValueError(msg) + + di_endog = self.data.orig_endog.design_info + if len(di_endog.terms) > 1: + raise ValueError("more than one term in endog") + + factor = list(di_endog.factor_infos.values())[0] + labels = factor.categories + name = factor.state["eval_code"] + original_endog = self.data.frame[name] + if not (isinstance(original_endog.dtype, CategoricalDtype) + and original_endog.dtype.ordered): + msg = ("Only ordered pandas Categorical are supported as endog " + "in formulas") + raise ValueError(msg) + + # Now we should only have an ordered pandas Categorical + + endog = self.endog.argmax(1) + # fix yname + ynames = name + return endog, labels, ynames + def cdf(self, x): """cdf evaluated at x """ @@ -268,7 +317,7 @@ def score_obs_(self, params): """score, first derivative of loglike for each observations This currently only implements the derivative with respect to the - exog parameters, but not with respect to threshold-constant parameters. + exog parameters, but not with respect to threshold parameters. 
""" low, upp = self._bounds(params) diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 9fe9612b86d..180f6834058 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -4,6 +4,7 @@ import numpy as np import scipy.stats as stats +import pytest from numpy.testing import assert_allclose, assert_equal from .results.results_ordinal_model import data_store as ds @@ -210,6 +211,30 @@ def test_loglikerelated(self): res_null = mod_null.fit(method='bfgs', disp=False) assert_allclose(res_null.params, null_params[mod.k_vars:], rtol=1e-8) + def test_formula_categorical(self): + + resp = self.resp + data = ds.df + + "apply ~ pared + public + gpa - 1" + modf2 = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", + data, distr='probit') + resf2 = modf2.fit(method='bfgs', disp=False) + assert_allclose(resf2.params, resp.params, atol=1e-8) + assert modf2.exog_names == resp.model.exog_names + assert modf2.data.ynames == resp.model.data.ynames + assert hasattr(modf2.data, "frame") + assert not hasattr(modf2, "frame") + + with pytest.raises(ValueError): + OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": np.asarray(data['apply']), + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='probit') + class TestCLogLogModel(CheckOrdinalModelMixin): From 0241fd52a74d68ef9a818c5ef475f7ae9f49c8fb Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Tue, 15 Sep 2020 12:54:52 -0400 Subject: [PATCH 17/24] fix formula for pandas categorical --- statsmodels/miscmodels/ordinal_model.py | 84 +++++++++++++++++-------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index 10f61d986f9..fa72c02665e 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -89,12 +89,8 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): endog, labels, is_pandas = self._check_inputs(endog, exog) - frame = kwds.pop("frame", None) super(OrderedModel, self).__init__(endog, exog, **kwds) - if frame is not None: - self.data.frame = frame - if not is_pandas: # TODO: maybe handle 2-dim endog obtained from formula if self.endog.ndim == 1: @@ -102,28 +98,17 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): self.endog = index labels = unique elif self.endog.ndim == 2: - endog_, labels, ynames = self._handle_formula_categorical() - # replace endog with categorical - self.endog = endog_ - # fix yname - self.data.ynames = ynames - - self.labels = labels - self.k_levels = len(labels) - - if self.exog is not None: - self.nobs, self.k_vars = self.exog.shape - else: # no exog in model - self.nobs, self.k_vars = self.endog.shape[0], 0 - - threshold_names = [str(x) + '/' + str(y) - for x, y in zip(labels[:-1], labels[1:])] - - # from GenericLikelihoodModel.fit - if self.exog is not None: - self.exog_names.extend(threshold_names) - else: - self.data.xnames = threshold_names + labels = [str(i) for i in range(self.endog.shape[1])] + labels = [] + #self.endog = self.endog.argmax(1) + # endog_ = self.endog.argmax(1) +# endog_, labels, ynames = self._handle_formula_categorical() +# # replace endog with categorical +# self.endog = endog_ +# # fix yname +# self.data.ynames = ynames + + self._initialize_labels(labels) self.results_class = OrderedResults @@ -167,6 +152,25 @@ def 
_check_inputs(self, endog, exog): return endog, labels, is_pandas + def _initialize_labels(self, labels): + self.labels = labels + self.k_levels = len(labels) + + if self.exog is not None: + self.nobs, self.k_vars = self.exog.shape + else: # no exog in model + self.nobs, self.k_vars = self.endog.shape[0], 0 + + threshold_names = [str(x) + '/' + str(y) + for x, y in zip(labels[:-1], labels[1:])] + + # from GenericLikelihoodModel.fit + if self.exog is not None: + # avoid extending several times + self.exog_names.extend(threshold_names) + else: + self.data.xnames = threshold_names + def _handle_formula_categorical(self): """handle 2dim endog, @@ -199,6 +203,34 @@ def _handle_formula_categorical(self): ynames = name return endog, labels, ynames + @classmethod + def from_formula(cls, formula, data, subset=None, drop_cols=None, + *args, **kwargs): + + if "0+" not in formula.replace(" ", ""): + import warnings + warnings.warn("Ordinal models should not include an intercept") + + endog_name = formula.split("~")[0].strip() + original_endog = data[endog_name] + + model = super(OrderedModel, cls).from_formula( + formula, data=data, *args, **kwargs) + + if model.endog.ndim == 2: + if not (isinstance(original_endog.dtype, CategoricalDtype) + and original_endog.dtype.ordered): + msg = ("Only ordered pandas Categorical are supported as endog" + " in formulas") + raise ValueError(msg) + + labels = original_endog.values.categories + model._initialize_labels(labels) + model.endog = model.endog.argmax(1) + model.data.ynames = endog_name + + return model + def cdf(self, x): """cdf evaluated at x """ From c1664d989c4eea864180e69b200bef687494a234 Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Wed, 16 Sep 2020 11:06:50 -0400 Subject: [PATCH 18/24] CLN: cleanup, remove now redundant code and comments for formulas --- statsmodels/miscmodels/ordinal_model.py | 66 ++++++------------------- 1 file changed, 14 insertions(+), 52 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index fa72c02665e..188b121a12e 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -11,7 +11,7 @@ from pandas.api.types import CategoricalDtype from scipy import stats from statsmodels.base.model import ( - GenericLikelihoodModel, GenericLikelihoodModelResults, LikelihoodModel) + GenericLikelihoodModel, GenericLikelihoodModelResults) from statsmodels.compat.pandas import Appender @@ -84,7 +84,6 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): if offset is not None: offset = np.asarray(offset) - # TODO: check if super can handle offset self.offset = offset endog, labels, is_pandas = self._check_inputs(endog, exog) @@ -92,21 +91,19 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): super(OrderedModel, self).__init__(endog, exog, **kwds) if not is_pandas: - # TODO: maybe handle 2-dim endog obtained from formula if self.endog.ndim == 1: unique, index = np.unique(self.endog, return_inverse=True) self.endog = index labels = unique elif self.endog.ndim == 2: + if not hasattr(self, "design_info"): + raise ValueError("2-dim endog not supported") + # this branch is currently only in support of from_formula + # labels here are only needed to choose k_levels in initialize labels = [str(i) for i in range(self.endog.shape[1])] labels = [] - #self.endog = self.endog.argmax(1) - # endog_ = self.endog.argmax(1) -# endog_, labels, ynames = self._handle_formula_categorical() -# # replace endog with 
categorical -# self.endog = endog_ -# # fix yname -# self.data.ynames = ynames + # Note: Doing the following here would break from_formula + # self.endog = self.endog.argmax(1) self._initialize_labels(labels) @@ -119,19 +116,18 @@ def _check_inputs(self, endog, exog): support for endog. """ - # TOCO: maybe remove this if we want to have duck distributions if not isinstance(self.distr, stats.rv_continuous): + import warnings msg = ( - f"{self.distr.name} must be a scipy.stats distribution." + f"{self.distr.name} is not a scipy.stats distribution." ) - raise ValueError(msg) + raise warnings.warn(msg) labels = None is_pandas = False if isinstance(endog, pd.Series): if isinstance(endog.dtypes, CategoricalDtype): if not endog.dtype.ordered: - import warnings warnings.warn("the endog has ordered == False, " "risk of capturing a wrong order for the " "categories. ordered == True preferred.", @@ -145,10 +141,6 @@ def _check_inputs(self, endog, exog): "not supported") endog.name = endog_name is_pandas = True -# else: -# msg = ("If endog is a pandas.Series, " -# "it must be of CategoricalDtype.") -# raise ValueError(msg) return endog, labels, is_pandas @@ -167,42 +159,12 @@ def _initialize_labels(self, labels): # from GenericLikelihoodModel.fit if self.exog is not None: # avoid extending several times + if len(self.exog_names) > self.k_vars: + raise RuntimeError("something wrong with exog_names, too long") self.exog_names.extend(threshold_names) else: self.data.xnames = threshold_names - def _handle_formula_categorical(self): - """handle 2dim endog, - - raise if not from formula with pandas ordered Categorical endog - - """ - # get info about formula and original data - if not hasattr(self.data.orig_endog, "design_info"): - msg = "2-dim endog are not supported" - raise ValueError(msg) - - di_endog = self.data.orig_endog.design_info - if len(di_endog.terms) > 1: - raise ValueError("more than one term in endog") - - factor = list(di_endog.factor_infos.values())[0] - labels = factor.categories - name = factor.state["eval_code"] - original_endog = self.data.frame[name] - if not (isinstance(original_endog.dtype, CategoricalDtype) - and original_endog.dtype.ordered): - msg = ("Only ordered pandas Categorical are supported as endog " - "in formulas") - raise ValueError(msg) - - # Now we should only have an ordered pandas Categorical - - endog = self.endog.argmax(1) - # fix yname - ynames = name - return endog, labels, ynames - @classmethod def from_formula(cls, formula, data, subset=None, drop_cols=None, *args, **kwargs): @@ -220,8 +182,8 @@ def from_formula(cls, formula, data, subset=None, drop_cols=None, if model.endog.ndim == 2: if not (isinstance(original_endog.dtype, CategoricalDtype) and original_endog.dtype.ordered): - msg = ("Only ordered pandas Categorical are supported as endog" - " in formulas") + msg = ("Only ordered pandas Categorical are supported as " + "endog in formulas") raise ValueError(msg) labels = original_endog.values.categories From db637720f06b99c3f21721843a942cd90bda3be6 Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Sat, 19 Sep 2020 13:46:19 -0400 Subject: [PATCH 19/24] ENH: drop Intercept in `from_formula`, similar to MixedLM, fix predict design_info --- statsmodels/base/model.py | 4 +- statsmodels/miscmodels/ordinal_model.py | 17 +++- .../miscmodels/tests/test_ordinal_model.py | 82 +++++++++++++++++-- 3 files changed, 90 insertions(+), 13 deletions(-) diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py index 8d0328b471d..9ec656552c9 100644 --- 
a/statsmodels/base/model.py +++ b/statsmodels/base/model.py @@ -1060,7 +1060,9 @@ def predict(self, exog=None, transform=True, *args, **kwargs): exog_index = exog.index if is_pandas else None if transform and hasattr(self.model, 'formula') and (exog is not None): - design_info = self.model.data.design_info + # allow both location of design_info, see #7043 + design_info = (getattr(self.model, "design_info", None) or + self.model.data.design_info) from patsy import dmatrix if isinstance(exog, pd.Series): # we are guessing whether it should be column or row diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index 188b121a12e..69734d9b494 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -169,15 +169,24 @@ def _initialize_labels(self, labels): def from_formula(cls, formula, data, subset=None, drop_cols=None, *args, **kwargs): - if "0+" not in formula.replace(" ", ""): - import warnings - warnings.warn("Ordinal models should not include an intercept") + # we want an explicit Intercept in the model that we can remove + # Removing constant with "0 +" or "- 1" does not work for categ. exog + # copied from PHReg + import re + terms = re.split(r"[+\-~]", formula) + for term in terms: + term = term.strip() + if term in ("0", "1"): + import warnings + msg = ("OrderedModel formulas should not include any '0' or " + "'1' terms if those create an implicit constant.") + warnings.warn(msg) endog_name = formula.split("~")[0].strip() original_endog = data[endog_name] model = super(OrderedModel, cls).from_formula( - formula, data=data, *args, **kwargs) + formula, data=data, drop_cols=["Intercept"], *args, **kwargs) if model.endog.ndim == 2: if not (isinstance(original_endog.dtype, CategoricalDtype) diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 180f6834058..0eabde972b8 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -5,6 +5,7 @@ import numpy as np import scipy.stats as stats import pytest +import pandas as pd from numpy.testing import assert_allclose, assert_equal from .results.results_ordinal_model import data_store as ds @@ -103,6 +104,16 @@ def test_results_other(self): fitted = res1.predict() assert_allclose(pred, fitted[-5:], rtol=1e-13) + pred = resp.predict(exog=resp.model.data.orig_exog.iloc[-5:]) + fitted = resp.predict() + assert_allclose(pred, fitted[-5:], rtol=1e-13) + + dataf = self.resf.model.data.frame # is a dict + dataf_df = pd.DataFrame.from_dict(dataf) + pred = self.resf.predict(exog=dataf_df.iloc[-5:]) + fitted = self.resf.predict() + assert_allclose(pred, fitted[-5:], rtol=1e-13) + class TestLogitModel(CheckOrdinalModelMixin): @@ -216,7 +227,6 @@ def test_formula_categorical(self): resp = self.resp data = ds.df - "apply ~ pared + public + gpa - 1" modf2 = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", data, distr='probit') resf2 = modf2.fit(method='bfgs', disp=False) @@ -236,6 +246,61 @@ def test_formula_categorical(self): distr='probit') +class TestLogitModelFormula(): + + @classmethod + def setup_class(cls): + data = ds.df + nobs = len(data) + data["dummy"] = (np.arange(nobs) < (nobs / 2)).astype(float) + # alias to correspond to patsy name + data["C(dummy)[T.1.0]"] = data["dummy"] + cls.data = data + + columns = ['C(dummy)[T.1.0]', 'pared', 'public', 'gpa'] + # standard fit + mod = OrderedModel(data['apply'].values.codes, + 
np.asarray(data[columns], float), + distr='logit') + cls.res = mod.fit(method='bfgs', disp=False) + # standard fit with pandas input + modp = OrderedModel(data['apply'], + data[columns], + distr='logit') + cls.resp = modp.fit(method='bfgs', disp=False) + + def test_setup(self): + data = self.data + resp = self.resp + fittedvalues = resp.predict() + + formulas = ["apply ~ 1 + pared + public + gpa + C(dummy)", + "apply ~ pared + public + gpa + C(dummy)"] + for formula in formulas: + modf1 = OrderedModel.from_formula(formula, data, distr='logit') + resf1 = modf1.fit(method='bfgs') + summf1 = resf1.summary() + summf1_str = str(summf1) + assert resf1.model.exog_names == resp.model.exog_names + assert resf1.model.data.param_names == resp.model.exog_names + assert all(name in summf1_str for name in + resp.model.data.param_names) + assert_allclose(resf1.predict(data[:5]), fittedvalues[:5]) + + # test over parameterized model with implicit constant + # warns but doesn't raise + formula = "apply ~ 0 + pared + public + gpa + C(dummy)" + + with pytest.warns(UserWarning): + modf2 = OrderedModel.from_formula(formula, data, distr='logit') + + with pytest.warns(UserWarning): + resf2 = modf2.fit(method='bfgs') + assert np.isnan(resf2.bse).all() + + assert_allclose(resf2.predict(data[:5]), fittedvalues[:5], rtol=1e-4) + + class TestCLogLogModel(CheckOrdinalModelMixin): @classmethod @@ -263,13 +328,14 @@ def _cdf(self, x): distr=cloglog) resp = modp.fit(method='bfgs', disp=False) - modf = OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr=cloglog) + with pytest.warns(UserWarning): + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr=cloglog) resf = modf.fit(method='bfgs', disp=False) modu = OrderedModel( From a1fb8c2d8d5a648bde1574ce3575ee42bf2b2f2c Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Sat, 19 Sep 2020 19:42:02 -0400 Subject: [PATCH 20/24] REF: use SpecificationWarning for formula, --- statsmodels/miscmodels/ordinal_model.py | 4 +- .../miscmodels/tests/test_ordinal_model.py | 68 +++++++++++-------- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index 69734d9b494..e2779f136ce 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -10,6 +10,8 @@ import pandas as pd from pandas.api.types import CategoricalDtype from scipy import stats +from statsmodels.tools.sm_exceptions import ( + SpecificationWarning) from statsmodels.base.model import ( GenericLikelihoodModel, GenericLikelihoodModelResults) from statsmodels.compat.pandas import Appender @@ -180,7 +182,7 @@ def from_formula(cls, formula, data, subset=None, drop_cols=None, import warnings msg = ("OrderedModel formulas should not include any '0' or " "'1' terms if those create an implicit constant.") - warnings.warn(msg) + warnings.warn(msg, SpecificationWarning) endog_name = formula.split("~")[0].strip() original_endog = data[endog_name] diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 0eabde972b8..c4db0e37c7b 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -2,12 
+2,15 @@ Test for ordinal models """ +import warnings import numpy as np import scipy.stats as stats -import pytest import pandas as pd +import pytest from numpy.testing import assert_allclose, assert_equal +from statsmodels.tools.sm_exceptions import ( + HessianInversionWarning, SpecificationWarning) from .results.results_ordinal_model import data_store as ds from statsmodels.miscmodels.ordinal_model import OrderedModel @@ -133,13 +136,15 @@ def setup_class(cls): distr='logit') resp = modp.fit(method='bfgs', disp=False) # fit with formula - modf = OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='logit') + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SpecificationWarning) + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='logit') resf = modf.fit(method='bfgs', disp=False) # fit on data with ordered=False modu = OrderedModel( @@ -173,13 +178,15 @@ def setup_class(cls): distr='probit') resp = modp.fit(method='bfgs', disp=False) - modf = OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='probit') + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SpecificationWarning) + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='probit') resf = modf.fit(method='bfgs', disp=False) modu = OrderedModel( @@ -227,8 +234,11 @@ def test_formula_categorical(self): resp = self.resp data = ds.df - modf2 = OrderedModel.from_formula("apply ~ pared + public + gpa - 1", - data, distr='probit') + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SpecificationWarning) + formula = "apply ~ pared + public + gpa - 1" + modf2 = OrderedModel.from_formula(formula, + data, distr='probit') resf2 = modf2.fit(method='bfgs', disp=False) assert_allclose(resf2.params, resp.params, atol=1e-8) assert modf2.exog_names == resp.model.exog_names @@ -237,19 +247,22 @@ def test_formula_categorical(self): assert not hasattr(modf2, "frame") with pytest.raises(ValueError): - OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": np.asarray(data['apply']), - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='probit') + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SpecificationWarning) + OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": np.asarray(data['apply']), + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='probit') class TestLogitModelFormula(): @classmethod def setup_class(cls): + warnings.simplefilter("ignore", SpecificationWarning) data = ds.df nobs = len(data) data["dummy"] = (np.arange(nobs) < (nobs / 2)).astype(float) @@ -291,12 +304,13 @@ def test_setup(self): # warns but doesn't raise formula = "apply ~ 0 + pared + public + gpa + C(dummy)" - with pytest.warns(UserWarning): + with pytest.warns(SpecificationWarning): modf2 = OrderedModel.from_formula(formula, data, distr='logit') - with pytest.warns(UserWarning): + with 
pytest.warns(HessianInversionWarning): resf2 = modf2.fit(method='bfgs') - assert np.isnan(resf2.bse).all() + assert resf2.converged is False + # assert np.isnan(resf2.bse).all() assert_allclose(resf2.predict(data[:5]), fittedvalues[:5], rtol=1e-4) From eeb1eb20044445fc7596ddbc9f192b34228b33df Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Mon, 21 Sep 2020 11:06:51 -0400 Subject: [PATCH 21/24] BUG: GenericLikelihoodModel propagate `hasconst` see #7049 --- statsmodels/base/model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py index 9ec656552c9..03ae9b63349 100644 --- a/statsmodels/base/model.py +++ b/statsmodels/base/model.py @@ -789,6 +789,7 @@ def __init__(self, endog, exog=None, loglike=None, score=None, if hessian is not None: self.hessian = hessian + hasconst = kwds.pop("hasconst", None) self.__dict__.update(kwds) # TODO: data structures? @@ -797,7 +798,8 @@ def __init__(self, endog, exog=None, loglike=None, score=None, # self.df_model = 9999 # somewhere: CacheWriteWarning: 'df_model' cannot be overwritten super(GenericLikelihoodModel, self).__init__(endog, exog, - missing=missing) + missing=missing, + hasconst=hasconst) # this will not work for ru2nmnl, maybe np.ndim of a dict? if exog is not None: From 7f392b9d017d037cefb02794615c17ee8dea687f Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Mon, 21 Sep 2020 12:52:44 -0400 Subject: [PATCH 22/24] REF: raise instead of warn if k_constant > 0 --- statsmodels/miscmodels/ordinal_model.py | 16 +--- .../miscmodels/tests/test_ordinal_model.py | 90 +++++++++---------- 2 files changed, 45 insertions(+), 61 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index e2779f136ce..a92f75f069c 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -10,8 +10,7 @@ import pandas as pd from pandas.api.types import CategoricalDtype from scipy import stats -from statsmodels.tools.sm_exceptions import ( - SpecificationWarning) + from statsmodels.base.model import ( GenericLikelihoodModel, GenericLikelihoodModelResults) from statsmodels.compat.pandas import Appender @@ -107,6 +106,9 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): # Note: Doing the following here would break from_formula # self.endog = self.endog.argmax(1) + if self.k_constant > 0: + raise ValueError("there should not be a constant in the model") + self._initialize_labels(labels) self.results_class = OrderedResults @@ -173,16 +175,6 @@ def from_formula(cls, formula, data, subset=None, drop_cols=None, # we want an explicit Intercept in the model that we can remove # Removing constant with "0 +" or "- 1" does not work for categ. 
exog - # copied from PHReg - import re - terms = re.split(r"[+\-~]", formula) - for term in terms: - term = term.strip() - if term in ("0", "1"): - import warnings - msg = ("OrderedModel formulas should not include any '0' or " - "'1' terms if those create an implicit constant.") - warnings.warn(msg, SpecificationWarning) endog_name = formula.split("~")[0].strip() original_endog = data[endog_name] diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index c4db0e37c7b..350e3e24219 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -9,8 +9,7 @@ import pytest from numpy.testing import assert_allclose, assert_equal -from statsmodels.tools.sm_exceptions import ( - HessianInversionWarning, SpecificationWarning) +from statsmodels.tools.sm_exceptions import HessianInversionWarning from .results.results_ordinal_model import data_store as ds from statsmodels.miscmodels.ordinal_model import OrderedModel @@ -136,15 +135,13 @@ def setup_class(cls): distr='logit') resp = modp.fit(method='bfgs', disp=False) # fit with formula - with warnings.catch_warnings(): - warnings.simplefilter("ignore", SpecificationWarning) - modf = OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='logit') + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='logit') resf = modf.fit(method='bfgs', disp=False) # fit on data with ordered=False modu = OrderedModel( @@ -178,15 +175,13 @@ def setup_class(cls): distr='probit') resp = modp.fit(method='bfgs', disp=False) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", SpecificationWarning) - modf = OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='probit') + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='probit') resf = modf.fit(method='bfgs', disp=False) modu = OrderedModel( @@ -234,11 +229,9 @@ def test_formula_categorical(self): resp = self.resp data = ds.df - with warnings.catch_warnings(): - warnings.simplefilter("ignore", SpecificationWarning) - formula = "apply ~ pared + public + gpa - 1" - modf2 = OrderedModel.from_formula(formula, - data, distr='probit') + formula = "apply ~ pared + public + gpa - 1" + modf2 = OrderedModel.from_formula(formula, + data, distr='probit') resf2 = modf2.fit(method='bfgs', disp=False) assert_allclose(resf2.params, resp.params, atol=1e-8) assert modf2.exog_names == resp.model.exog_names @@ -247,22 +240,19 @@ def test_formula_categorical(self): assert not hasattr(modf2, "frame") with pytest.raises(ValueError): - with warnings.catch_warnings(): - warnings.simplefilter("ignore", SpecificationWarning) - OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": np.asarray(data['apply']), - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr='probit') + OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": np.asarray(data['apply']), + 
"pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr='probit') class TestLogitModelFormula(): @classmethod def setup_class(cls): - warnings.simplefilter("ignore", SpecificationWarning) data = ds.df nobs = len(data) data["dummy"] = (np.arange(nobs) < (nobs / 2)).astype(float) @@ -301,16 +291,18 @@ def test_setup(self): assert_allclose(resf1.predict(data[:5]), fittedvalues[:5]) # test over parameterized model with implicit constant - # warns but doesn't raise formula = "apply ~ 0 + pared + public + gpa + C(dummy)" - with pytest.warns(SpecificationWarning): - modf2 = OrderedModel.from_formula(formula, data, distr='logit') + with pytest.raises(ValueError): + OrderedModel.from_formula(formula, data, distr='logit') - with pytest.warns(HessianInversionWarning): + # ignore constant, so we get results without exception + modf2 = OrderedModel.from_formula(formula, data, distr='logit', + hasconst=False) + # we get a warning in some environments + with warnings.catch_warnings(): + warnings.simplefilter("ignore", HessianInversionWarning) resf2 = modf2.fit(method='bfgs') - assert resf2.converged is False - # assert np.isnan(resf2.bse).all() assert_allclose(resf2.predict(data[:5]), fittedvalues[:5], rtol=1e-4) @@ -342,14 +334,14 @@ def _cdf(self, x): distr=cloglog) resp = modp.fit(method='bfgs', disp=False) - with pytest.warns(UserWarning): - modf = OrderedModel.from_formula( - "apply ~ pared + public + gpa - 1", - data={"apply": data['apply'].values.codes, - "pared": data['pared'], - "public": data['public'], - "gpa": data['gpa']}, - distr=cloglog) + # with pytest.warns(UserWarning): + modf = OrderedModel.from_formula( + "apply ~ pared + public + gpa - 1", + data={"apply": data['apply'].values.codes, + "pared": data['pared'], + "public": data['public'], + "gpa": data['gpa']}, + distr=cloglog) resf = modf.fit(method='bfgs', disp=False) modu = OrderedModel( From 97101b395a24ef3febd705c7881a92a101ec64ce Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Mon, 21 Sep 2020 13:42:12 -0400 Subject: [PATCH 23/24] ENH/BUG: correct predict with offset --- statsmodels/miscmodels/ordinal_model.py | 27 +++++++------- .../miscmodels/tests/test_ordinal_model.py | 37 +++++++++++++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index a92f75f069c..df3d98b9546 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -245,21 +245,15 @@ def transform_reverse_threshold_params(self, params): np.log(np.diff(start_ppf[:-1])))) return thresh_params - def predict(self, params, exog=None): + def predict(self, params, exog=None, offset=None): """predicted probabilities for each level of the ordinal endog. 
""" - if exog is None: - exog = self.exog - # structure of params = [beta, constants_or_thresholds] + # note, exog and offset handling is in linpred - # explicit in several steps to avoid bugs - th_params = params[-(self.k_levels - 1):] - thresh = np.concatenate((th_params[:1], - np.exp(th_params[1:]))).cumsum() - thresh = np.concatenate(([-np.inf], thresh, [np.inf])) - xb = exog.dot(params[:-(self.k_levels - 1)])[:, None] + thresh = self.transform_threshold_params(params) + xb = self._linpred(params, exog=exog, offset=offset)[:, None] low = thresh[:-1] - xb upp = thresh[1:] - xb prob = self.prob(low, upp) @@ -272,10 +266,17 @@ def _linpred(self, params, exog=None, offset=None): """ if exog is None: exog = self.exog - if offset is None: - offset = self.offset + if offset is None: + offset = self.offset + else: + if offset is None: + offset = 0 + + if offset is not None: + offset = np.asarray(offset) + if exog is not None: - linpred = self.exog.dot(params[:-(self.k_levels - 1)]) + linpred = exog.dot(params[:-(self.k_levels - 1)]) else: # means self.exog is also None linpred = np.zeros(self.nobs) if offset is not None: diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 350e3e24219..22d4e280439 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -240,6 +240,8 @@ def test_formula_categorical(self): assert not hasattr(modf2, "frame") with pytest.raises(ValueError): + # only ordered categorical or numerical endog are allowed + # string endog raises ValueError OrderedModel.from_formula( "apply ~ pared + public + gpa - 1", data={"apply": np.asarray(data['apply']), @@ -248,6 +250,41 @@ def test_formula_categorical(self): "gpa": data['gpa']}, distr='probit') + def test_offset(self): + + resp = self.resp + data = ds.df + offset = np.ones(len(data)) + + formula = "apply ~ pared + public + gpa - 1" + modf2 = OrderedModel.from_formula(formula, data, offset=offset, + distr='probit') + resf2 = modf2.fit(method='bfgs', disp=False) + + assert_allclose(resf2.params[:3], resp.params[:3], atol=2e-4) + assert_allclose(resf2.params[3], resp.params[3] + 1, atol=2e-4) + + fitted = resp.predict() + fitted2 = resf2.predict() + assert_allclose(fitted2, fitted, atol=2e-4) + + pred_ones = resf2.predict(data[:6], offset=np.ones(6)) + assert_allclose(pred_ones, fitted[:6], atol=2e-4) + + # check default is 0. 
if exog is provided
+        pred_zero1 = resf2.predict(data[:6])
+        pred_zero2 = resf2.predict(data[:6], offset=0)
+        assert_allclose(pred_zero1, pred_zero2, atol=2e-4)
+
+        # compare with equivalent results from a no-offset model
+        pred_zero = resp.predict(data.iloc[:6, 1:], offset=-np.ones(6))
+        assert_allclose(pred_zero1, pred_zero, atol=2e-4)
+
+        params_adj = resp.params.copy()
+        params_adj[3] += 1
+        fitted_zero = resp.model.predict(params_adj)
+        assert_allclose(pred_zero1, fitted_zero[:6], atol=2e-4)
+
 
 class TestLogitModelFormula():
 

From ce0899b1984b77e56dfd51de2b75cf1c46449477 Mon Sep 17 00:00:00 2001
From: Josef Perktold
Date: Mon, 21 Sep 2020 16:17:27 -0400
Subject: [PATCH 24/24] BUG: fix df_resid, unit test compared to discrete
 Logit

---
 statsmodels/base/model.py                     |  7 +++
 statsmodels/miscmodels/ordinal_model.py       | 23 ++++++---
 .../miscmodels/tests/test_ordinal_model.py    | 49 +++++++++++++++++++
 3 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py
index 03ae9b63349..c3a3f74b44e 100644
--- a/statsmodels/base/model.py
+++ b/statsmodels/base/model.py
@@ -2492,6 +2492,13 @@ def __init__(self, model, mlefit):
         self._cache = {}
         self.__dict__.update(mlefit.__dict__)
 
+        k_params = len(mlefit.params)
+        # checks mainly for adding new models or subclassing
+        if self.df_model + self.model.k_constant != k_params:
+            warnings.warn("df_model + k_constant differs from nparams")
+        if self.df_resid != self.nobs - k_params:
+            warnings.warn("df_resid differs from nobs - nparams")
+
     def summary(self, yname=None, xname=None, title=None, alpha=.05):
         """Summarize the Regression Results
 
diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py
index df3d98b9546..a48939c40c7 100644
--- a/statsmodels/miscmodels/ordinal_model.py
+++ b/statsmodels/miscmodels/ordinal_model.py
@@ -90,7 +90,7 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
         endog, labels, is_pandas = self._check_inputs(endog, exog)
 
         super(OrderedModel, self).__init__(endog, exog, **kwds)
-
+        k_levels = None  # initialize
         if not is_pandas:
             if self.endog.ndim == 1:
                 unique, index = np.unique(self.endog, return_inverse=True)
@@ -100,8 +100,8 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
                 if not hasattr(self, "design_info"):
                     raise ValueError("2-dim endog not supported")
                 # this branch is currently only in support of from_formula
-                # labels here are only needed to choose k_levels in initialize
-                labels = [str(i) for i in range(self.endog.shape[1])]
+                # we need to initialize k_levels correctly for df_resid
+                k_levels = self.endog.shape[1]
                 labels = []
                 # Note: Doing the following here would break from_formula
                 # self.endog = self.endog.argmax(1)
@@ -109,7 +109,12 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
         if self.k_constant > 0:
             raise ValueError("there should not be a constant in the model")
 
-        self._initialize_labels(labels)
+        self._initialize_labels(labels, k_levels=k_levels)
+
+        # adjust df
+        self.k_extra = self.k_levels - 1
+        self.df_model = self.k_vars + self.k_extra
+        self.df_resid = self.nobs - self.df_model
 
         self.results_class = OrderedResults
 
@@ -148,9 +153,12 @@ def _check_inputs(self, endog, exog):
 
         return endog, labels, is_pandas
 
-    def _initialize_labels(self, labels):
+    def _initialize_labels(self, labels, k_levels=None):
         self.labels = labels
-        self.k_levels = len(labels)
+        if k_levels is None:
+            self.k_levels = len(labels)
+        else:
+            self.k_levels = k_levels
 
         if self.exog is not None: 
self.nobs, self.k_vars = self.exog.shape @@ -364,6 +372,9 @@ def fit(self, start_params=None, method='nm', maxiter=500, full_output=1, # use the proper result class ordmlefit = OrderedResults(self, mlefit) + # TODO: temporary, needs better fix, modelwc adds 1 by default + ordmlefit.hasconst = 0 + return ordmlefit diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 22d4e280439..582ba08d0ca 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -13,6 +13,9 @@ from .results.results_ordinal_model import data_store as ds from statsmodels.miscmodels.ordinal_model import OrderedModel +from statsmodels.discrete.discrete_model import Logit +from statsmodels.tools.tools import add_constant + class CheckOrdinalModelMixin(object): @@ -116,6 +119,9 @@ def test_results_other(self): fitted = self.resf.predict() assert_allclose(pred, fitted[-5:], rtol=1e-13) + n, k = res1.model.exog.shape + assert_equal(self.resf.df_resid, n - (k + 2)) + class TestLogitModel(CheckOrdinalModelMixin): @@ -393,3 +399,46 @@ def _cdf(self, x): cls.resp = resp cls.resf = resf cls.resu = resu + + +class TestLogitBinary(): + # compare OrderedModel with discrete Logit for binary case + def test_attributes(self): + data = ds.df + + mask_drop = data['apply'] == "somewhat likely" + data2 = data.loc[~mask_drop, :] + # we need to remove the category also from the Categorical Index + data2['apply'].cat.remove_categories("somewhat likely", inplace=True) + + # standard fit with pandas input + modp = OrderedModel(data2['apply'], + data2[['pared', 'public', 'gpa']], + distr='logit') + resp = modp.fit(method='bfgs', disp=False) + + exog = add_constant(data2[['pared', 'public', 'gpa']], prepend=False) + mod_logit = Logit(data2['apply'].cat.codes, exog) + res_logit = mod_logit.fit() + + attributes = "bse df_resid llf aic bic".split() + assert_allclose(resp.params[:3], res_logit.params[:3], rtol=1e-5) + assert_allclose(resp.params[3], -res_logit.params[3], rtol=1e-5) + for attr in attributes: + assert_allclose(getattr(resp, attr), getattr(res_logit, attr), + rtol=1e-4) + + resp = modp.fit(method='bfgs', disp=False, + cov_type="hac", cov_kwds={"maxlags": 2}) + res_logit = mod_logit.fit(method='bfgs', disp=False, + cov_type="hac", cov_kwds={"maxlags": 2}) + for attr in attributes: + assert_allclose(getattr(resp, attr), getattr(res_logit, attr), + rtol=1e-4) + + resp = modp.fit(method='bfgs', disp=False, cov_type="hc1") + res_logit = mod_logit.fit(method='bfgs', disp=False, + cov_type="hc1") + for attr in attributes: + assert_allclose(getattr(resp, attr), getattr(res_logit, attr), + rtol=1e-4)
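
The example below is a minimal usage sketch, not part of the patch series above: the simulated dataset, seed, cutoff values, and coefficients are illustrative assumptions. It exercises only behavior introduced by these commits: df_model counting the k_levels - 1 threshold parameters (PATCH 24), and predict forwarding exog and offset to _linpred with offset defaulting to 0 when exog is passed (PATCH 23).

import numpy as np

from statsmodels.miscmodels.ordinal_model import OrderedModel

# illustrative simulated data: 2 regressors, 3 ordered levels
rng = np.random.default_rng(12345)
nobs = 500
exog = rng.normal(size=(nobs, 2))
y_latent = exog @ np.array([1.0, -0.5]) + rng.logistic(size=nobs)
endog = np.searchsorted([-0.5, 1.0], y_latent)  # level codes 0, 1, 2

mod = OrderedModel(endog, exog, distr='logit')
res = mod.fit(method='bfgs', disp=False)

# PATCH 24: df_model includes the k_levels - 1 threshold parameters,
# so df_resid = nobs - (k_vars + k_levels - 1)
assert res.df_resid == nobs - (mod.k_vars + mod.k_levels - 1)

# PATCH 23: an offset enters the linear prediction; predict forwards
# exog and offset to _linpred
mod_off = OrderedModel(endog, exog, offset=np.ones(nobs), distr='logit')
res_off = mod_off.fit(method='bfgs', disp=False)

# when exog is provided and offset is omitted, the offset defaults to 0
p_default = res_off.predict(exog[:5])
p_zero = res_off.predict(exog[:5], offset=np.zeros(5))
assert np.allclose(p_default, p_zero)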