From c05ceadb5e5de8d4480e3df938a447f43869c98e Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Sat, 5 Sep 2020 16:19:44 -0400 Subject: [PATCH] ENH: properly connect param_names, give pandas codes endog to super --- statsmodels/miscmodels/ordinal_model.py | 58 ++++++++++--------- .../miscmodels/tests/test_ordinal_model.py | 17 +++++- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/statsmodels/miscmodels/ordinal_model.py b/statsmodels/miscmodels/ordinal_model.py index c0922dad0bd..7a146521055 100644 --- a/statsmodels/miscmodels/ordinal_model.py +++ b/statsmodels/miscmodels/ordinal_model.py @@ -85,19 +85,32 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds): # TODO: check if super can handle offset self.offset = offset - self.names, endog, exog = self._check_inputs(endog, exog) + endog, labels, is_pandas = self._check_inputs(endog, exog) super(OrderedModel, self).__init__(endog, exog, **kwds) - unique, index = np.unique(self.endog, return_inverse=True) - self.k_levels = len(unique) - self.endog = index - self.labels = unique + if not is_pandas: + unique, index = np.unique(self.endog, return_inverse=True) + self.endog = index + labels = unique + + self.labels = labels + self.k_levels = len(labels) if self.exog is not None: self.nobs, self.k_vars = self.exog.shape else: # no exog in model self.nobs, self.k_vars = self.endog.shape[0], 0 + + threshold_names = [str(x) + '/' + str(y) + for x, y in zip(labels[:-1], labels[1:])] + + # from GenericLikelihoodModel.fit + if self.exog is not None: + self.exog_names.extend(threshold_names) + else: + self.data.xnames = threshold_names + self.results_class = OrderedResults def _check_inputs(self, endog, exog): @@ -106,20 +119,14 @@ def _check_inputs(self, endog, exog): support for endog and exog. Also retrieves columns & categories names for .summary() of the results class. """ - names = {} if not isinstance(self.distr, stats.rv_continuous): msg = ( f"{self.distr.name} must be a scipy.stats distribution." ) raise ValueError(msg) - # Pandas' support - if (isinstance(exog, pd.DataFrame)) or (isinstance(exog, pd.Series)): - exog_name = ([exog.name] if isinstance(exog, pd.Series) - else exog.columns.tolist()) - names['xname'] = exog_name - # exog = np.asarray(exog) - + labels = None + is_pandas = False if isinstance(endog, pd.Series): if isinstance(endog.dtypes, CategoricalDtype): if not endog.dtype.ordered: @@ -129,20 +136,19 @@ def _check_inputs(self, endog, exog): "categories. ordered == True preferred.", Warning) endog_name = endog.name - threshold_name = [str(x) + '/' + str(y) - for x, y in zip(endog.values.categories[:-1], - endog.values.categories[1:])] - names['yname'] = endog_name - names['xname'] = names['xname'] + threshold_name - endog = np.asarray(endog.values.codes) + labels = endog.values.categories + endog = endog.cat.codes + if endog.min() == -1: # means there is a missing value + raise ValueError("missing values in categorical endog are " + "not supported") + endog.name = endog_name + is_pandas = True else: - msg = ( - "If the endog is a pandas.Serie " - "it must be of categoricalDtype." - ) + msg = ("If endog is a pandas.Series, " + "it must be of CategoricalDtype.") raise ValueError(msg) - return names, endog, exog + return endog, labels, is_pandas def cdf(self, x): """cdf evaluated at x @@ -331,7 +337,3 @@ def pred_table(self): table = pd.crosstab(observed, predicted, margins=True, dropna=False) return table - @Appender(GenericLikelihoodModelResults.summary.__doc__) - def summary(self, yname=None, xname=None, title=None, alpha=.05): - names = self.model.names - return super(OrderedResults, self).summary(**names) diff --git a/statsmodels/miscmodels/tests/test_ordinal_model.py b/statsmodels/miscmodels/tests/test_ordinal_model.py index 28e14403660..9fe9612b86d 100644 --- a/statsmodels/miscmodels/tests/test_ordinal_model.py +++ b/statsmodels/miscmodels/tests/test_ordinal_model.py @@ -72,7 +72,16 @@ def test_unordered(self): def test_results_other(self): - res1 = self.res1 + res1 = self.res1 # numpy + resp = self.resp # pandas + + param_names_np = ['x1', 'x2', 'x3', '0/1', '1/2'] + param_names_pd = ['pared', 'public', 'gpa', 'unlikely/somewhat likely', + 'somewhat likely/very likely'] + + assert res1.model.data.param_names == param_names_np + assert self.resp.model.data.param_names == param_names_pd + assert self.resp.model.endog_names == "apply" # results if hasattr(self, "pred_table"): @@ -85,7 +94,9 @@ def test_results_other(self): # inherited tt = res1.t_test(np.eye(len(res1.params))) assert_allclose(tt.pvalue, res1.pvalues, rtol=1e-13) - # TODO: test using string definition of constraints + + tt = resp.t_test(['pared', 'public', 'gpa']) # pandas names + assert_allclose(tt.pvalue, res1.pvalues[:3], rtol=1e-13) pred = res1.predict(exog=res1.model.exog[-5:]) fitted = res1.predict() @@ -194,7 +205,7 @@ def test_loglikerelated(self): # null model mod_null = OrderedModel(mod.endog, None, offset=np.zeros(mod.nobs), - distr='probit') + distr=mod.distr) null_params = mod.start_params res_null = mod_null.fit(method='bfgs', disp=False) assert_allclose(res_null.params, null_params[mod.k_vars:], rtol=1e-8)