Skip to content

Commit

Permalink
ENH: properly connect param_names, give pandas codes endog to super
Browse files (browse the repository at this point in the history)
  • Loading branch information
josef-pkt committed Sep 5, 2020
1 parent 2082ad6 commit c05cead
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 31 deletions.
58 changes: 30 additions & 28 deletions statsmodels/miscmodels/ordinal_model.py
Expand Up @@ -85,19 +85,32 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
# TODO: check if super can handle offset
self.offset = offset

self.names, endog, exog = self._check_inputs(endog, exog)
endog, labels, is_pandas = self._check_inputs(endog, exog)

super(OrderedModel, self).__init__(endog, exog, **kwds)

unique, index = np.unique(self.endog, return_inverse=True)
self.k_levels = len(unique)
self.endog = index
self.labels = unique
if not is_pandas:
unique, index = np.unique(self.endog, return_inverse=True)
self.endog = index
labels = unique

self.labels = labels
self.k_levels = len(labels)

if self.exog is not None:
self.nobs, self.k_vars = self.exog.shape
else: # no exog in model
self.nobs, self.k_vars = self.endog.shape[0], 0

threshold_names = [str(x) + '/' + str(y)
for x, y in zip(labels[:-1], labels[1:])]

# from GenericLikelihoodModel.fit
if self.exog is not None:
self.exog_names.extend(threshold_names)
else:
self.data.xnames = threshold_names

self.results_class = OrderedResults

def _check_inputs(self, endog, exog):
Expand All @@ -106,20 +119,14 @@ def _check_inputs(self, endog, exog):
support for endog and exog. Also retrieves columns & categories
names for .summary() of the results class.
"""
names = {}
if not isinstance(self.distr, stats.rv_continuous):
msg = (
f"{self.distr.name} must be a scipy.stats distribution."
)
raise ValueError(msg)

# Pandas' support
if (isinstance(exog, pd.DataFrame)) or (isinstance(exog, pd.Series)):
exog_name = ([exog.name] if isinstance(exog, pd.Series)
else exog.columns.tolist())
names['xname'] = exog_name
# exog = np.asarray(exog)

labels = None
is_pandas = False
if isinstance(endog, pd.Series):
if isinstance(endog.dtypes, CategoricalDtype):
if not endog.dtype.ordered:
Expand All @@ -129,20 +136,19 @@ def _check_inputs(self, endog, exog):
"categories. ordered == True preferred.",
Warning)
endog_name = endog.name
threshold_name = [str(x) + '/' + str(y)
for x, y in zip(endog.values.categories[:-1],
endog.values.categories[1:])]
names['yname'] = endog_name
names['xname'] = names['xname'] + threshold_name
endog = np.asarray(endog.values.codes)
labels = endog.values.categories
endog = endog.cat.codes
if endog.min() == -1: # means there is a missing value
raise ValueError("missing values in categorical endog are "
"not supported")
endog.name = endog_name
is_pandas = True
else:
msg = (
"If the endog is a pandas.Serie "
"it must be of categoricalDtype."
)
msg = ("If endog is a pandas.Series, "
"it must be of CategoricalDtype.")
raise ValueError(msg)

return names, endog, exog
return endog, labels, is_pandas

def cdf(self, x):
"""cdf evaluated at x
Expand Down Expand Up @@ -331,7 +337,3 @@ def pred_table(self):
table = pd.crosstab(observed, predicted, margins=True, dropna=False)
return table

@Appender(GenericLikelihoodModelResults.summary.__doc__)
def summary(self, yname=None, xname=None, title=None, alpha=.05):
names = self.model.names
return super(OrderedResults, self).summary(**names)
17 changes: 14 additions & 3 deletions statsmodels/miscmodels/tests/test_ordinal_model.py
Expand Up @@ -72,7 +72,16 @@ def test_unordered(self):

def test_results_other(self):

res1 = self.res1
res1 = self.res1 # numpy
resp = self.resp # pandas

param_names_np = ['x1', 'x2', 'x3', '0/1', '1/2']
param_names_pd = ['pared', 'public', 'gpa', 'unlikely/somewhat likely',
'somewhat likely/very likely']

assert res1.model.data.param_names == param_names_np
assert self.resp.model.data.param_names == param_names_pd
assert self.resp.model.endog_names == "apply"

# results
if hasattr(self, "pred_table"):
Expand All @@ -85,7 +94,9 @@ def test_results_other(self):
# inherited
tt = res1.t_test(np.eye(len(res1.params)))
assert_allclose(tt.pvalue, res1.pvalues, rtol=1e-13)
# TODO: test using string definition of constraints

tt = resp.t_test(['pared', 'public', 'gpa']) # pandas names
assert_allclose(tt.pvalue, res1.pvalues[:3], rtol=1e-13)

pred = res1.predict(exog=res1.model.exog[-5:])
fitted = res1.predict()
Expand Down Expand Up @@ -194,7 +205,7 @@ def test_loglikerelated(self):
# null model
mod_null = OrderedModel(mod.endog, None,
offset=np.zeros(mod.nobs),
distr='probit')
distr=mod.distr)
null_params = mod.start_params
res_null = mod_null.fit(method='bfgs', disp=False)
assert_allclose(res_null.params, null_params[mod.k_vars:], rtol=1e-8)
Expand Down

0 comments on commit c05cead

Please sign in to comment.