BUG: fix df_resid, unit test compared to discrete Logit
josef-pkt committed Sep 21, 2020
1 parent 97101b3 commit ce0899b
Showing 3 changed files with 73 additions and 6 deletions.
7 changes: 7 additions & 0 deletions statsmodels/base/model.py
@@ -2492,6 +2492,13 @@ def __init__(self, model, mlefit):
         self._cache = {}
         self.__dict__.update(mlefit.__dict__)

+        k_params = len(mlefit.params)
+        # checks mainly for adding new models or subclassing
+        if self.df_model + self.model.k_constant != k_params:
+            warnings.warn("df_model + k_constant differs from nparams")
+        if self.df_resid != self.nobs - k_params:
+            warnings.warn("df_resid differs from nobs - nparams")
+
     def summary(self, yname=None, xname=None, title=None, alpha=.05):
         """Summarize the Regression Results
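
For reference, the invariant these warnings enforce can be checked on any fitted results object; a minimal sketch against discrete Logit using the public statsmodels API (simulated data, not part of this commit):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = sm.add_constant(rng.normal(size=(100, 3)))
y = (rng.uniform(size=100) < 0.5).astype(int)

res = sm.Logit(y, x).fit(disp=False)
k_params = len(res.params)
# df_model excludes the constant; df_resid subtracts all estimated parameters
assert res.df_model + res.model.k_constant == k_params
assert res.df_resid == res.nobs - k_params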
23 changes: 17 additions & 6 deletions statsmodels/miscmodels/ordinal_model.py
@@ -90,7 +90,7 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
         endog, labels, is_pandas = self._check_inputs(endog, exog)

         super(OrderedModel, self).__init__(endog, exog, **kwds)
-
+        k_levels = None  # initialize
         if not is_pandas:
             if self.endog.ndim == 1:
                 unique, index = np.unique(self.endog, return_inverse=True)
@@ -100,16 +100,21 @@ def __init__(self, endog, exog, offset=None, distr='probit', **kwds):
                 if not hasattr(self, "design_info"):
                     raise ValueError("2-dim endog not supported")
                 # this branch is currently only in support of from_formula
-                # labels here are only needed to choose k_levels in initialize
-                labels = [str(i) for i in range(self.endog.shape[1])]
+                # we need to initialize k_levels correctly for df_resid
+                k_levels = self.endog.shape[1]
+                labels = []
                 # Note: Doing the following here would break from_formula
                 # self.endog = self.endog.argmax(1)

         if self.k_constant > 0:
             raise ValueError("there should not be a constant in the model")

-        self._initialize_labels(labels)
+        self._initialize_labels(labels, k_levels=k_levels)
+
+        # adjust df
+        self.k_extra = self.k_levels - 1
+        self.df_model = self.k_vars + self.k_extra
+        self.df_resid = self.nobs - self.df_model

         self.results_class = OrderedResults
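
The df bookkeeping above counts the threshold parameters: a model with k_levels ordered categories estimates k_levels - 1 cut points in addition to the k_vars slopes, and no constant. A small sketch of the same arithmetic (the helper function is illustrative, not part of the codebase):

def ordinal_df(nobs, k_vars, k_levels):
    # k_levels - 1 threshold (cut point) parameters, no constant
    k_extra = k_levels - 1
    df_model = k_vars + k_extra
    df_resid = nobs - df_model
    return df_model, df_resid

# e.g. 400 observations, 3 regressors, 3 ordered categories:
print(ordinal_df(400, 3, 3))  # (5, 395)

This is also what the new unit test below asserts: with 3 outcome levels, df_resid == nobs - (k_vars + 2).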

@@ -148,9 +153,12 @@ def _check_inputs(self, endog, exog):

         return endog, labels, is_pandas

-    def _initialize_labels(self, labels):
+    def _initialize_labels(self, labels, k_levels=None):
         self.labels = labels
-        self.k_levels = len(labels)
+        if k_levels is None:
+            self.k_levels = len(labels)
+        else:
+            self.k_levels = k_levels

         if self.exog is not None:
             self.nobs, self.k_vars = self.exog.shape
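
Passing k_levels explicitly matters for the 2-dim endog branch in __init__ above, where the label list is now empty; a hypothetical illustration of why it cannot be recovered from labels there:

import numpy as np

# one-hot (2-dim) endog: 5 observations, 3 ordered categories
endog_onehot = np.eye(3)[[0, 1, 2, 1, 0]]
labels = []                           # no labels in this branch
k_levels = endog_onehot.shape[1]      # 3, must be passed explicitly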
@@ -364,6 +372,9 @@ def fit(self, start_params=None, method='nm', maxiter=500, full_output=1,
         # use the proper result class
         ordmlefit = OrderedResults(self, mlefit)

+        # TODO: temporary, needs better fix, modelwc adds 1 by default
+        ordmlefit.hasconst = 0
+
         return ordmlefit
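
Setting hasconst = 0 keeps a spurious constant out of the parameter count. One place this shows up is the AIC, which counts every estimated parameter; a generic sketch of the standard definition (the helper is illustrative, not statsmodels code):

def aic(llf, k_params):
    # counting a constant that is not in the model inflates this by 2
    return 2 * k_params - 2 * llf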
49 changes: 49 additions & 0 deletions statsmodels/miscmodels/tests/test_ordinal_model.py
@@ -13,6 +13,9 @@
 from .results.results_ordinal_model import data_store as ds
 from statsmodels.miscmodels.ordinal_model import OrderedModel

+from statsmodels.discrete.discrete_model import Logit
+from statsmodels.tools.tools import add_constant
+

 class CheckOrdinalModelMixin(object):

@@ -116,6 +119,9 @@ def test_results_other(self):
         fitted = self.resf.predict()
         assert_allclose(pred, fitted[-5:], rtol=1e-13)

+        n, k = res1.model.exog.shape
+        assert_equal(self.resf.df_resid, n - (k + 2))
+

 class TestLogitModel(CheckOrdinalModelMixin):

@@ -393,3 +399,46 @@ def _cdf(self, x):
         cls.resp = resp
         cls.resf = resf
         cls.resu = resu
+
+
+class TestLogitBinary():
+    # compare OrderedModel with discrete Logit for binary case
+    def test_attributes(self):
+        data = ds.df
+
+        mask_drop = data['apply'] == "somewhat likely"
+        data2 = data.loc[~mask_drop, :]
+        # we need to remove the category also from the Categorical Index
+        data2['apply'].cat.remove_categories("somewhat likely", inplace=True)
+
+        # standard fit with pandas input
+        modp = OrderedModel(data2['apply'],
+                            data2[['pared', 'public', 'gpa']],
+                            distr='logit')
+        resp = modp.fit(method='bfgs', disp=False)
+
+        exog = add_constant(data2[['pared', 'public', 'gpa']], prepend=False)
+        mod_logit = Logit(data2['apply'].cat.codes, exog)
+        res_logit = mod_logit.fit()
+
+        attributes = "bse df_resid llf aic bic".split()
+        assert_allclose(resp.params[:3], res_logit.params[:3], rtol=1e-5)
+        assert_allclose(resp.params[3], -res_logit.params[3], rtol=1e-5)
+        for attr in attributes:
+            assert_allclose(getattr(resp, attr), getattr(res_logit, attr),
+                            rtol=1e-4)
+
+        resp = modp.fit(method='bfgs', disp=False,
+                        cov_type="hac", cov_kwds={"maxlags": 2})
+        res_logit = mod_logit.fit(method='bfgs', disp=False,
+                                  cov_type="hac", cov_kwds={"maxlags": 2})
+        for attr in attributes:
+            assert_allclose(getattr(resp, attr), getattr(res_logit, attr),
+                            rtol=1e-4)
+
+        resp = modp.fit(method='bfgs', disp=False, cov_type="hc1")
+        res_logit = mod_logit.fit(method='bfgs', disp=False,
+                                  cov_type="hc1")
+        for attr in attributes:
+            assert_allclose(getattr(resp, attr), getattr(res_logit, attr),
+                            rtol=1e-4)
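
The sign flip on the last coefficient reflects the parameterization: OrderedModel evaluates cdf(threshold - x'b), so in the binary case the single threshold equals minus the Logit intercept. A sketch of the same comparison on simulated data (data and seed here are illustrative, not from the test suite):

import numpy as np
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

rng = np.random.default_rng(1)
x = rng.normal(size=(500, 2))
y = (x @ np.array([1.0, -0.5]) + rng.logistic(size=500) > 0.25).astype(int)

res_ord = OrderedModel(y, x, distr='logit').fit(method='bfgs', disp=False)
res_log = sm.Logit(y, sm.add_constant(x, prepend=False)).fit(disp=False)

print(res_ord.params[:2], res_log.params[:2])   # slopes agree
print(res_ord.params[2], -res_log.params[2])    # threshold == -intercept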
