Skip to content

Commit

Permalink
Merge f835d24 into 8f7bd8c
Browse files Browse the repository at this point in the history
  • Loading branch information
kshedden committed Nov 26, 2020
2 parents 8f7bd8c + f835d24 commit 6c1248b
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 12 deletions.
89 changes: 77 additions & 12 deletions statsmodels/genmod/generalized_linear_model.py
Expand Up @@ -1670,27 +1670,44 @@ def llnull(self):
freq_weights=self._freq_weights,
scale=self.scale)

def llf_scaled(self, scale=None):
    """
    Return the log-likelihood evaluated at the given scale, using the
    estimated scale if the provided scale is None.  In the Gaussian
    case with a linear (identity) link and no scale given, the
    concentrated log-likelihood is returned.

    Parameters
    ----------
    scale : float, optional
        The scale parameter at which to evaluate the log-likelihood.
        If None, the estimated scale is used; for a Gaussian family
        with an identity link the scale is instead profiled out
        (concentrated log-likelihood).

    Returns
    -------
    float
        The log-likelihood of the fitted model at the given scale.
    """
    _modelfamily = self.family
    if scale is None:
        if (isinstance(self.family, families.Gaussian) and
                isinstance(self.family.link, families.links.Power) and
                (self.family.link.power == 1.)):
            # Scale for the concentrated Gaussian log likelihood
            # (profile log likelihood with the scale parameter
            # profiled out).
            scale = (np.power(self._endog - self.mu, 2) *
                     self._iweights).sum()
            scale /= self.model.wnobs
        else:
            scale = self.scale
    val = _modelfamily.loglike(self._endog, self.mu,
                               var_weights=self._var_weights,
                               freq_weights=self._freq_weights,
                               scale=scale)
    return val

@cached_value
def llf(self):
    """
    Value of the log-likelihood function evaluated at params.

    See statsmodels.families.family for the distribution-specific
    log-likelihoods.  The result uses the concentrated
    log-likelihood when the family is Gaussian and the link is
    linear; otherwise the non-concentrated log-likelihood is
    evaluated at the estimated scale.
    """
    # Delegate to llf_scaled; passing no scale selects the
    # estimated (or concentrated) scale automatically.
    value = self.llf_scaled(scale=None)
    return value

@cached_value
def aic(self):
"""
Expand Down Expand Up @@ -1759,6 +1776,54 @@ def bic_llf(self):
self.df_model+self.df_resid+1
)

def infocrit(self, ic_type, scale=None):
    """
    Return an information criterion for the model.

    Parameters
    ----------
    ic_type : str
        One of 'aic', 'bic', or 'qaic' (case-insensitive).
    scale : float, optional
        The scale parameter estimated using the parent model,
        used only for qaic.

    Returns
    -------
    float
        The value of the requested information criterion.

    Raises
    ------
    ValueError
        If `ic_type` is not one of 'aic', 'bic', or 'qaic', or if
        'qaic' is requested without providing `scale`.

    Notes
    -----
    The quasi-Akaike Information criterion (qaic) is -2 *
    `llf`/`scale` + 2 * (`df_model` + 1).  It may not give
    meaningful results except for Poisson and related models.

    The QAIC (ic_type='qaic') must be evaluated with a provided
    scale parameter.  Two QAIC values are only comparable if they
    are calculated using the same scale parameter.  The scale
    parameter should be estimated using the largest model among
    all models being compared.

    References
    ----------
    Burnham KP, Anderson DR (2002). Model Selection and Multimodel
    Inference; Springer New York.
    """

    ic_type = ic_type.lower()

    if ic_type == "aic":
        return self.aic
    elif ic_type == "bic":
        return self.bic
    elif ic_type == "qaic":
        # QAIC requires an externally estimated scale; without this
        # check the division below would fail with an opaque
        # TypeError when scale is None.
        if scale is None:
            raise ValueError("QAIC requires the scale parameter to "
                             "be provided.")
        f = self.model.family
        fl = (families.Poisson, families.NegativeBinomial,
              families.Binomial)
        if not isinstance(f, fl):
            msg = "QAIC is only valid for Binomial, Poisson and "
            msg += "Negative Binomial families."
            warnings.warn(msg)
        # Evaluate the log-likelihood at unit scale so the provided
        # scale enters only through the QAIC formula itself.
        llf = self.llf_scaled(scale=1)
        return -2 * llf / scale + 2 * (self.df_model + 1)

    # Previously an unrecognized ic_type silently returned None.
    raise ValueError("ic_type must be one of 'aic', 'bic' or 'qaic'")

@Appender(pred.get_prediction_glm.__doc__)
def get_prediction(self, exog=None, exposure=None, offset=None,
transform=True, linear=False,
Expand Down
24 changes: 24 additions & 0 deletions statsmodels/genmod/tests/test_glm.py
Expand Up @@ -2401,3 +2401,27 @@ def test_output_exposure_null(reset_randomstate):
assert_allclose(model.llnull, null_model.llf)
# Check that they are different
assert np.abs(null_model_without_exposure.llf - model.llnull) > 1

def test_qaic():

    # Binomial GLM example taken from the documentation of the R
    # package MuMIn; checks the QAIC computed by infocrit against
    # the (df-adjusted) value reported by R.
    import patsy
    dose = np.concatenate((np.arange(6), np.arange(6)))
    groups = ["M"] * 6 + ["F"] * 6
    dead = [10, 4, 9, 12, 18, 20, 0, 2, 6, 10, 12, 16]
    data = pd.DataFrame({"ldose": dose, "sex": groups, "numdead": dead})
    data["numalive"] = 20 - data["numdead"]
    data["SF"] = data["numdead"]

    endog = data[["numalive", "numdead"]].values
    exog = patsy.dmatrix("sex*ldose", data=data, return_type='dataframe')
    model = GLM(endog, exog, family=sm.families.Binomial())
    result = model.fit()
    scale = 2.412699
    qaic = result.infocrit(ic_type="qaic", scale=scale)

    # R gives 31.13266 because it uses a df that is 1 greater,
    # presumably because they count the scale parameter in df.
    # This won't matter when comparing models by differencing
    # QAICs.
    assert_allclose(qaic, 29.13266, rtol=1e-5, atol=1e-5)

0 comments on commit 6c1248b

Please sign in to comment.