Skip to content

Commit

Permalink
Merge f835d24 into 8f7bd8c
Browse files Browse the repository at this point in the history
  • Loading branch information
kshedden committed Nov 26, 2020
2 parents 8f7bd8c + f835d24 commit 6c1248b
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 12 deletions.
89 changes: 77 additions & 12 deletions statsmodels/genmod/generalized_linear_model.py
Expand Up @@ -1670,27 +1670,44 @@ def llnull(self):
freq_weights=self._freq_weights,
scale=self.scale)

def llf_scaled(self, scale=None):
    """
    Return the log-likelihood evaluated at the given scale, using the
    estimated scale if the provided scale is None.  In the Gaussian
    case with a linear (identity) link and no scale given, the
    concentrated log-likelihood is returned.

    Parameters
    ----------
    scale : float, optional
        The scale parameter at which to evaluate the log-likelihood.
        If None, the estimated scale is used; for a Gaussian family
        with an identity link the scale is instead profiled out
        (concentrated log-likelihood).

    Returns
    -------
    float
        The log-likelihood of the fitted model at the given scale.
    """
    _modelfamily = self.family
    if scale is None:
        if (isinstance(self.family, families.Gaussian) and
                isinstance(self.family.link, families.links.Power) and
                (self.family.link.power == 1.)):
            # Scale for the concentrated Gaussian log likelihood
            # (profile log likelihood with the scale parameter
            # profiled out).
            scale = (np.power(self._endog - self.mu, 2) *
                     self._iweights).sum()
            scale /= self.model.wnobs
        else:
            scale = self.scale
    val = _modelfamily.loglike(self._endog, self.mu,
                               var_weights=self._var_weights,
                               freq_weights=self._freq_weights,
                               scale=scale)
    return val

@cached_value
def llf(self):
    """
    Value of the log-likelihood function evaluated at params.

    See statsmodels.families.family for the distribution-specific
    log-likelihoods.  The result uses the concentrated
    log-likelihood when the family is Gaussian and the link is
    linear; otherwise the non-concentrated log-likelihood is
    evaluated at the estimated scale.
    """
    # Delegate to llf_scaled; passing no scale selects the
    # estimated (or concentrated) scale automatically.
    value = self.llf_scaled(scale=None)
    return value

@cached_value
def aic(self):
"""
Expand Down Expand Up @@ -1759,6 +1776,54 @@ def bic_llf(self):
self.df_model+self.df_resid+1
)

def infocrit(self, ic_type, scale=None):
    """
    Return an information criterion for the model.

    Parameters
    ----------
    ic_type : str
        One of 'aic', 'bic', or 'qaic' (case-insensitive).
    scale : float, optional
        The scale parameter estimated using the parent model,
        used only for qaic.

    Returns
    -------
    float
        The value of the requested information criterion.

    Raises
    ------
    ValueError
        If `ic_type` is not one of 'aic', 'bic', or 'qaic', or if
        'qaic' is requested without providing `scale`.

    Notes
    -----
    The quasi-Akaike Information criterion (qaic) is -2 *
    `llf`/`scale` + 2 * (`df_model` + 1).  It may not give
    meaningful results except for Poisson and related models.

    The QAIC (ic_type='qaic') must be evaluated with a provided
    scale parameter.  Two QAIC values are only comparable if they
    are calculated using the same scale parameter.  The scale
    parameter should be estimated using the largest model among
    all models being compared.

    References
    ----------
    Burnham KP, Anderson DR (2002). Model Selection and Multimodel
    Inference; Springer New York.
    """

    ic_type = ic_type.lower()

    if ic_type == "aic":
        return self.aic
    elif ic_type == "bic":
        return self.bic
    elif ic_type == "qaic":
        # QAIC requires an externally estimated scale; without this
        # check the division below would fail with an opaque
        # TypeError when scale is None.
        if scale is None:
            raise ValueError("QAIC requires the scale parameter to "
                             "be provided.")
        f = self.model.family
        fl = (families.Poisson, families.NegativeBinomial,
              families.Binomial)
        if not isinstance(f, fl):
            msg = "QAIC is only valid for Binomial, Poisson and "
            msg += "Negative Binomial families."
            warnings.warn(msg)
        # Evaluate the log-likelihood at unit scale so the provided
        # scale enters only through the QAIC formula itself.
        llf = self.llf_scaled(scale=1)
        return -2 * llf / scale + 2 * (self.df_model + 1)

    # Previously an unrecognized ic_type silently returned None.
    raise ValueError("ic_type must be one of 'aic', 'bic' or 'qaic'")

@Appender(pred.get_prediction_glm.__doc__)
def get_prediction(self, exog=None, exposure=None, offset=None,
transform=True, linear=False,
Expand Down
24 changes: 24 additions & 0 deletions statsmodels/genmod/tests/test_glm.py
Expand Up @@ -2401,3 +2401,27 @@ def test_output_exposure_null(reset_randomstate):
assert_allclose(model.llnull, null_model.llf)
# Check that they are different
assert np.abs(null_model_without_exposure.llf - model.llnull) > 1

def test_qaic():

    # Binomial GLM example taken from the documentation of the R
    # package MuMIn; checks the QAIC computed by infocrit against
    # the (df-adjusted) value reported by R.
    import patsy
    dose = np.concatenate((np.arange(6), np.arange(6)))
    groups = ["M"] * 6 + ["F"] * 6
    dead = [10, 4, 9, 12, 18, 20, 0, 2, 6, 10, 12, 16]
    data = pd.DataFrame({"ldose": dose, "sex": groups, "numdead": dead})
    data["numalive"] = 20 - data["numdead"]
    data["SF"] = data["numdead"]

    endog = data[["numalive", "numdead"]].values
    exog = patsy.dmatrix("sex*ldose", data=data, return_type='dataframe')
    model = GLM(endog, exog, family=sm.families.Binomial())
    result = model.fit()
    scale = 2.412699
    qaic = result.infocrit(ic_type="qaic", scale=scale)

    # R gives 31.13266 because it uses a df that is 1 greater,
    # presumably because they count the scale parameter in df.
    # This won't matter when comparing models by differencing
    # QAICs.
    assert_allclose(qaic, 29.13266, rtol=1e-5, atol=1e-5)

0 comments on commit 6c1248b

Please sign in to comment.