diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
index d3e97bb5f28..6a14b276919 100644
--- a/statsmodels/genmod/generalized_estimating_equations.py
+++ b/statsmodels/genmod/generalized_estimating_equations.py
@@ -29,7 +29,7 @@
 from scipy import stats
 import pandas as pd
 import patsy
-
+from collections import defaultdict
 from statsmodels.tools.decorators import cache_readonly
 import statsmodels.base.model as base
 # used for wrapper:
@@ -516,10 +516,7 @@ def __init__(self, endog, exog, groups, time=None, family=None,
         self.constraint = constraint
         self.update_dep = update_dep
 
-        self._fit_history = {'params': [],
-                             'score': [],
-                             'dep_params': [],
-                             'cov_adjust': []}
+        self._fit_history = defaultdict(list)
 
         # Pass groups, time, offset, and dep_data so they are
         # processed for missing data along with endog and exog.
@@ -805,6 +802,9 @@ def compare_score_test(self, submodel):
         if self.exog.shape[0] != submod.exog.shape[0]:
             msg = "Model and submodel have different numbers of cases."
             raise ValueError(msg)
+        if self.exog.shape[1] == submod.exog.shape[1]:
+            msg = "Model and submodel have the same number of variables"
+            warnings.warn(msg)
         if not isinstance(self.family, type(submod.family)):
             msg = "Model and submodel have different GLM families."
             warnings.warn(msg)
@@ -1263,10 +1263,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
         self.scaling_factor = scaling_factor
 
-        self._fit_history = {'params': [],
-                             'score': [],
-                             'dep_params': [],
-                             'cov_adjust': []}
+        self._fit_history = defaultdict(list)
 
         if self.weights is not None and cov_type == 'naive':
             raise ValueError("when using weights, cov_type may not be naive")
@@ -1368,7 +1365,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
         # attributes not needed during results__init__
         results.fit_history = self._fit_history
-        delattr(self, "_fit_history")
+        self.fit_history = defaultdict(list)
         results.score_norm = del_params
         results.converged = (del_params < ctol)
         results.cov_struct = self.cov_struct
@@ -1496,7 +1493,7 @@ class make sense when the model has been fit with regularization.
         mean_params = np.zeros(self.exog.shape[1])
         self.update_cached_means(mean_params)
         converged = False
-        fit_history = {'params': []}
+        fit_history = defaultdict(list)
 
         # Subtract this number from the total sample size when
         # normalizing the scale parameter estimate.
@@ -1915,11 +1912,21 @@ def qic(self, scale=None):
         """
         Returns the QIC and QICu information criteria.
 
-        For families with a scale parameter (e.g. Gaussian),
-        provide as the scale argument the estimated scale
-        from the largest model under consideration.
+        For families with a scale parameter (e.g. Gaussian), provide
+        as the scale argument the estimated scale from the largest
+        model under consideration.
+
+        If the scale parameter is not provided, the estimated scale
+        parameter is used. Doing this does not allow comparisons of
+        QIC values between models.
         """
 
+        # It is easy to forget to set the scale parameter. Sometimes
+        # this is intentional, so we warn.
+        if scale is None:
+            msg = "QIC values obtained using scale=None are not appropriate for comparing models"
+            warnings.warn(msg)
+
         if scale is None:
             scale = self.scale
 
diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index af8ad30dbec..d60ef0814ae 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -635,14 +635,19 @@ def test_compare_score_test(self, cov_struct):
 
         mod_sub = gee.GEE(endog, exog_sub, group, cov_struct=cov_struct())
         res_sub = mod_sub.fit()
-        mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
-        score_results = mod.compare_score_test(res_sub)
-        assert_almost_equal(score_results["statistic"],
-                            mod_lr.score_test_results["statistic"])
-        assert_almost_equal(score_results["p-value"],
-                            mod_lr.score_test_results["p-value"])
-        assert_almost_equal(score_results["df"],
-                            mod_lr.score_test_results["df"])
+
+        for call_fit in [False, True]:
+            mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
+            if call_fit:
+                # Should work with or without fitting the parent model
+                mod.fit()
+            score_results = mod.compare_score_test(res_sub)
+            assert_almost_equal(score_results["statistic"],
+                                mod_lr.score_test_results["statistic"])
+            assert_almost_equal(score_results["p-value"],
+                                mod_lr.score_test_results["p-value"])
+            assert_almost_equal(score_results["df"],
+                                mod_lr.score_test_results["df"])
 
 
     def test_compare_score_test_warnings(self):
@@ -682,6 +687,14 @@ def test_compare_score_test_warnings(self):
         mod = gee.GEE(endog, exog, group)
         _ = mod.compare_score_test(res_sub)
 
+        # Parent and submodel are the same dimension
+        with pytest.warns(UserWarning):
+            w = np.random.uniform(size=n)
+            mod_sub = gee.GEE(endog, exog, group)
+            res_sub = mod_sub.fit()
+            mod = gee.GEE(endog, exog, group)
+            _ = mod.compare_score_test(res_sub)
+
     def test_constraint_covtype(self):
         # Test constraints with different cov types
         np.random.seed(6432)
@@ -1883,8 +1896,10 @@ def test_ql_known(family):
     assert_allclose(ql1, qle1[0], rtol=1e-4)
     assert_allclose(ql2, qle2[0], rtol=1e-4)
 
-    qler1 = result1.qic()
-    qler2 = result2.qic()
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        qler1 = result1.qic()
+        qler2 = result2.qic()
     assert_equal(qler1, qle1[1:])
     assert_equal(qler2, qle2[1:])
 
@@ -1922,3 +1937,11 @@ def test_ql_diff(family):
     qle2, _, _ = model2.qic(result2.params, result2.scale,
                             result2.cov_params())
     assert_allclose(qle1 - qle2, qldiff, rtol=1e-5, atol=1e-5)
+
+def test_qic_warnings():
+    with pytest.warns(UserWarning):
+        fam = families.Gaussian()
+        y, x1, _, g = simple_qic_data(fam)
+        model = gee.GEE(y, x1, family=fam, groups=g)
+        result = model.fit()
+        result.qic()
diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py
index 4acfa59c7c4..284701d179a 100644
--- a/statsmodels/genmod/tests/test_glm.py
+++ b/statsmodels/genmod/tests/test_glm.py
@@ -1989,7 +1989,7 @@ def test_tweedie_EQL_upper_limit():
 
     # Un-regularized fit using gradients not IRLS
     fam = sm.families.Tweedie(var_power=2, eql=True)
     model1 = sm.GLM(y, x, family=fam)
-    result1 = model1.fit(method="newton")
+    result1 = model1.fit(method="newton", scale=scale)
     assert_allclose(result1.params, np.r_[4, 1, 1], atol=1e-3,
                     rtol=1e-1)