From d5d3acb0711d5a48e67d02bba7a2a710f0973894 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 24 May 2019 22:34:36 -0400 Subject: [PATCH 1/8] Clear but don't delete fit_history from model --- .../generalized_estimating_equations.py | 16 +++++--------- statsmodels/genmod/tests/test_gee.py | 21 ++++++++++++------- statsmodels/genmod/tests/test_glm.py | 2 +- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py index c3c9a955bb4..9090ef9e44a 100644 --- a/statsmodels/genmod/generalized_estimating_equations.py +++ b/statsmodels/genmod/generalized_estimating_equations.py @@ -29,7 +29,7 @@ from scipy import stats import pandas as pd import patsy - +from collections import defaultdict from statsmodels.tools.decorators import cache_readonly import statsmodels.base.model as base # used for wrapper: @@ -512,10 +512,7 @@ def __init__(self, endog, exog, groups, time=None, family=None, self.constraint = constraint self.update_dep = update_dep - self._fit_history = {'params': [], - 'score': [], - 'dep_params': [], - 'cov_adjust': []} + self._fit_history = defaultdict(list) # Pass groups, time, offset, and dep_data so they are # processed for missing data along with endog and exog. @@ -1259,10 +1256,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None, self.scaling_factor = scaling_factor - self._fit_history = {'params': [], - 'score': [], - 'dep_params': [], - 'cov_adjust': []} + self._fit_history = defaultdict(list) if self.weights is not None and cov_type == 'naive': raise ValueError("when using weights, cov_type may not be naive") @@ -1364,7 +1358,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None, # attributes not needed during results__init__ results.fit_history = self._fit_history - delattr(self, "_fit_history") + self.fit_history = defaultdict(list) results.score_norm = del_params results.converged = (del_params < ctol) results.cov_struct = self.cov_struct @@ -1492,7 +1486,7 @@ class make sense when the model has been fit with regularization. mean_params = np.zeros(self.exog.shape[1]) self.update_cached_means(mean_params) converged = False - fit_history = {'params': []} + fit_history = defaultdict(list) # Subtract this number from the total sample size when # normalizing the scale parameter estimate. diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py index af8ad30dbec..dda12bb06a4 100644 --- a/statsmodels/genmod/tests/test_gee.py +++ b/statsmodels/genmod/tests/test_gee.py @@ -635,14 +635,19 @@ def test_compare_score_test(self, cov_struct): mod_sub = gee.GEE(endog, exog_sub, group, cov_struct=cov_struct()) res_sub = mod_sub.fit() - mod = gee.GEE(endog, exog, group, cov_struct=cov_struct()) - score_results = mod.compare_score_test(res_sub) - assert_almost_equal(score_results["statistic"], - mod_lr.score_test_results["statistic"]) - assert_almost_equal(score_results["p-value"], - mod_lr.score_test_results["p-value"]) - assert_almost_equal(score_results["df"], - mod_lr.score_test_results["df"]) + + for f in False, True: + mod = gee.GEE(endog, exog, group, cov_struct=cov_struct()) + if f: + # Should work with or without fitting the parent model + mod.fit() + score_results = mod.compare_score_test(res_sub) + assert_almost_equal(score_results["statistic"], + mod_lr.score_test_results["statistic"]) + assert_almost_equal(score_results["p-value"], + mod_lr.score_test_results["p-value"]) + assert_almost_equal(score_results["df"], + mod_lr.score_test_results["df"]) def test_compare_score_test_warnings(self): diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py index 4acfa59c7c4..284701d179a 100644 --- a/statsmodels/genmod/tests/test_glm.py +++ b/statsmodels/genmod/tests/test_glm.py @@ -1989,7 +1989,7 @@ def test_tweedie_EQL_upper_limit(): # Un-regularized fit using gradients not IRLS fam = sm.families.Tweedie(var_power=2, eql=True) model1 = sm.GLM(y, x, family=fam) - result1 = model1.fit(method="newton") + result1 = model1.fit(method="newton", scale=scale) assert_allclose(result1.params, np.r_[4, 1, 1], atol=1e-3, rtol=1e-1) From 7a4418f8c3257fafc6cebb81b7f2bfd96318aacc Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 24 May 2019 22:57:29 -0400 Subject: [PATCH 2/8] Add warning for score test with exog versus same exog --- statsmodels/genmod/generalized_estimating_equations.py | 3 +++ statsmodels/genmod/tests/test_gee.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py index 9090ef9e44a..f8f34708812 100644 --- a/statsmodels/genmod/generalized_estimating_equations.py +++ b/statsmodels/genmod/generalized_estimating_equations.py @@ -798,6 +798,9 @@ def compare_score_test(self, submodel): if self.exog.shape[0] != submod.exog.shape[0]: msg = "Model and submodel have different numbers of cases." raise ValueError(msg) + if self.exog.shape[1] == submod.exog.shape[1]: + msg = "Model and submodel have the same number of variables" + warnings.warn(msg) if not isinstance(self.family, type(submod.family)): msg = "Model and submodel have different GLM families." warnings.warn(msg) diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py index dda12bb06a4..0de3f1e4fa7 100644 --- a/statsmodels/genmod/tests/test_gee.py +++ b/statsmodels/genmod/tests/test_gee.py @@ -687,6 +687,14 @@ def test_compare_score_test_warnings(self): mod = gee.GEE(endog, exog, group) _ = mod.compare_score_test(res_sub) + # Parent and submodel are the same dimension + with assert_warns(UserWarning): + w = np.random.uniform(size=n) + mod_sub = gee.GEE(endog, exog, group) + res_sub = mod_sub.fit() + mod = gee.GEE(endog, exog, group) + _ = mod.compare_score_test(res_sub) + def test_constraint_covtype(self): # Test constraints with different cov types np.random.seed(6432) From 77c89d457095b5dc78e89fb6ff0b6bb3f9f00a14 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Sat, 25 May 2019 10:33:30 -0400 Subject: [PATCH 3/8] improve warnings --- .../genmod/generalized_estimating_equations.py | 16 +++++++++++++--- statsmodels/genmod/tests/test_gee.py | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py index f8f34708812..12ba12992a9 100644 --- a/statsmodels/genmod/generalized_estimating_equations.py +++ b/statsmodels/genmod/generalized_estimating_equations.py @@ -1908,11 +1908,21 @@ def qic(self, scale=None): """ Returns the QIC and QICu information criteria. - For families with a scale parameter (e.g. Gaussian), - provide as the scale argument the estimated scale - from the largest model under consideration. + For families with a scale parameter (e.g. Gaussian), provide + as the scale argument the estimated scale from the largest + model under consideration. + + If the scale parameter is not provided, the estimated scale + parameter is used. Doing this does not allow comparisons of + QIC values between models. """ + # It is easy to forget to set the scale parameter. Sometimes + # this is intentional, so we warn. + if isinstance(self.family, families.Gaussian) and scale is None: + msg = "QIC: Using scale=None with Gaussian family" + warnings.warn(msg) + if scale is None: scale = self.scale diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py index 0de3f1e4fa7..962eb06491a 100644 --- a/statsmodels/genmod/tests/test_gee.py +++ b/statsmodels/genmod/tests/test_gee.py @@ -1896,8 +1896,10 @@ def test_ql_known(family): assert_allclose(ql1, qle1[0], rtol=1e-4) assert_allclose(ql2, qle2[0], rtol=1e-4) - qler1 = result1.qic() - qler2 = result2.qic() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + qler1 = result1.qic() + qler2 = result2.qic() assert_equal(qler1, qle1[1:]) assert_equal(qler2, qle2[1:]) @@ -1935,3 +1937,11 @@ def test_ql_diff(family): qle2, _, _ = model2.qic(result2.params, result2.scale, result2.cov_params()) assert_allclose(qle1 - qle2, qldiff, rtol=1e-5, atol=1e-5) + +def test_qic_warnings(): + with assert_warns(UserWarning): + fam = families.Gaussian() + y, x1, x2, g = simple_qic_data(fam) + model = gee.GEE(y, x1, family=fam, groups=g) + result = model.fit() + result.qic() From 600e6de8767525795c67274e74a87b83d3f95020 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Sat, 25 May 2019 11:50:27 -0400 Subject: [PATCH 4/8] clean up test --- statsmodels/genmod/tests/test_gee.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py index 962eb06491a..e397f0fa054 100644 --- a/statsmodels/genmod/tests/test_gee.py +++ b/statsmodels/genmod/tests/test_gee.py @@ -636,9 +636,9 @@ def test_compare_score_test(self, cov_struct): mod_sub = gee.GEE(endog, exog_sub, group, cov_struct=cov_struct()) res_sub = mod_sub.fit() - for f in False, True: + for call_fit in [False, True]: mod = gee.GEE(endog, exog, group, cov_struct=cov_struct()) - if f: + if call_fit: # Should work with or without fitting the parent model mod.fit() score_results = mod.compare_score_test(res_sub) From fca7ad2da5868ca2bed9f382dfe63986532dec14 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Sat, 25 May 2019 11:57:08 -0400 Subject: [PATCH 5/8] clean up test --- statsmodels/genmod/tests/test_gee.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py index e397f0fa054..1a984bfb558 100644 --- a/statsmodels/genmod/tests/test_gee.py +++ b/statsmodels/genmod/tests/test_gee.py @@ -1941,7 +1941,7 @@ def test_ql_diff(family): def test_qic_warnings(): with assert_warns(UserWarning): fam = families.Gaussian() - y, x1, x2, g = simple_qic_data(fam) + y, x1, _, g = simple_qic_data(fam) model = gee.GEE(y, x1, family=fam, groups=g) result = model.fit() result.qic() From e7ab942e96ba72f4e91e0c6f262c807ecdd4b82f Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Sat, 25 May 2019 14:40:06 -0400 Subject: [PATCH 6/8] fix qic warning --- statsmodels/genmod/generalized_estimating_equations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py index 12ba12992a9..3a95a19019c 100644 --- a/statsmodels/genmod/generalized_estimating_equations.py +++ b/statsmodels/genmod/generalized_estimating_equations.py @@ -1919,7 +1919,7 @@ def qic(self, scale=None): # It is easy to forget to set the scale parameter. Sometimes # this is intentional, so we warn. - if isinstance(self.family, families.Gaussian) and scale is None: + if (type(self.family) == families.Gaussian) and scale is None: msg = "QIC: Using scale=None with Gaussian family" warnings.warn(msg) From e816bded6b02e5b8be861b01d3fac1b61a5bc3a0 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Sat, 25 May 2019 21:52:02 -0400 Subject: [PATCH 7/8] modify warning --- statsmodels/genmod/generalized_estimating_equations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py index 3a95a19019c..6e76c780c30 100644 --- a/statsmodels/genmod/generalized_estimating_equations.py +++ b/statsmodels/genmod/generalized_estimating_equations.py @@ -1919,8 +1919,8 @@ def qic(self, scale=None): # It is easy to forget to set the scale parameter. Sometimes # this is intentional, so we warn. - if (type(self.family) == families.Gaussian) and scale is None: - msg = "QIC: Using scale=None with Gaussian family" + if scale is None: + msg = "QIC values obtained using scale=None are not appropriate for comparing models" warnings.warn(msg) if scale is None: From 47e6a212a482f370bab05282a39ec0f4a847b608 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 31 May 2019 15:57:00 -0400 Subject: [PATCH 8/8] address testing issue detecting warning --- statsmodels/genmod/tests/test_gee.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py index 1a984bfb558..d60ef0814ae 100644 --- a/statsmodels/genmod/tests/test_gee.py +++ b/statsmodels/genmod/tests/test_gee.py @@ -688,7 +688,7 @@ def test_compare_score_test_warnings(self): _ = mod.compare_score_test(res_sub) # Parent and submodel are the same dimension - with assert_warns(UserWarning): + with pytest.warns(UserWarning): w = np.random.uniform(size=n) mod_sub = gee.GEE(endog, exog, group) res_sub = mod_sub.fit() @@ -1939,7 +1939,7 @@ def test_ql_diff(family): assert_allclose(qle1 - qle2, qldiff, rtol=1e-5, atol=1e-5) def test_qic_warnings(): - with assert_warns(UserWarning): + with pytest.warns(UserWarning): fam = families.Gaussian() y, x1, _, g = simple_qic_data(fam) model = gee.GEE(y, x1, family=fam, groups=g)