From d5d3acb0711d5a48e67d02bba7a2a710f0973894 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 24 May 2019 22:34:36 -0400
Subject: [PATCH 1/8] Clear but don't delete fit_history from model

---
 .../generalized_estimating_equations.py       | 16 +++++---------
 statsmodels/genmod/tests/test_gee.py          | 21 ++++++++++++-------
 statsmodels/genmod/tests/test_glm.py          |  2 +-
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
index c3c9a955bb4..9090ef9e44a 100644
--- a/statsmodels/genmod/generalized_estimating_equations.py
+++ b/statsmodels/genmod/generalized_estimating_equations.py
@@ -29,7 +29,7 @@
 from scipy import stats
 import pandas as pd
 import patsy
-
+from collections import defaultdict
 from statsmodels.tools.decorators import cache_readonly
 import statsmodels.base.model as base
 # used for wrapper:
@@ -512,10 +512,7 @@ def __init__(self, endog, exog, groups, time=None, family=None,
         self.constraint = constraint
         self.update_dep = update_dep
 
-        self._fit_history = {'params': [],
-                             'score': [],
-                             'dep_params': [],
-                             'cov_adjust': []}
+        self._fit_history = defaultdict(list)
 
         # Pass groups, time, offset, and dep_data so they are
         # processed for missing data along with endog and exog.
@@ -1259,10 +1256,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
         self.scaling_factor = scaling_factor
 
-        self._fit_history = {'params': [],
-                             'score': [],
-                             'dep_params': [],
-                             'cov_adjust': []}
+        self._fit_history = defaultdict(list)
 
         if self.weights is not None and cov_type == 'naive':
             raise ValueError("when using weights, cov_type may not be naive")
@@ -1364,7 +1358,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
         # attributes not needed during results__init__
         results.fit_history = self._fit_history
-        delattr(self, "_fit_history")
+        self.fit_history = defaultdict(list)
         results.score_norm = del_params
         results.converged = (del_params < ctol)
         results.cov_struct = self.cov_struct
@@ -1492,7 +1486,7 @@ class make sense when the model has been fit with regularization.
         mean_params = np.zeros(self.exog.shape[1])
         self.update_cached_means(mean_params)
         converged = False
-        fit_history = {'params': []}
+        fit_history = defaultdict(list)
 
         # Subtract this number from the total sample size when
         # normalizing the scale parameter estimate.
diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index af8ad30dbec..dda12bb06a4 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -635,14 +635,19 @@ def test_compare_score_test(self, cov_struct):
 
         mod_sub = gee.GEE(endog, exog_sub, group, cov_struct=cov_struct())
         res_sub = mod_sub.fit()
-        mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
-        score_results = mod.compare_score_test(res_sub)
-        assert_almost_equal(score_results["statistic"],
-            mod_lr.score_test_results["statistic"])
-        assert_almost_equal(score_results["p-value"],
-            mod_lr.score_test_results["p-value"])
-        assert_almost_equal(score_results["df"],
-            mod_lr.score_test_results["df"])
+
+        for f in False, True:
+            mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
+            if f:
+                # Should work with or without fitting the parent model
+                mod.fit()
+            score_results = mod.compare_score_test(res_sub)
+            assert_almost_equal(score_results["statistic"],
+                mod_lr.score_test_results["statistic"])
+            assert_almost_equal(score_results["p-value"],
+                mod_lr.score_test_results["p-value"])
+            assert_almost_equal(score_results["df"],
+                mod_lr.score_test_results["df"])
 
     def test_compare_score_test_warnings(self):
 
diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py
index 4acfa59c7c4..284701d179a 100644
--- a/statsmodels/genmod/tests/test_glm.py
+++ b/statsmodels/genmod/tests/test_glm.py
@@ -1989,7 +1989,7 @@ def test_tweedie_EQL_upper_limit():
         # Un-regularized fit using gradients not IRLS
         fam = sm.families.Tweedie(var_power=2, eql=True)
         model1 = sm.GLM(y, x, family=fam)
-        result1 = model1.fit(method="newton")
+        result1 = model1.fit(method="newton", scale=scale)
         assert_allclose(result1.params, np.r_[4, 1, 1], atol=1e-3, rtol=1e-1)
 
 

From 7a4418f8c3257fafc6cebb81b7f2bfd96318aacc Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 24 May 2019 22:57:29 -0400
Subject: [PATCH 2/8] Add warning for score test with exog versus same exog

---
 statsmodels/genmod/generalized_estimating_equations.py | 3 +++
 statsmodels/genmod/tests/test_gee.py                   | 8 ++++++++
 2 files changed, 11 insertions(+)

diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
index 9090ef9e44a..f8f34708812 100644
--- a/statsmodels/genmod/generalized_estimating_equations.py
+++ b/statsmodels/genmod/generalized_estimating_equations.py
@@ -798,6 +798,9 @@ def compare_score_test(self, submodel):
         if self.exog.shape[0] != submod.exog.shape[0]:
             msg = "Model and submodel have different numbers of cases."
             raise ValueError(msg)
+        if self.exog.shape[1] == submod.exog.shape[1]:
+            msg = "Model and submodel have the same number of variables"
+            warnings.warn(msg)
         if not isinstance(self.family, type(submod.family)):
             msg = "Model and submodel have different GLM families."
             warnings.warn(msg)
diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index dda12bb06a4..0de3f1e4fa7 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -687,6 +687,14 @@ def test_compare_score_test_warnings(self):
             mod = gee.GEE(endog, exog, group)
             _ = mod.compare_score_test(res_sub)
 
+        # Parent and submodel are the same dimension
+        with assert_warns(UserWarning):
+            w = np.random.uniform(size=n)
+            mod_sub = gee.GEE(endog, exog, group)
+            res_sub = mod_sub.fit()
+            mod = gee.GEE(endog, exog, group)
+            _ = mod.compare_score_test(res_sub)
+
     def test_constraint_covtype(self):
         # Test constraints with different cov types
         np.random.seed(6432)

From 77c89d457095b5dc78e89fb6ff0b6bb3f9f00a14 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Sat, 25 May 2019 10:33:30 -0400
Subject: [PATCH 3/8] improve warnings

---
 .../genmod/generalized_estimating_equations.py   | 16 +++++++++++++---
 statsmodels/genmod/tests/test_gee.py             | 14 ++++++++++++--
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
index f8f34708812..12ba12992a9 100644
--- a/statsmodels/genmod/generalized_estimating_equations.py
+++ b/statsmodels/genmod/generalized_estimating_equations.py
@@ -1908,11 +1908,21 @@ def qic(self, scale=None):
         """
         Returns the QIC and QICu information criteria.
 
-        For families with a scale parameter (e.g. Gaussian),
-        provide as the scale argument the estimated scale
-        from the largest model under consideration.
+        For families with a scale parameter (e.g. Gaussian), provide
+        as the scale argument the estimated scale from the largest
+        model under consideration.
+
+        If the scale parameter is not provided, the estimated scale
+        parameter is used.  Doing this does not allow comparisons of
+        QIC values between models.
         """
 
+        # It is easy to forget to set the scale parameter.  Sometimes
+        # this is intentional, so we warn.
+        if isinstance(self.family, families.Gaussian) and scale is None:
+            msg = "QIC: Using scale=None with Gaussian family"
+            warnings.warn(msg)
+
         if scale is None:
             scale = self.scale
 
diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index 0de3f1e4fa7..962eb06491a 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -1896,8 +1896,10 @@ def test_ql_known(family):
     assert_allclose(ql1, qle1[0], rtol=1e-4)
     assert_allclose(ql2, qle2[0], rtol=1e-4)
 
-    qler1 = result1.qic()
-    qler2 = result2.qic()
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        qler1 = result1.qic()
+        qler2 = result2.qic()
     assert_equal(qler1, qle1[1:])
     assert_equal(qler2, qle2[1:])
 
@@ -1935,3 +1937,11 @@ def test_ql_diff(family):
     qle2, _, _ = model2.qic(result2.params, result2.scale, result2.cov_params())
 
     assert_allclose(qle1 - qle2, qldiff, rtol=1e-5, atol=1e-5)
+
+def test_qic_warnings():
+    with assert_warns(UserWarning):
+        fam = families.Gaussian()
+        y, x1, x2, g = simple_qic_data(fam)
+        model = gee.GEE(y, x1, family=fam, groups=g)
+        result = model.fit()
+        result.qic()

From 600e6de8767525795c67274e74a87b83d3f95020 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Sat, 25 May 2019 11:50:27 -0400
Subject: [PATCH 4/8] clean up test

---
 statsmodels/genmod/tests/test_gee.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index 962eb06491a..e397f0fa054 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -636,9 +636,9 @@ def test_compare_score_test(self, cov_struct):
         mod_sub = gee.GEE(endog, exog_sub, group, cov_struct=cov_struct())
         res_sub = mod_sub.fit()
 
-        for f in False, True:
+        for call_fit in [False, True]:
             mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
-            if f:
+            if call_fit:
                 # Should work with or without fitting the parent model
                 mod.fit()
             score_results = mod.compare_score_test(res_sub)

From fca7ad2da5868ca2bed9f382dfe63986532dec14 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Sat, 25 May 2019 11:57:08 -0400
Subject: [PATCH 5/8] clean up test

---
 statsmodels/genmod/tests/test_gee.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index e397f0fa054..1a984bfb558 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -1941,7 +1941,7 @@ def test_ql_diff(family):
 def test_qic_warnings():
     with assert_warns(UserWarning):
         fam = families.Gaussian()
-        y, x1, x2, g = simple_qic_data(fam)
+        y, x1, _, g = simple_qic_data(fam)
         model = gee.GEE(y, x1, family=fam, groups=g)
         result = model.fit()
         result.qic()

From e7ab942e96ba72f4e91e0c6f262c807ecdd4b82f Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Sat, 25 May 2019 14:40:06 -0400
Subject: [PATCH 6/8] fix qic warning

---
 statsmodels/genmod/generalized_estimating_equations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
index 12ba12992a9..3a95a19019c 100644
--- a/statsmodels/genmod/generalized_estimating_equations.py
+++ b/statsmodels/genmod/generalized_estimating_equations.py
@@ -1919,7 +1919,7 @@ def qic(self, scale=None):
 
         # It is easy to forget to set the scale parameter.  Sometimes
         # this is intentional, so we warn.
-        if isinstance(self.family, families.Gaussian) and scale is None:
+        if (type(self.family) == families.Gaussian) and scale is None:
             msg = "QIC: Using scale=None with Gaussian family"
             warnings.warn(msg)
 

From e816bded6b02e5b8be861b01d3fac1b61a5bc3a0 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Sat, 25 May 2019 21:52:02 -0400
Subject: [PATCH 7/8] modify warning

---
 statsmodels/genmod/generalized_estimating_equations.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
index 3a95a19019c..6e76c780c30 100644
--- a/statsmodels/genmod/generalized_estimating_equations.py
+++ b/statsmodels/genmod/generalized_estimating_equations.py
@@ -1919,8 +1919,8 @@ def qic(self, scale=None):
 
         # It is easy to forget to set the scale parameter.  Sometimes
         # this is intentional, so we warn.
-        if (type(self.family) == families.Gaussian) and scale is None:
-            msg = "QIC: Using scale=None with Gaussian family"
+        if scale is None:
+            msg = "QIC values obtained using scale=None are not appropriate for comparing models"
             warnings.warn(msg)
 
         if scale is None:

From 47e6a212a482f370bab05282a39ec0f4a847b608 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 31 May 2019 15:57:00 -0400
Subject: [PATCH 8/8] address testing issue detecting warning

---
 statsmodels/genmod/tests/test_gee.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index 1a984bfb558..d60ef0814ae 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -688,7 +688,7 @@ def test_compare_score_test_warnings(self):
             _ = mod.compare_score_test(res_sub)
 
         # Parent and submodel are the same dimension
-        with assert_warns(UserWarning):
+        with pytest.warns(UserWarning):
             w = np.random.uniform(size=n)
             mod_sub = gee.GEE(endog, exog, group)
             res_sub = mod_sub.fit()
@@ -1939,7 +1939,7 @@ def test_ql_diff(family):
     assert_allclose(qle1 - qle2, qldiff, rtol=1e-5, atol=1e-5)
 
 def test_qic_warnings():
-    with assert_warns(UserWarning):
+    with pytest.warns(UserWarning):
         fam = families.Gaussian()
         y, x1, _, g = simple_qic_data(fam)
         model = gee.GEE(y, x1, family=fam, groups=g)