Skip to content

Commit

Permalink
Merge pull request #5789 from kshedden/gee_fit_history
Browse files Browse the repository at this point in the history
BUG: GEE fit_history
  • Loading branch information
kshedden committed May 31, 2019
2 parents ada73d9 + 47e6a21 commit 4f17158
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 25 deletions.
35 changes: 21 additions & 14 deletions statsmodels/genmod/generalized_estimating_equations.py
Expand Up @@ -29,7 +29,7 @@
from scipy import stats
import pandas as pd
import patsy

from collections import defaultdict
from statsmodels.tools.decorators import cache_readonly
import statsmodels.base.model as base
# used for wrapper:
Expand Down Expand Up @@ -516,10 +516,7 @@ def __init__(self, endog, exog, groups, time=None, family=None,
self.constraint = constraint
self.update_dep = update_dep

self._fit_history = {'params': [],
'score': [],
'dep_params': [],
'cov_adjust': []}
self._fit_history = defaultdict(list)

# Pass groups, time, offset, and dep_data so they are
# processed for missing data along with endog and exog.
Expand Down Expand Up @@ -805,6 +802,9 @@ def compare_score_test(self, submodel):
if self.exog.shape[0] != submod.exog.shape[0]:
msg = "Model and submodel have different numbers of cases."
raise ValueError(msg)
if self.exog.shape[1] == submod.exog.shape[1]:
msg = "Model and submodel have the same number of variables"
warnings.warn(msg)
if not isinstance(self.family, type(submod.family)):
msg = "Model and submodel have different GLM families."
warnings.warn(msg)
Expand Down Expand Up @@ -1263,10 +1263,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,

self.scaling_factor = scaling_factor

self._fit_history = {'params': [],
'score': [],
'dep_params': [],
'cov_adjust': []}
self._fit_history = defaultdict(list)

if self.weights is not None and cov_type == 'naive':
raise ValueError("when using weights, cov_type may not be naive")
Expand Down Expand Up @@ -1368,7 +1365,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,

# attributes not needed during results__init__
results.fit_history = self._fit_history
delattr(self, "_fit_history")
self.fit_history = defaultdict(list)
results.score_norm = del_params
results.converged = (del_params < ctol)
results.cov_struct = self.cov_struct
Expand Down Expand Up @@ -1496,7 +1493,7 @@ class make sense when the model has been fit with regularization.
mean_params = np.zeros(self.exog.shape[1])
self.update_cached_means(mean_params)
converged = False
fit_history = {'params': []}
fit_history = defaultdict(list)

# Subtract this number from the total sample size when
# normalizing the scale parameter estimate.
Expand Down Expand Up @@ -1915,11 +1912,21 @@ def qic(self, scale=None):
"""
Returns the QIC and QICu information criteria.
For families with a scale parameter (e.g. Gaussian),
provide as the scale argument the estimated scale
from the largest model under consideration.
For families with a scale parameter (e.g. Gaussian), provide
as the scale argument the estimated scale from the largest
model under consideration.
If the scale parameter is not provided, the estimated scale
parameter is used. Doing this does not allow comparisons of
QIC values between models.
"""

# It is easy to forget to set the scale parameter. Sometimes
# this is intentional, so we warn.
if scale is None:
msg = "QIC values obtained using scale=None are not appropriate for comparing models"
warnings.warn(msg)

if scale is None:
scale = self.scale

Expand Down
43 changes: 33 additions & 10 deletions statsmodels/genmod/tests/test_gee.py
Expand Up @@ -635,14 +635,19 @@ def test_compare_score_test(self, cov_struct):

mod_sub = gee.GEE(endog, exog_sub, group, cov_struct=cov_struct())
res_sub = mod_sub.fit()
mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
score_results = mod.compare_score_test(res_sub)
assert_almost_equal(score_results["statistic"],
mod_lr.score_test_results["statistic"])
assert_almost_equal(score_results["p-value"],
mod_lr.score_test_results["p-value"])
assert_almost_equal(score_results["df"],
mod_lr.score_test_results["df"])

for call_fit in [False, True]:
mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
if call_fit:
# Should work with or without fitting the parent model
mod.fit()
score_results = mod.compare_score_test(res_sub)
assert_almost_equal(score_results["statistic"],
mod_lr.score_test_results["statistic"])
assert_almost_equal(score_results["p-value"],
mod_lr.score_test_results["p-value"])
assert_almost_equal(score_results["df"],
mod_lr.score_test_results["df"])

def test_compare_score_test_warnings(self):

Expand Down Expand Up @@ -682,6 +687,14 @@ def test_compare_score_test_warnings(self):
mod = gee.GEE(endog, exog, group)
_ = mod.compare_score_test(res_sub)

# Parent and submodel are the same dimension
with pytest.warns(UserWarning):
w = np.random.uniform(size=n)
mod_sub = gee.GEE(endog, exog, group)
res_sub = mod_sub.fit()
mod = gee.GEE(endog, exog, group)
_ = mod.compare_score_test(res_sub)

def test_constraint_covtype(self):
# Test constraints with different cov types
np.random.seed(6432)
Expand Down Expand Up @@ -1883,8 +1896,10 @@ def test_ql_known(family):
assert_allclose(ql1, qle1[0], rtol=1e-4)
assert_allclose(ql2, qle2[0], rtol=1e-4)

qler1 = result1.qic()
qler2 = result2.qic()
with warnings.catch_warnings():
warnings.simplefilter("ignore")
qler1 = result1.qic()
qler2 = result2.qic()
assert_equal(qler1, qle1[1:])
assert_equal(qler2, qle2[1:])

Expand Down Expand Up @@ -1922,3 +1937,11 @@ def test_ql_diff(family):
qle2, _, _ = model2.qic(result2.params, result2.scale, result2.cov_params())

assert_allclose(qle1 - qle2, qldiff, rtol=1e-5, atol=1e-5)

def test_qic_warnings():
    """qic() with the default scale=None must emit a UserWarning.

    Such QIC values are computed from the model's own estimated scale and
    are therefore not comparable across models, so a warning is expected.
    """
    with pytest.warns(UserWarning):
        family = families.Gaussian()
        endog, exog, _, groups = simple_qic_data(family)
        fit_result = gee.GEE(endog, exog, family=family, groups=groups).fit()
        fit_result.qic()
2 changes: 1 addition & 1 deletion statsmodels/genmod/tests/test_glm.py
Expand Up @@ -1989,7 +1989,7 @@ def test_tweedie_EQL_upper_limit():
# Un-regularized fit using gradients not IRLS
fam = sm.families.Tweedie(var_power=2, eql=True)
model1 = sm.GLM(y, x, family=fam)
result1 = model1.fit(method="newton")
result1 = model1.fit(method="newton", scale=scale)
assert_allclose(result1.params, np.r_[4, 1, 1], atol=1e-3, rtol=1e-1)


Expand Down

0 comments on commit 4f17158

Please sign in to comment.