Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: GEE fit_history #5789

Merged
merged 8 commits into from May 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 21 additions & 14 deletions statsmodels/genmod/generalized_estimating_equations.py
Expand Up @@ -29,7 +29,7 @@
from scipy import stats
import pandas as pd
import patsy

from collections import defaultdict
from statsmodels.tools.decorators import cache_readonly
import statsmodels.base.model as base
# used for wrapper:
Expand Down Expand Up @@ -512,10 +512,7 @@ def __init__(self, endog, exog, groups, time=None, family=None,
self.constraint = constraint
self.update_dep = update_dep

self._fit_history = {'params': [],
'score': [],
'dep_params': [],
'cov_adjust': []}
self._fit_history = defaultdict(list)

# Pass groups, time, offset, and dep_data so they are
# processed for missing data along with endog and exog.
Expand Down Expand Up @@ -801,6 +798,9 @@ def compare_score_test(self, submodel):
if self.exog.shape[0] != submod.exog.shape[0]:
msg = "Model and submodel have different numbers of cases."
raise ValueError(msg)
if self.exog.shape[1] == submod.exog.shape[1]:
msg = "Model and submodel have the same number of variables"
warnings.warn(msg)
if not isinstance(self.family, type(submod.family)):
msg = "Model and submodel have different GLM families."
warnings.warn(msg)
Expand Down Expand Up @@ -1259,10 +1259,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,

self.scaling_factor = scaling_factor

self._fit_history = {'params': [],
'score': [],
'dep_params': [],
'cov_adjust': []}
self._fit_history = defaultdict(list)

if self.weights is not None and cov_type == 'naive':
raise ValueError("when using weights, cov_type may not be naive")
Expand Down Expand Up @@ -1364,7 +1361,7 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,

# attributes not needed during results__init__
results.fit_history = self._fit_history
delattr(self, "_fit_history")
self.fit_history = defaultdict(list)
results.score_norm = del_params
results.converged = (del_params < ctol)
results.cov_struct = self.cov_struct
Expand Down Expand Up @@ -1492,7 +1489,7 @@ class make sense when the model has been fit with regularization.
mean_params = np.zeros(self.exog.shape[1])
self.update_cached_means(mean_params)
converged = False
fit_history = {'params': []}
fit_history = defaultdict(list)

# Subtract this number from the total sample size when
# normalizing the scale parameter estimate.
Expand Down Expand Up @@ -1911,11 +1908,21 @@ def qic(self, scale=None):
"""
Returns the QIC and QICu information criteria.

For families with a scale parameter (e.g. Gaussian),
provide as the scale argument the estimated scale
from the largest model under consideration.
For families with a scale parameter (e.g. Gaussian), provide
as the scale argument the estimated scale from the largest
model under consideration.

If the scale parameter is not provided, the estimated scale
parameter is used. Doing this does not allow comparisons of
QIC values between models.
"""

# It is easy to forget to set the scale parameter. Sometimes
# this is intentional, so we warn.
if scale is None:
msg = "QIC values obtained using scale=None are not appropriate for comparing models"
warnings.warn(msg)

if scale is None:
scale = self.scale

Expand Down
43 changes: 33 additions & 10 deletions statsmodels/genmod/tests/test_gee.py
Expand Up @@ -635,14 +635,19 @@ def test_compare_score_test(self, cov_struct):

mod_sub = gee.GEE(endog, exog_sub, group, cov_struct=cov_struct())
res_sub = mod_sub.fit()
mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
score_results = mod.compare_score_test(res_sub)
assert_almost_equal(score_results["statistic"],
mod_lr.score_test_results["statistic"])
assert_almost_equal(score_results["p-value"],
mod_lr.score_test_results["p-value"])
assert_almost_equal(score_results["df"],
mod_lr.score_test_results["df"])

for call_fit in [False, True]:
mod = gee.GEE(endog, exog, group, cov_struct=cov_struct())
if call_fit:
# Should work with or without fitting the parent model
mod.fit()
score_results = mod.compare_score_test(res_sub)
assert_almost_equal(score_results["statistic"],
mod_lr.score_test_results["statistic"])
assert_almost_equal(score_results["p-value"],
mod_lr.score_test_results["p-value"])
assert_almost_equal(score_results["df"],
mod_lr.score_test_results["df"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

future reference, the lint-compatible version of this is one of:

assert_almost_equal(score_results["df"],
                                    mod_lr.score_test_results["df"])

assert_almost_equal(
    score_results["df"],
    mod_lr.score_test_results["df"])



def test_compare_score_test_warnings(self):

Expand Down Expand Up @@ -682,6 +687,14 @@ def test_compare_score_test_warnings(self):
mod = gee.GEE(endog, exog, group)
_ = mod.compare_score_test(res_sub)

# Parent and submodel are the same dimension
with pytest.warns(UserWarning):
w = np.random.uniform(size=n)
mod_sub = gee.GEE(endog, exog, group)
res_sub = mod_sub.fit()
mod = gee.GEE(endog, exog, group)
_ = mod.compare_score_test(res_sub)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could part of this block be outside of the pytest.warns context?


def test_constraint_covtype(self):
# Test constraints with different cov types
np.random.seed(6432)
Expand Down Expand Up @@ -1883,8 +1896,10 @@ def test_ql_known(family):
assert_allclose(ql1, qle1[0], rtol=1e-4)
assert_allclose(ql2, qle2[0], rtol=1e-4)

qler1 = result1.qic()
qler2 = result2.qic()
with warnings.catch_warnings():
warnings.simplefilter("ignore")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this warn consistently and/or intentionally? i.e. could be pytest.warns?

qler1 = result1.qic()
qler2 = result2.qic()
assert_equal(qler1, qle1[1:])
assert_equal(qler2, qle2[1:])

Expand Down Expand Up @@ -1922,3 +1937,11 @@ def test_ql_diff(family):
qle2, _, _ = model2.qic(result2.params, result2.scale, result2.cov_params())

assert_allclose(qle1 - qle2, qldiff, rtol=1e-5, atol=1e-5)

def test_qic_warnings():
with pytest.warns(UserWarning):
fam = families.Gaussian()
y, x1, _, g = simple_qic_data(fam)
model = gee.GEE(y, x1, family=fam, groups=g)
result = model.fit()
result.qic()
2 changes: 1 addition & 1 deletion statsmodels/genmod/tests/test_glm.py
Expand Up @@ -1989,7 +1989,7 @@ def test_tweedie_EQL_upper_limit():
# Un-regularized fit using gradients not IRLS
fam = sm.families.Tweedie(var_power=2, eql=True)
model1 = sm.GLM(y, x, family=fam)
result1 = model1.fit(method="newton")
result1 = model1.fit(method="newton", scale=scale)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this related to the rest of the Pr?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

before this fades into distant memory, any thoughts here?

assert_allclose(result1.params, np.r_[4, 1, 1], atol=1e-3, rtol=1e-1)


Expand Down