Skip to content

Commit

Permalink
Merge d912e79 into 7473965
Browse files Browse the repository at this point in the history
  • Loading branch information
kshedden committed Oct 20, 2014
2 parents 7473965 + d912e79 commit 0081a20
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 54 deletions.
151 changes: 107 additions & 44 deletions statsmodels/genmod/generalized_estimating_equations.py
Expand Up @@ -40,7 +40,6 @@
CovStruct)
import statsmodels.genmod.families.varfuncs as varfuncs
from statsmodels.genmod.families.links import Link
from statsmodels.genmod.families import Family

from statsmodels.tools.sm_exceptions import (ConvergenceWarning,
IterationLimitWarning)
Expand Down Expand Up @@ -195,10 +194,7 @@ def unpack_cov(self, bcov):
dependence structures to define similarity relationships among
observations within a cluster.
family : family class instance
The default is Gaussian. To specify the binomial
distribution family = sm.family.Binomial(). Each family can
take a link instance as an argument. See
statsmodels.family.family for more information.
%(family_doc)s
cov_struct : CovStruct class instance
The default is Independence. To specify an exchangeable
structure use cov_struct = Exchangeable(). See
Expand Down Expand Up @@ -258,6 +254,21 @@ def unpack_cov(self, bcov):
%(example)s
"""

_gee_family_doc = """\
The default is Gaussian. To specify the binomial
distribution use `family=sm.family.Binomial()`. Each family
can take a link instance as an argument. See
statsmodels.family.family for more information."""

_gee_ordinal_family_doc = """\
The only family supported is `Binomial`. The default `Logit`
link may be replaced with `probit` if desired."""

_gee_nominal_family_doc = """\
The default value `None` uses a multinomial logit family
specifically designed for use with GEE. Setting this
argument to a non-default value is not currently supported."""

_gee_fit_doc = """
Fits a marginal regression model using generalized estimating
equations (GEE).
Expand Down Expand Up @@ -346,51 +357,88 @@ def unpack_cov(self, bcov):
_gee_example = """
Logistic regression with autoregressive working dependence:
>>> family = Binomial()
>>> va = Autoregressive()
>>> mod = GEE(endog, exog, group, family=family, cov_struct=va)
>>> rslt = mod.fit()
>>> print rslt.summary()
>>> import statsmodels.api as sm
>>> family = sm.families.Binomial()
>>> va = sm.cov_struct.Autoregressive()
>>> model = sm.GEE(endog, exog, group, family=family, cov_struct=va)
>>> result = model.fit()
>>> print result.summary()
Use formulas to fit a Poisson GLM with independent working
dependence:
>>> fam = Poisson()
>>> ind = Independence()
>>> mod = GEE.from_formula("y ~ age + trt + base", "subject",
data, cov_struct=ind, family=fam)
>>> rslt = mod.fit()
>>> print rslt.summary()
>>> import statsmodels.api as sm
>>> fam = sm.families.Poisson()
>>> ind = sm.cov_struct.Independence()
>>> model = sm.GEE.from_formula("y ~ age + trt + base", "subject",
data, cov_struct=ind, family=fam)
>>> result = model.fit()
>>> print result.summary()
Equivalent, using the formula API:
>>> import statsmodels.api as sm
>>> import statsmodels.formula.api as smf
>>> fam = sm.families.Poisson()
>>> ind = sm.cov_struct.Independence()
>>> model = smf.gee("y ~ age + trt + base", "subject",
data, cov_struct=ind, family=fam)
>>> result = model.fit()
>>> print result.summary()
"""

_gee_ordinal_example = """
>>> family = Binomial()
>>> gor = GlobalOddsRatio("ordinal")
>>> mod = OrdinalGEE(endog, exog, groups, None, family, gor)
>>> rslt = mod.fit()
>>> print rslt.summary()
Use formulas:
>>> mod = GEE.from_formula("y ~ x1 + x2", groups, data,
cov_struct=gor, family=family)
>>> rslt = mod.fit()
>>> print rslt.summary()
Fit an ordinal regression model using GEE, with "global
odds ratio" dependence:
>>> import statsmodels.api as sm
>>> gor = sm.families.GlobalOddsRatio("ordinal")
>>> model = sm.OrdinalGEE(endog, exog, groups, cov_struct=gor)
>>> result = model.fit()
>>> print result.summary()
Using formulas:
>>> import statsmodels.api as sm
>>> model = sm.OrdinalGEE.from_formula("y ~ x1 + x2", groups,
data, cov_struct=gor)
>>> result = model.fit()
>>> print result.summary()
Equivalent, using the formula API:
>>> import statsmodels.formula.api as smf
>>> model = smf.ordinal_gee("y ~ x1 + x2", groups, data,
cov_struct=gor)
>>> result = model.fit()
>>> print result.summary()
"""

_gee_nominal_example = """
>>> family = Multinomial(3)
>>> gor = GlobalOddsRatio("nominal")
>>> mod = NominalGEE(endog, exog, groups, None, family, gor)
>>> rslt = mod.fit()
>>> print rslt.summary()
Use formulas:
>>> mod = GEE.from_formula("y ~ x1 + x2", groups, data,
cov_struct=gor, family=family)
>>> rslt = mod.fit()
>>> print rslt.summary()
Fit a nominal regression model using GEE:
>>> import statsmodels.api as sm
>>> import statsmodels.formula.api as smf
>>> gor = sm.families.GlobalOddsRatio("nominal")
>>> model = sm.NominalGEE(endog, exog, groups, cov_struct=gor)
>>> result = model.fit()
>>> print result.summary()
Using formulas:
>>> import statsmodels.api as sm
>>> model = sm.NominalGEE.from_formula("y ~ x1 + x2", groups,
data, cov_struct=gor)
>>> result = model.fit()
>>> print result.summary()
Using the formula API:
>>> import statsmodels.formula.api as smf
>>> model = smf.nominal_gee("y ~ x1 + x2", groups, data,
cov_struct=gor)
>>> result = model.fit()
>>> print result.summary()
"""


Expand All @@ -400,6 +448,7 @@ class GEE(base.Model):
" Estimation of marginal regression models using Generalized\n"
" Estimating Equations (GEE).\n" + _gee_init_doc %
{'extra_params': base._missing_param_doc,
'family_doc': _gee_family_doc,
'example': _gee_example})

cached_means = None
Expand Down Expand Up @@ -1606,12 +1655,19 @@ class OrdinalGEE(GEE):
" Estimation of ordinal response marginal regression models\n"
" using Generalized Estimating Equations (GEE).\n" +
_gee_init_doc % {'extra_params': base._missing_param_doc,
'family_doc': _gee_ordinal_family_doc,
'example': _gee_ordinal_example})

def __init__(self, endog, exog, groups, time=None, family=None,
cov_struct=None, missing='none', offset=None,
dep_data=None, constraint=None):

if family is None:
family = families.Binomial()
else:
if not isinstance(family, families.Binomial):
raise ValueError("ordinal GEE must use a Binomial family")

endog, exog, groups, time, offset = self.setup_ordinal(endog,
exog, groups, time, offset)

Expand Down Expand Up @@ -1821,6 +1877,7 @@ class NominalGEE(GEE):
" Estimation of nominal response marginal regression models\n"
" using Generalized Estimating Equations (GEE).\n" +
_gee_init_doc % {'extra_params': base._missing_param_doc,
'family_doc': _gee_nominal_family_doc,
'example': _gee_nominal_example})

def __init__(self, endog, exog, groups, time=None, family=None,
Expand All @@ -1830,6 +1887,9 @@ def __init__(self, endog, exog, groups, time=None, family=None,
endog, exog, groups, time, offset = self.setup_nominal(endog,
exog, groups, time, offset)

if family is None:
family = _Multinomial(self.ncut+1)

super(NominalGEE, self).__init__(endog, exog, groups,
time, family, cov_struct, missing, offset, dep_data,
constraint)
Expand Down Expand Up @@ -1864,6 +1924,7 @@ def setup_nominal(self, endog, exog, groups, time, offset):
self.endog_values = np.unique(endog)
endog_cuts = self.endog_values[0:-1]
ncut = len(endog_cuts)
self.ncut = ncut

nrows = len(endog_cuts) * exog.shape[0]
ncols = len(endog_cuts) * exog.shape[1]
Expand Down Expand Up @@ -2027,7 +2088,7 @@ class NominalGEEResultsWrapper(GEEResultsWrapper):
wrap.populate_wrapper(NominalGEEResultsWrapper, NominalGEEResults)


class MultinomialLogit(Link):
class _MultinomialLogit(Link):
"""
The multinomial logit transform, only for use with GEE.
Expand Down Expand Up @@ -2163,13 +2224,13 @@ def mean_deriv_exog(self, exog, params):
return dmat


class Multinomial(Family):
class _Multinomial(families.Family):
"""
Pseudo-link function for fitting nominal multinomial models with
GEE. Not for use outside the GEE class.
"""

links = [MultinomialLogit,]
links = [_MultinomialLogit,]
variance = varfuncs.binary

def __init__(self, nlevels):
Expand All @@ -2180,9 +2241,11 @@ def __init__(self, nlevels):
The number of distinct categories for the multinomial
distribution.
"""
self.initialize(nlevels)

def initialize(self, nlevels):
self.ncut = nlevels - 1
self.link = MultinomialLogit(self.ncut)
self.link = _MultinomialLogit(self.ncut)



Expand Down
15 changes: 5 additions & 10 deletions statsmodels/genmod/tests/test_gee.py
Expand Up @@ -15,7 +15,7 @@
from numpy.testing import (assert_almost_equal, assert_equal, assert_allclose,
assert_array_less, assert_raises, assert_)
from statsmodels.genmod.generalized_estimating_equations import (GEE,
OrdinalGEE, NominalGEE, GEEMargins, Multinomial,
OrdinalGEE, NominalGEE, GEEMargins,
NominalGEEResults, OrdinalGEEResults,
NominalGEEResultsWrapper, OrdinalGEEResultsWrapper)
from statsmodels.genmod.families import Gaussian, Binomial, Poisson
Expand Down Expand Up @@ -576,14 +576,12 @@ def test_ordinal(self):

def test_nominal(self):

family = Multinomial(3)

endog, exog, groups = load_data("gee_nominal_1.csv",
icept=False)

# Test with independence correlation
va = Independence()
mod1 = NominalGEE(endog, exog, groups, None, family, va)
mod1 = NominalGEE(endog, exog, groups, cov_struct=va)
rslt1 = mod1.fit()

# Regression test
Expand All @@ -594,7 +592,7 @@ def test_nominal(self):

# Test with global odds ratio dependence
va = GlobalOddsRatio("nominal")
mod2 = NominalGEE(endog, exog, groups, None, family, va)
mod2 = NominalGEE(endog, exog, groups, cov_struct=va)
rslt2 = mod2.fit(start_params=rslt1.params)

# Regression test
Expand Down Expand Up @@ -1100,14 +1098,12 @@ class TestGEEMultinomialCovType(CheckConsistency):
@classmethod
def setup_class(cls):

family = Multinomial(3)

endog, exog, groups = load_data("gee_nominal_1.csv",
icept=False)

# Test with independence correlation
va = Independence()
cls.mod = NominalGEE(endog, exog, groups, None, family, va)
cls.mod = NominalGEE(endog, exog, groups, cov_struct=va)
cls.start_params = np.array([0.44944752, 0.45569985, -0.92007064,
-0.46766728])

Expand All @@ -1120,9 +1116,8 @@ def test_wrapper(self):
exog = pd.DataFrame(exog)
groups = pd.Series(groups, name='the_group')

family = Multinomial(3)
va = Independence()
mod = NominalGEE(endog, exog, groups, None, family, va)
mod = NominalGEE(endog, exog, groups, cov_struct=va)
rslt2 = mod.fit()

check_wrapper(rslt2)
Expand Down

0 comments on commit 0081a20

Please sign in to comment.