diff --git a/statsmodels/genmod/dependence_structures/covstruct.py b/statsmodels/genmod/dependence_structures/covstruct.py
index 0502e0b2c4b..06d3b08fd97 100644
--- a/statsmodels/genmod/dependence_structures/covstruct.py
+++ b/statsmodels/genmod/dependence_structures/covstruct.py
@@ -1,21 +1,28 @@
 from statsmodels.compat.python import iterkeys, itervalues, zip, range
 import numpy as np
+from scipy import linalg as spl
 
 
 class CovStruct(object):
     """
-    The base class for correlation and covariance structures of
-    cluster data.
+    A base class for correlation and covariance structures of grouped
+    data.
 
     Each implementation of this class takes the residuals from a
-    regression model that has been fitted to clustered data, and uses
-    them to estimate the within-cluster variance and dependence
-    structure of the random errors in the model.
+    regression model that has been fitted to grouped data, and uses
+    them to estimate the within-group dependence structure of the
+    random errors in the model.
+
+    The state of the covariance structure is represented through the
+    value of the class variable `dep_params`.  The default state of a
+    newly-created instance should correspond to the identity
+    correlation matrix.
     """
 
-    # Parameters describing the dependency structure
-    dep_params = None
+    def __init__(self):
 
+        # Parameters describing the dependency structure
+        self.dep_params = None
 
     def initialize(self, model):
         """
@@ -29,7 +36,6 @@ def initialize(self, model):
         """
         self.model = model
 
-
     def update(self, params):
         """
         Updates the association parameter values based on the current
@@ -42,7 +48,6 @@ def update(self, params):
         """
         raise NotImplementedError
 
-
     def covariance_matrix(self, endog_expval, index):
         """Returns the working covariance or correlation matrix for a
         given cluster of data.
@@ -66,6 +71,61 @@ def covariance_matrix(self, endog_expval, index):
         """
         raise NotImplementedError
 
+    def covariance_matrix_solve(self, expval, index, stdev, rhs):
+        """
+        Solves matrix equations of the form `covmat * soln = rhs` and
+        returns the values of `soln`, where `covmat` is the covariance
+        matrix represented by this class.
+
+        Parameters
+        ----------
+        expval: array-like
+           The expected value of endog for each observed value in the
+           group.
+        index: integer
+           The group index.
+        stdev : array-like
+            The standard deviation of endog for each observation in
+            the group.
+        rhs : list/tuple of array-like
+            A set of right-hand sides; each defines a matrix equation
+            to be solved.
+
+        Returns
+        -------
+        soln : list/tuple of array-like
+            The solutions to the matrix equations.
+
+        Notes
+        -----
+        Returns None if the solver fails.
+
+        Some dependence structures do not use `expval` and/or `index`
+        to determine the correlation matrix.  Some families
+        (e.g. binomial) do not use the `stdev` parameter when forming
+        the covariance matrix.
+
+        Systems of linear equations with the covariance matrix as the
+        left hand side (LHS) are solved for different right hand sides
+        (RHS); the LHS is only factorized once to save time.
+
+        This is a default implementation, it can be reimplemented in
+        subclasses to optimize the linear algebra according to the
+        struture of the covariance matrix.
+        """
+
+        vmat, is_cor = self.covariance_matrix(expval, index)
+
+        if is_cor:
+            vmat *= np.outer(stdev, stdev)
+
+        try:
+            vco = spl.cho_factor(vmat)
+        except np.linalg.LinAlgError:
+            return None
+
+        soln = [spl.cho_solve(vco, x) for x in rhs]
+        return soln
 
     def summary(self):
         """
@@ -80,17 +140,27 @@ class Independence(CovStruct):
     An independence working dependence structure.
     """
 
-    dep_params = np.r_[[]]
-
     # Nothing to update
     def update(self, params):
         return
 
-
     def covariance_matrix(self, expval, index):
         dim = len(expval)
         return np.eye(dim, dtype=np.float64), True
 
+    def covariance_matrix_solve(self, expval, index, stdev, rhs):
+        v = stdev**2
+        rslt = []
+        for x in rhs:
+            if x.ndim == 1:
+                rslt.append(x / v)
+            else:
+                rslt.append(x / v[:, None])
+        return rslt
+
+    update.__doc__ = CovStruct.update.__doc__
+    covariance_matrix.__doc__ = CovStruct.covariance_matrix.__doc__
+    covariance_matrix_solve.__doc__ = CovStruct.covariance_matrix_solve.__doc__
 
     def summary(self):
         return "Observations within a cluster are independent."
@@ -101,9 +171,12 @@ class Exchangeable(CovStruct):
     An exchangeable working dependence structure.
     """
 
-    # The correlation between any two values in the same cluster
-    dep_params = 0
+    def __init__(self):
+
+        super(Exchangeable, self).__init__()
 
+        # The correlation between any two values in the same cluster
+        self.dep_params = 0.
 
     def update(self, params):
 
@@ -119,13 +192,9 @@ def update(self, params):
         residsq_sum, scale, nterm = 0, 0, 0
         for i in range(self.model.num_group):
 
-            if len(endog[i]) == 0:
-                continue
-
             expval, _ = cached_means[i]
-
-            sdev = np.sqrt(varfunc(expval))
-            resid = (endog[i] - expval) / sdev
+            stdev = np.sqrt(varfunc(expval))
+            resid = (endog[i] - expval) / stdev
 
             ngrp = len(resid)
             residsq = np.outer(resid, resid)
@@ -137,21 +206,45 @@ def update(self, params):
         scale /= (nobs - dim)
         self.dep_params = residsq_sum / (scale * (nterm - dim))
 
-
     def covariance_matrix(self, expval, index):
         dim = len(expval)
         dp = self.dep_params * np.ones((dim, dim), dtype=np.float64)
-        return  dp + (1 - self.dep_params) * np.eye(dim), True
+        return  dp + (1. - self.dep_params) * np.eye(dim), True
+
+    def covariance_matrix_solve(self, expval, index, stdev, rhs):
+
+        k = len(expval)
+        c = self.dep_params / (1. - self.dep_params)
+        c /= 1. + self.dep_params * (k - 1)
+
+        rslt = []
+        for x in rhs:
+            if x.ndim == 1:
+                x1 = x / stdev
+                y = x1 / (1. - self.dep_params)
+                y -= c * sum(x1)
+                y /= stdev
+            else:
+                x1 = x / stdev[:, None]
+                y = x1 / (1. - self.dep_params)
+                y -= c * x1.sum(0)
+                y /= stdev[:, None]
+            rslt.append(y)
+
+        return rslt
+
+    update.__doc__ = CovStruct.update.__doc__
+    covariance_matrix.__doc__ = CovStruct.covariance_matrix.__doc__
+    covariance_matrix_solve.__doc__ = CovStruct.covariance_matrix_solve.__doc__
 
     def summary(self):
         return ("The correlation between two observations in the " +
                 "same cluster is %.3f" % self.dep_params)
 
 
-
-
 class Nested(CovStruct):
-    """A nested working dependence structure.
+    """
+    A nested working dependence structure.
 
     A working dependence structure that captures a nested hierarchy of
     groups, each level of which contributes to the random error term
@@ -200,26 +293,6 @@ class Nested(CovStruct):
     variance components are shared by r and r'.
     """
 
-    # The regression design matrix for estimating the variance
-    # components
-    designx = None
-
-    # Matrices containing labels that indicate which covariance
-    # parameters are constrained to be equal
-    ilabels = None
-
-    # The SVD of designx
-    designx_u = None
-    designx_s = None
-    designx_v = None
-
-    # The scale parameter
-    scale = None
-
-    # The regression coefficients for estimating the variance
-    # components
-    vcomp_coeff = None
-
     def initialize(self, model):
         """
         Called on the first call to update
@@ -282,7 +355,6 @@ def initialize(self, model):
         self.designx_s = svd[1]
         self.designx_v = svd[2].T
 
-
     def update(self, params):
 
         endog = self.model.endog_li
@@ -302,14 +374,10 @@ def update(self, params):
         scale = 0.
         for i in range(self.model.num_group):
 
-            # Needed?
-            if len(endog[i]) == 0:
-                continue
-
             expval, _ = cached_means[i]
 
-            sdev = np.sqrt(varfunc(expval))
-            resid = (endog[i] - offset[i] - expval) / sdev
+            stdev = np.sqrt(varfunc(expval))
+            resid = (endog[i] - offset[i] - expval) / stdev
 
             ix1, ix2 = np.tril_indices(len(resid), -1)
             dvmat.append(resid[ix1] * resid[ix2])
@@ -329,7 +397,6 @@ def update(self, params):
 
         self.dep_params = self.vcomp_coeff.copy()
 
-
     def covariance_matrix(self, expval, index):
 
         dim = len(expval)
@@ -345,6 +412,8 @@ def covariance_matrix(self, expval, index):
         vmat /= self.scale
         return vmat, True
 
+    update.__doc__ = CovStruct.update.__doc__
+    covariance_matrix.__doc__ = CovStruct.covariance_matrix.__doc__
 
     def summary(self):
         """
@@ -363,27 +432,28 @@ def summary(self):
 
 class Autoregressive(CovStruct):
     """
-    An autoregressive working dependence structure.  The dependence is
-    defined in terms of the `time` component of the parent GEE class.
-    Time represents a potentially multidimensional index from which
-    distances between pairs of obsercations can be determined.  The
-    correlation between two observations in the same cluster is
-    dep_params**distance, where `dep_params` is the autocorrelation
-    parameter to be estimated, and distance is the distance between
-    the two observations, calculated from their corresponding time
-    values.  `time` is stored as an n_obs x k matrix, where `k`
-    represents the number of dimensions in the time index.
+    An autoregressive working dependence structure.
+
+    The dependence is defined in terms of the `time` component of the
+    parent GEE class.  Time represents a potentially multidimensional
+    index from which distances between pairs of observations can be
+    determined.  The correlation between two observations in the same
+    cluster is dep_params^distance, where `dep_params` is the
+    autocorrelation parameter to be estimated, and `distance` is the
+    distance between the two observations, calculated from their
+    corresponding time values.  `time` is stored as an n_obs x k
+    matrix, where `k` represents the number of dimensions in the time
+    index.
 
     The autocorrelation parameter is estimated using weighted
     nonlinear least squares, regressing each value within a cluster on
-    each preceeding value within the same cluster.
+    each preceeding value in the same cluster.
 
     Parameters
     ----------
     dist_func: function from R^k x R^k to R^+, optional
-       A function that takes the time vector for two observations and
-       computed the distance between the two observations based on the
-       time vector.
+       A function that computes the distance between the two
+       observations based on their `time` values.
 
     Reference
     ---------
@@ -392,28 +462,22 @@ class Autoregressive(CovStruct):
     in medicine. Vol 7, 59-71, 1988.
     """
 
-    # The autoregression parameter
-    dep_params = 0
-
-    designx = None
-
-    # The function for determining distances based on time
-    dist_func = None
-
-
     def __init__(self, dist_func=None):
 
+        super(Autoregressive, self).__init__()
+
+        # The function for determining distances based on time
         if dist_func is None:
             self.dist_func = lambda x, y: np.abs(x - y).sum()
         else:
             self.dist_func = dist_func
 
+        self.designx = None
 
-    def update(self, params):
+        # The autocorrelation parameter
+        self.dep_params = 0.
 
-        if self.model.time is None:
-            raise ValueError("GEE: time must be provided to GEE if "
-                             "using AR dependence structure")
+    def update(self, params):
 
         endog = self.model.endog_li
         time = self.model.time_li
@@ -439,27 +503,21 @@ def update(self, params):
             self.designx = designx
 
         scale = self.model.estimate_scale()
-
         varfunc = self.model.family.variance
-
         cached_means = self.model.cached_means
 
         # Weights
-        var = (1 - self.dep_params**(2 * designx)) /\
-            (1 - self.dep_params**2)
-        wts = 1 / var
+        var = 1. - self.dep_params**(2*designx)
+        var /= 1. - self.dep_params**2
+        wts = 1. / var
         wts /= wts.sum()
 
         residmat = []
         for i in range(self.model.num_group):
 
-            if len(endog[i]) == 0:
-                continue
-
             expval, _ = cached_means[i]
-
-            sdev = np.sqrt(scale * varfunc(expval))
-            resid = (endog[i] - expval) / sdev
+            stdev = np.sqrt(scale * varfunc(expval))
+            resid = (endog[i] - expval) / stdev
 
             ngrp = len(resid)
             for j1 in range(ngrp):
@@ -488,16 +546,15 @@ def fitfunc(a):
         # Right bracket point
         b_rgt, f_rgt = 0.75, fitfunc(0.75)
         while f_rgt < f_ctr:
-            b_rgt = b_rgt + (1 - b_rgt) / 2
+            b_rgt = b_rgt + (1. - b_rgt) / 2
             f_rgt = fitfunc(b_rgt)
-            if b_rgt > 1 - 1e-6:
+            if b_rgt > 1. - 1e-6:
                 raise ValueError(
                     "Autoregressive: unable to find right bracket")
 
         from scipy.optimize import brent
         self.dep_params = brent(fitfunc, brack=[b_lft, b_ctr, b_rgt])
 
-
     def covariance_matrix(self, endog_expval, index):
         ngrp = len(endog_expval)
         if self.dep_params == 0:
@@ -506,6 +563,68 @@ def covariance_matrix(self, endog_expval, index):
         cmat = self.dep_params**np.abs(idx[:, None] - idx[None, :])
         return cmat, True
 
+    def covariance_matrix_solve(self, expval, index, stdev, rhs):
+        # The inverse of an AR(1) covariance matrix is tri-diagonal.
+
+        k = len(expval)
+        soln = []
+
+        # LHS has 1 column
+        if k == 1:
+            return [x / stdev**2 for x in rhs]
+
+        # LHS has 2 columns
+        if k == 2:
+            mat = np.array([[1, -self.dep_params], [-self.dep_params, 1]])
+            mat /= (1. - self.dep_params**2)
+            for x in rhs:
+                if x.ndim == 1:
+                    x1 = x / stdev
+                else:
+                    x1 = x / stdev[:, None]
+                x1 = np.dot(mat, x1)
+                if x.ndim == 1:
+                    x1 /= stdev
+                else:
+                    x1 /= stdev[:, None]
+                soln.append(x1)
+            return soln
+
+        # LHS has >= 3 columns: values c0, c1, c2 defined below give
+        # the inverse.  c0 is on the diagonal, except for the first
+        # and last position.  c1 is on the first and last position of
+        # the diagonal.  c2 is on the sub/super diagonal.
+        c0 = (1. + self.dep_params**2) / (1. - self.dep_params**2)
+        c1 = 1. / (1. - self.dep_params**2)
+        c2 = -self.dep_params / (1. - self.dep_params**2)
+        soln = []
+        for x in rhs:
+            flatten = False
+            if x.ndim == 1:
+                x = x[:, None]
+                flatten = True
+            x1 = x / stdev[:, None]
+
+            z0 = np.zeros((1, x.shape[1]))
+            rhs1 = np.concatenate((x[1:,:], z0), axis=0)
+            rhs2 = np.concatenate((z0, x[0:-1,:]), axis=0)
+
+            y = c0*x + c2*rhs1 + c2*rhs2
+            y[0, :] = c1*x[0, :] + c2*x[1, :]
+            y[-1, :] = c1*x[-1, :] + c2*x[-2, :]
+
+            y /= stdev[:, None]
+
+            if flatten:
+                y = np.squeeze(y)
+
+            soln.append(y)
+
+        return soln
+
+    update.__doc__ = CovStruct.update.__doc__
+    covariance_matrix.__doc__ = CovStruct.covariance_matrix.__doc__
+    covariance_matrix_solve.__doc__ = CovStruct.covariance_matrix_solve.__doc__
 
     def summary(self):
 
@@ -513,7 +632,6 @@ def summary(self):
                 self.dep_params)
 
 
-
 class GlobalOddsRatio(CovStruct):
     """
     Estimate the global odds ratio for a GEE with ordinal or nominal
@@ -543,21 +661,10 @@ class GlobalOddsRatio(CovStruct):
     from the given cut points.
     """
 
-    # The current estimate of the odds ratio
-    dep_params = [None,]
-
-    # The current estimate of the crude odds ratio
-    crude_or = None
-
-    # See docstring
-    ibd = None
-
-    # See docstring
-    cpp = None
-
     def __init__(self, endog_type):
         super(GlobalOddsRatio, self).__init__()
         self.endog_type = endog_type
+        self.dep_params = 0.
 
     def initialize(self, model):
 
@@ -587,7 +694,7 @@ def initialize(self, model):
             cpp1 = {}
             for k1 in range(self.ncut):
                 for k2 in range(k1+1):
-                    v1, v2 = np.nonzero((j1==k1) & (j2==k2))
+                    v1, v2 = np.nonzero((j1 == k1) & (j2 == k2))
                     cpp1[(k2, k1)] = \
                         np.hstack((v2[:, None], v1[:, None]))
             cpp.append(cpp1)
@@ -595,7 +702,7 @@ def initialize(self, model):
 
         # Initialize the dependence parameters
         self.crude_or = self.observed_crude_oddsratio()
-        self.dep_params[0] = self.crude_or
+        self.dep_params = self.crude_or
 
 
     def pooled_odds_ratio(self, tables):
@@ -625,14 +732,12 @@ def pooled_odds_ratio(self, tables):
 
         return np.exp(log_pooled_or)
 
-
     def covariance_matrix(self, expected_value, index):
 
         vmat = self.get_eyy(expected_value, index)
         vmat -= np.outer(expected_value, expected_value)
         return vmat, False
 
-
     def observed_crude_oddsratio(self):
         """The crude odds ratio is obtained by pooling all data
         corresponding to a given pair of cut points (c,c'), then
@@ -653,15 +758,12 @@ def observed_crude_oddsratio(self):
         # Get the observed crude OR
         for i in range(len(endog)):
 
-            if len(endog[i]) == 0:
-                continue
-
             # The observed joint values for the current cluster
             yvec = endog[i]
             endog_11 = np.outer(yvec, yvec)
-            endog_10 = np.outer(yvec, 1 - yvec)
-            endog_01 = np.outer(1 - yvec, yvec)
-            endog_00 = np.outer(1 - yvec, 1 - yvec)
+            endog_10 = np.outer(yvec, 1. - yvec)
+            endog_01 = np.outer(1. - yvec, yvec)
+            endog_00 = np.outer(1. - yvec, 1. - yvec)
 
             cpp1 = cpp[i]
             for ky in iterkeys(cpp1):
@@ -673,8 +775,6 @@ def observed_crude_oddsratio(self):
 
         return self.pooled_odds_ratio(list(itervalues(tables)))
 
-
-
     def get_eyy(self, endog_expval, index):
         """
         Returns a matrix V such that V[i,j] is the joint probability
@@ -682,7 +782,7 @@ def get_eyy(self, endog_expval, index):
         probabilities of endog and the odds ratio `current_or`.
         """
 
-        current_or = self.dep_params[0]
+        current_or = self.dep_params
         ibd = self.ibd[index]
 
         # The between-observation joint probabilities
@@ -691,10 +791,10 @@ def get_eyy(self, endog_expval, index):
         else:
             psum = endog_expval[:, None] + endog_expval[None, :]
             pprod = endog_expval[:, None] * endog_expval[None, :]
-            pfac = np.sqrt((1 + psum * (current_or - 1))**2 +
-                           4 * current_or * (1 - current_or) * pprod)
-            vmat = 1 +  psum * (current_or - 1) - pfac
-            vmat /= 2 * (current_or - 1)
+            pfac = np.sqrt((1. + psum * (current_or - 1.))**2 +
+                           4 * current_or * (1. - current_or) * pprod)
+            vmat = 1. +  psum * (current_or - 1.) - pfac
+            vmat /= 2. * (current_or - 1)
 
         # Fix E[YY'] for elements that belong to same observation
         for bdl in ibd:
@@ -709,7 +809,6 @@ def get_eyy(self, endog_expval, index):
 
         return vmat
 
-
     def update(self, params):
         """Update the global odds ratio based on the current value of
         params."""
@@ -729,15 +828,12 @@ def update(self, params):
 
         for i in range(self.model.num_group):
 
-            if len(endog[i]) == 0:
-                continue
-
             endog_expval, _ = cached_means[i]
 
             emat_11 = self.get_eyy(endog_expval, i)
             emat_10 = endog_expval[:, None] - emat_11
             emat_01 = -emat_11 + endog_expval
-            emat_00 = 1 - (emat_11 + emat_10 + emat_01)
+            emat_00 = 1. - (emat_11 + emat_10 + emat_01)
 
             cpp1 = cpp[i]
             for ky in iterkeys(cpp1):
@@ -747,12 +843,13 @@ def update(self, params):
                 tables[ky][0, 1] += emat_01[ix[:, 0], ix[:, 1]].sum()
                 tables[ky][0, 0] += emat_00[ix[:, 0], ix[:, 1]].sum()
 
-
         cor_expval = self.pooled_odds_ratio(list(itervalues(tables)))
 
-        self.dep_params[0] *= self.crude_or / cor_expval
+        self.dep_params *= self.crude_or / cor_expval
 
+    update.__doc__ = CovStruct.update.__doc__
+    covariance_matrix.__doc__ = CovStruct.covariance_matrix.__doc__
 
     def summary(self):
 
-        return "Global odds ratio: %.3f\n" % self.dep_params[0]
+        return "Global odds ratio: %.3f\n" % self.dep_params
diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
index c41f3ee9bd6..948f16ad2bc 100644
--- a/statsmodels/genmod/generalized_estimating_equations.py
+++ b/statsmodels/genmod/generalized_estimating_equations.py
@@ -25,7 +25,6 @@
 from statsmodels.compat.python import iterkeys, range, lrange, lzip, zip
 import numpy as np
 from scipy import stats
-from scipy import linalg as spl
 import pandas as pd
 
 from statsmodels.tools.decorators import (cache_readonly,
@@ -171,9 +170,9 @@ class GEE(base.Model):
     __doc__ = """
     Generalized Estimating Equations Models
 
-    GEE estimates Generalized Linear Models when the data has a
-    cluster structure and the observations are possibly correlated
-    within but not across clusters
+    GEE estimates Generalized Linear Models when the data have a
+    grouped structure, and the observations are possibly correlated
+    within groups but not between groups.
 
     Parameters
     ----------
@@ -185,7 +184,7 @@ class GEE(base.Model):
         intercept is not included by default and should be added
         by the user. See `statsmodels.tools.add_constant`.
     groups : array-like
-        A 1d array of length `nobs` containing the cluster labels.
+        A 1d array of length `nobs` containing the group labels.
     time : array-like
         A 2d array of time (or other index) values, used by some
         dependence structures to define similarity relationships among
@@ -396,6 +395,12 @@ def mean_deriv_exog(exog, params):
 
             self.mean_deriv_exog = mean_deriv_exog
 
+        # Skip the covariance updates if all groups have a single
+        # observation (reduces to fitting a GLM).
+        self._do_cov_update = True
+        if max([len(x) for x in self.endog_li]) == 1:
+            self._do_cov_update = False
+
     def cluster_list(self, array):
         """
         Returns `array` split into subarrays corresponding to the
@@ -467,28 +472,18 @@ def _update_mean_params(self):
         bmat, score = 0, 0
         for i in range(self.num_group):
 
-            if len(endog[i]) == 0:
-                continue
-
             expval, lpr = cached_means[i]
-
+            resid = endog[i] - expval
             dmat = self.mean_deriv(exog[i], lpr)
-
             sdev = np.sqrt(varfunc(expval))
-            vmat, is_cor = self.cov_struct.covariance_matrix(expval, i)
-            if is_cor:
-                vmat *= np.outer(sdev, sdev)
 
-            try:
-                vco = spl.cho_factor(vmat)
-            except np.linalg.LinAlgError:
+            rslt = self.cov_struct.covariance_matrix_solve(expval, i,
+                                                sdev, (dmat, resid))
+            if rslt is None:
                 return None, None
+            vinv_d, vinv_resid = tuple(rslt)
 
-            vinv_d = spl.cho_solve(vco, dmat)
             bmat += np.dot(dmat.T, vinv_d)
-
-            resid = endog[i] - expval
-            vinv_resid = spl.cho_solve(vco, resid)
             score += np.dot(dmat.T, vinv_resid)
 
         update = np.linalg.solve(bmat, score)
@@ -498,7 +493,7 @@ def _update_mean_params(self):
     def update_cached_means(self, mean_params):
         """
         cached_means should always contain the most recent calculation
-        of the cluster-wise mean vectors.  This function should be
+        of the group-wise mean vectors.  This function should be
         called every time the regression parameters are changed, to
         keep the cached means up to date.
         """
@@ -555,32 +550,18 @@ def _covmat(self):
         bmat, cmat = 0, 0
         for i in range(self.num_group):
 
-            if len(endog[i]) == 0:
-                continue
-
             expval, lpr = cached_means[i]
-
+            resid = endog[i] - expval
             dmat = self.mean_deriv(exog[i], lpr)
-
             sdev = np.sqrt(varfunc(expval))
-            vmat, is_cor = self.cov_struct.covariance_matrix(expval, i)
-            if is_cor:
-                vmat *= np.outer(sdev, sdev)
-
-            try:
-                vco = spl.cho_factor(vmat)
-            except np.linalg.LinAlgError:
-                warnings.warn("Singular matrix encountered in GEE "
-                              "covariance estimation",
-                              ConvergenceWarning)
-                return None, None, None, None
 
-            vinv_d = spl.cho_solve(vco, dmat)
+            rslt = self.cov_struct.covariance_matrix_solve(expval, i,
+                                                sdev, (dmat, resid))
+            if rslt is None:
+                return None, None, None, None
+            vinv_d, vinv_resid = tuple(rslt)
 
             bmat += np.dot(dmat.T, vinv_d)
-
-            resid = endog[i] - expval
-            vinv_resid = spl.cho_solve(vco, resid)
             dvinv_resid = np.dot(dmat.T, vinv_resid)
             cmat += np.outer(dvinv_resid, dvinv_resid)
 
@@ -596,38 +577,35 @@ def _covmat(self):
         bcm = 0
         for i in range(self.num_group):
 
-            if len(endog[i]) == 0:
-                continue
-
             expval, lpr = cached_means[i]
-
+            resid = endog[i] - expval
             dmat = self.mean_deriv(exog[i], lpr)
-
             sdev = np.sqrt(varfunc(expval))
-            vmat, is_cor = self.cov_struct.covariance_matrix(expval, i)
-            if is_cor:
-                vmat *= np.outer(sdev, sdev)
-            vmat *= scale
-
-            try:
-                vco = spl.cho_factor(vmat)
-            except np.linalg.LinAlgError:
-                return None, None
 
-            vinv_d = spl.cho_solve(vco, dmat)
+            rslt = self.cov_struct.covariance_matrix_solve(expval,
+                                                  i, sdev, (dmat,))
+            if rslt is None:
+                return None, None, None, None
+            vinv_d = rslt[0]
+            vinv_d /= scale
+
             hmat = np.dot(vinv_d, naive_covariance)
             hmat = np.dot(hmat, dmat.T).T
 
-            resid = endog[i] - expval
             aresid = np.linalg.solve(np.eye(len(resid)) - hmat, resid)
-            srt = np.dot(dmat.T, spl.cho_solve(vco, aresid))
+            rslt = self.cov_struct.covariance_matrix_solve(expval, i,
+                                                     sdev, (aresid,))
+            if rslt is None:
+                return None, None, None, None
+            srt = rslt[0]
+            srt = np.dot(dmat.T, srt) / scale
             bcm += np.outer(srt, srt)
 
         robust_covariance_bc = np.dot(naive_covariance,
                                       np.dot(bcm, naive_covariance))
 
-        return robust_covariance, naive_covariance, \
-            robust_covariance_bc, cmat
+        return (robust_covariance, naive_covariance,
+            robust_covariance_bc, cmat)
 
     def predict(self, params, exog=None, offset=None, linear=False):
         """
@@ -692,6 +670,7 @@ def _starting_params(self):
 
 
     def fit(self, maxiter=60, ctol=1e-6, start_params=None,
+            params_niter=1, first_dep_update=0,
             covariance_type='robust'):
         """
         Fits a GEE model.
@@ -706,12 +685,31 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
         start_params : array-like
             A vector of starting values for the regression
             coefficients.  If None, a default is chosen.
+        params_niter : integer
+            The number of Gauss-Seidel updates of the mean structure
+            parameters that take place prior to each update of the
+            dependence structure.
+        first_dep_update : integer
+            No dependence structure updates occur before this
+            iteration number.
         covariance_type : string
             One of "robust", "naive", or "bias_reduced".
 
         Returns
         -------
         An instance of the GEEResults class
+
+        Notes
+        -----
+        If convergence difficulties occur, increase the values of
+        `first_dep_update` and/or `params_niter`.  Setting
+        `first_dep_update` to a greater value (e.g. ~10-20) causes the
+        algorithm to move close to the GLM solution before attempting
+        to identify the dependence structure.
+
+        For the Gaussian family, there is no benefit to setting
+        `params_niter` to a value greater than 1, since the mean
+        structure parameters converge in one step.
         """
 
         self.fit_history = {'params': [],
@@ -725,19 +723,21 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
         self.update_cached_means(mean_params)
 
-        # Define here in case singularity encountered on first
-        # iteration.
-        fitlack = -1.
-
+        del_params = -1.
+        num_assoc_updates = 0
         for itr in range(maxiter):
+
             update, score = self._update_mean_params()
             if update is None:
-                warnings.warn("Singular matrix encountered in GEE  update",
+                warnings.warn("Singular matrix encountered in GEE update",
                               ConvergenceWarning)
                 break
             mean_params += update
             self.update_cached_means(mean_params)
-            fitlack = np.sqrt(np.sum(score**2))
+
+            # L2 norm of the change in mean structure parameters at
+            # this iteration.
+            del_params = np.sqrt(np.sum(score**2))
 
             self.fit_history['params'].append(mean_params.copy())
             self.fit_history['score'].append(score)
@@ -746,11 +746,15 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
             # Don't exit until the association parameters have been
             # updated at least once.
-            if fitlack < ctol and itr > 0:
+            if del_params < ctol and num_assoc_updates > 0:
                 break
-            self._update_assoc(mean_params)
 
-        if fitlack >= ctol:
+            if self._do_cov_update and (itr % params_niter) == 0\
+                   and (itr >= first_dep_update):
+                self._update_assoc(mean_params)
+                num_assoc_updates += 1
+
+        if del_params >= ctol:
             warnings.warn("Iteration limit reached prior to convergence",
                           IterationLimitWarning)
 
@@ -761,8 +765,8 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
         bcov, ncov, bc_cov, _ = self._covmat()
         if bcov is None:
-            warnings.warn("Unable to determine covariance structure "
-                          "for GEE estimates", ConvergenceWarning)
+            warnings.warn("Estimated covariance structure for GEE "
+                          "estimates is singular", ConvergenceWarning)
             return None
 
         if self.constraint is not None:
@@ -776,15 +780,15 @@ def fit(self, maxiter=60, ctol=1e-6, start_params=None,
 
         # The superclass constructor will multiply the covariance
         # matrix argument bcov by scale, which we don't want, so we
-        # divide bvov by the scale parameter here
+        # divide bcov by the scale parameter here
         results = GEEResults(self, mean_params, bcov / scale, scale)
 
         results.covariance_type = covariance_type
         results.fit_history = self.fit_history
         results.naive_covariance = ncov
         results.robust_covariance_bc = bc_cov
-        results.score_norm = fitlack
-        results.converged = (fitlack < ctol)
+        results.score_norm = del_params
+        results.converged = (del_params < ctol)
         results.cov_struct = self.cov_struct
 
         return results
diff --git a/statsmodels/genmod/tests/test_gee.py b/statsmodels/genmod/tests/test_gee.py
index 4d7114b4d6b..32e6f42f64c 100644
--- a/statsmodels/genmod/tests/test_gee.py
+++ b/statsmodels/genmod/tests/test_gee.py
@@ -11,14 +11,13 @@
 from __future__ import print_function
 from statsmodels.compat import lrange
 import numpy as np
-
 import os
 from numpy.testing import assert_almost_equal
-from statsmodels.genmod.generalized_estimating_equations import GEE,\
-    GEEMargins, Multinomial
-from statsmodels.genmod.families import Gaussian,Binomial,Poisson
-from statsmodels.genmod.dependence_structures import Exchangeable,\
-    Independence,GlobalOddsRatio,Autoregressive,Nested
+from statsmodels.genmod.generalized_estimating_equations import (GEE,
+    GEEMargins, Multinomial)
+from statsmodels.genmod.families import Gaussian, Binomial, Poisson
+from statsmodels.genmod.dependence_structures import (Exchangeable,
+    Independence, GlobalOddsRatio, Autoregressive, Nested)
 import pandas as pd
 import statsmodels.formula.api as sm
 
@@ -251,6 +250,50 @@ def test_logistic(self):
         # Check for run-time exceptions in summary
         # print(mdf.summary())
 
+    def test_autoregressive(self):
+
+        dep_params_true = [0, 0.589208623896, 0.559823804948]
+
+        params_true = [[1.08043787, 1.12709319, 0.90133927],
+                       [0.9613677, 1.05826987, 0.90832055],
+                       [1.05370439, 0.96084864, 0.93923374]]
+
+        np.random.seed(342837482)
+
+        num_group = 100
+        ar_param = 0.5
+        k = 3
+
+        ga = Gaussian()
+
+        for gsize in 1,2,3:
+
+            ix = np.arange(gsize)[:,None] - np.arange(gsize)[None,:]
+            ix = np.abs(ix)
+            cmat = ar_param ** ix
+            cmat_r = np.linalg.cholesky(cmat)
+
+            endog = []
+            exog = []
+            groups = []
+            for i in range(num_group):
+                x = np.random.normal(size=(gsize,k))
+                exog.append(x)
+                expval = x.sum(1)
+                errors = np.dot(cmat_r, np.random.normal(size=gsize))
+                endog.append(expval + errors)
+                groups.append(i*np.ones(gsize))
+
+            endog = np.concatenate(endog)
+            groups = np.concatenate(groups)
+            exog = np.concatenate(exog, axis=0)
+
+            ar = Autoregressive()
+            md = GEE(endog, exog, groups, family=ga, cov_struct = ar)
+            mdf = md.fit()
+
+            assert_almost_equal(ar.dep_params, dep_params_true[gsize-1])
+            assert_almost_equal(mdf.params, params_true[gsize-1])
 
     def test_post_estimation(self):
 
@@ -316,7 +359,7 @@ def test_linear(self):
               [0.0440369906460754,0.0480069787567662,
                0.049519758758187,0.0479760443027526]]
 
-        for j,v in enumerate((vi,ve)):
+        for j,v in enumerate((vi, ve)):
             md = GEE(endog, exog, group, None, family, v)
             mdf = md.fit()
             assert_almost_equal(mdf.params, cf[j], decimal=10)