From 5efbaa774f567b54090402a04f0888b92a2b7cb6 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Mon, 4 May 2015 23:10:52 -0400 Subject: [PATCH 01/27] Initial commit of generic elastic net code --- statsmodels/base/elastic_net.py | 255 ++++++++++++++ statsmodels/datasets/cpunish/R_cpunish.s | 4 + statsmodels/duration/hazard_regression.py | 172 +++------- statsmodels/duration/tests/test_phreg.py | 2 +- statsmodels/genmod/families/family.py | 2 +- .../genmod/generalized_linear_model.py | 121 ++++++- .../genmod/tests/results/results_glm.py | 16 + statsmodels/genmod/tests/test_glm.py | 47 ++- statsmodels/regression/linear_model.py | 321 +++++++++++------- .../regression/tests/lasso_r_results.R | 2 - .../regression/tests/test_regression.py | 3 + 11 files changed, 660 insertions(+), 285 deletions(-) create mode 100644 statsmodels/base/elastic_net.py diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py new file mode 100644 index 00000000000..1b3d340b018 --- /dev/null +++ b/statsmodels/base/elastic_net.py @@ -0,0 +1,255 @@ +import numpy as np +import statsmodels.base.wrapper as wrap + + +# All the negative smooth penalized log-likelihood functions. These +# functions only account for the likelihod and any smooth penalty. +# The non-smooth penalty is not accounted for here. +def _gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds): + + def nploglike(params, model): + nobs = model.nobs + pen = alpha[k] * (1 - L1_wt) * np.sum(params**2) / 2 + llf = model.loglike(np.r_[params], **loglike_kwds) + return - llf / nobs + pen + + def npscore(params, model): + nobs = model.nobs + l2_grad = alpha[k] * (1 - L1_wt) * params + gr = -model.score(np.r_[params], **score_kwds)[0] / nobs + return gr + l2_grad + + def nphess(params, model): + nobs = model.nobs + pen_hess = alpha[k] * (1 - L1_wt) + return -model.hessian(np.r_[params], **hess_kwds)[0,0] / nobs + pen_hess + + return nploglike, npscore, nphess + + + +def _fit(model, method="coord_descent", maxiter=100, alpha=0., + L1_wt=1., start_params=None, cnvrg_tol=1e-7, zero_tol=1e-8, + return_object=False, loglike_kwds=None, score_kwds=None, + hess_kwds=None, **kwargs): + """ + Return a regularized fit to a regression model. + + Parameters + ---------- + model : ... + ... + method : + Only the coordinate descent algorithm is implemented. + maxiter : integer + The maximum number of iteration cycles (an iteration cycle + involves running coordinate descent on all variables). + alpha : scalar or array-like + The penalty weight. If a scalar, the same penalty weight + applies to all variables in the model. If a vector, it + must have the same length as `params`, and contains a + penalty weight for each coefficient. + L1_wt : scalar + The fraction of the penalty given to the L1 penalty term. + Must be between 0 and 1 (inclusive). If 0, the fit is + a ridge fit, if 1 it is a lasso fit. + start_params : array-like + Starting values for `params`. + cnvrg_tol : scalar + If `params` changes by less than this amount (in sup-norm) + in once iteration cycle, the algorithm terminates with + convergence. + zero_tol : scalar + Any estimated coefficient smaller than this value is + replaced with zero. + return_object : bool + If False, only the parameter estimates are returned. + + Returns + ------- + A results object of the same type returned by `fit`. + + Notes + ----- + The ``elastic net`` penalty is a combination of L1 and L2 + penalties. 
+ + The function that is minimized is: + + -loglike/n + alpha*((1-L1_wt)*|params|_2^2/2 + L1_wt*|params|_1) + + where |*|_1 and |*|_2 are the L1 and L2 norms. + + The computational approach used here is to obtain a quadratic + approximation to the smooth part of the target function: + + -loglike/n + alpha*(1-L1_wt)*|params|_2^2/2 + + then optimize the L1 penalized version of this function along + a coordinate axis. + + This is a generic implementation that may be reimplemented in + specific models for better performance. + """ + + k_exog = model.exog.shape[1] + n_exog = model.exog.shape[0] + + loglike_kwds = {} if loglike_kwds is None else loglike_kwds + score_kwds = {} if score_kwds is None else score_kwds + hess_kwds = {} if hess_kwds is None else hess_kwds + + if np.isscalar(alpha): + alpha = alpha * np.ones(k_exog) + + # Define starting params + if start_params is None: + params = np.zeros(k_exog) + else: + params = start_params.copy() + + converged = False + btol = 1e-8 + params_zero = np.zeros(len(params), dtype=bool) + + init_args = {k : getattr(model, k) for k in model._init_keys + if k != "offset" and hasattr(model, k)} + + fgh_list = [_gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds) + for k in range(k_exog)] + + for itr in range(maxiter): + + # Sweep through the parameters + params_save = params.copy() + for k in range(k_exog): + + # Under the active set method, if a parameter becomes + # zero we don't try to change it again. + if params_zero[k]: + continue + + # Set the offset to account for the variables that are + # being held fixed in the current coordinate + # optimization. + params0 = params.copy() + params0[k] = 0 + offset = np.dot(model.exog, params0) + if hasattr(model, "offset") and model.offset is not None: + offset += model.offset + + # Create a one-variable model for optimization. + model_1var = model.__class__(model.endog, model.exog[:, k], offset=offset, + **init_args) + + func, grad, hess = fgh_list[k] + params[k] = _opt_1d(func, grad, hess, model_1var, params[k], alpha[k]*L1_wt, tol=btol) + + # Update the active set + if itr > 0 and np.abs(params[k]) < zero_tol: + params_zero[k] = True + params[k] = 0. + + # Check for convergence + pchange = np.max(np.abs(params - params_save)) + if pchange < cnvrg_tol: + converged = True + break + + # Set approximate zero coefficients to be exactly zero + params *= np.abs(params) >= zero_tol + + if not return_object: + return params + + # Fit the reduced model to get standard errors and other + # post-estimation results. + ii = np.flatnonzero(params) + cov = np.zeros((k_exog, k_exog)) + if len(ii) > 0: + model1 = model.__class__(model.endog, model.exog[:, ii], + **kwargs) + rslt = model1.fit() + cov[np.ix_(ii, ii)] = rslt.normalized_cov_params + else: + model1 = model.__class__(model.endog, model.exog[:, 0], **kwargs) + rslt = model1.fit() + cov[np.ix_(ii, ii)] = rslt.normalized_cov_params + + # fit may return a results or a results wrapper + if issubclass(rslt.__class__, wrap.ResultsWrapper): + klass = rslt._results.__class__ + else: + klass = rslt.__class__ + + # Not all models have a scale + if hasattr(rslt, 'scale'): + scale = rslt.scale + else: + scale = 1. + + refit = klass(model, params, cov, scale=scale) + refit.regularized = True + + return refit + + +def _opt_1d(func, grad, hess, model, start, L1_wt, tol): + """ + One-dimensional helper for elastic net. + + Parameters: + ----------- + func : function + A smooth function of a single variable to be optimized + with L1 penaty. 
+ grad : function + The gradient of `func`. + hess : function + The Hessian of `func`. + model : statsmodels model + The model being fit. + start : real + A starting value for the function argument + L1_wt : non-negative real + The weight for the L1 penalty function. + tol : non-negative real + A convergence threshold. + + Notes + ----- + ``func``, ``grad``, and ``hess`` have arguments (x, model), where + ``x`` is a point in the parameter space and ``model`` is the model + being fit. + + If the log-likelihood for the model is exactly quadratic, the + global minimum is returned in one step. Otherwise numerical + bisection is used. + + Returns: + -------- + The argmin of the objective function. + """ + + from scipy.optimize import brent + + x = start + f = func(x, model) + b = grad(x, model) + c = hess(x, model) + d = b - c*x + + if L1_wt > np.abs(d): + return 0. + + if d >= 0: + h = (L1_wt - b) / c + elif d < 0: + h = -(L1_wt + b) / c + + f1 = func(x + h, model) + L1_wt*np.abs(x + h) + if f1 <= f + L1_wt*np.abs(x) + 1e-10: + return x + h + + x_opt = brent(func, args=(model,), brack=(x-1, x+1), tol=tol) + return x_opt diff --git a/statsmodels/datasets/cpunish/R_cpunish.s b/statsmodels/datasets/cpunish/R_cpunish.s index 6eedd015a44..077bd7d0610 100644 --- a/statsmodels/datasets/cpunish/R_cpunish.s +++ b/statsmodels/datasets/cpunish/R_cpunish.s @@ -9,3 +9,7 @@ results <- summary.glm(m1) results results['coefficients'] +# Model with exposure +m2 <- glm(EXECUTIONS ~ INCOME + PERPOVERTY + PERBLACK + LN_VC100k96 + SOUTH + DEGREE, + family=poisson, offset=rep(log(100), length(EXECUTIONS))) +results2 <- summary.glm(m2) diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py index fc574464f62..de3312ced62 100644 --- a/statsmodels/duration/hazard_regression.py +++ b/statsmodels/duration/hazard_regression.py @@ -322,8 +322,10 @@ def __init__(self, endog, exog, status=None, entry=None, self.offset = np.asarray(self.offset) self.surv = PHSurvivalTime(self.endog, self.status, - self.exog, self.strata, - self.entry, self.offset) + self.exog, self.strata, + self.entry, self.offset) + self.nobs = len(self.endog) + self.groups = None # TODO: not used? self.missing = missing @@ -444,37 +446,22 @@ def fit(self, groups=None, **args): return results - def fit_regularized(self, method="coord_descent", maxiter=100, - alpha=0., L1_wt=1., start_params=None, - cnvrg_tol=1e-7, zero_tol=1e-8, **kwargs): + def fit_regularized(self, method="elastic_net", alpha=0., + start_params=None, **kwargs): """ Return a regularized fit to a linear regression model. Parameters ---------- method : - Only the coordinate descent algorithm is implemented. - maxiter : integer - The maximum number of iteration cycles (an iteration cycle - involves running coordinate descent on all variables). + Only the `elastic_net` approach is currently implemented. alpha : scalar or array-like The penalty weight. If a scalar, the same penalty weight applies to all variables in the model. If a vector, it must have the same length as `params`, and contains a penalty weight for each coefficient. - L1_wt : scalar - The fraction of the penalty given to the L1 penalty term. - Must be between 0 and 1 (inclusive). If 0, the fit is - a ridge fit, if 1 it is a lasso fit. start_params : array-like Starting values for `params`. - cnvrg_tol : scalar - If `params` changes by less than this amount (in sup-norm) - in once iteration cycle, the algorithm terminates with - convergence. 
- zero_tol : scalar - Any estimated coefficient smaller than this value is - replaced with zero. Returns ------- @@ -482,8 +469,8 @@ def fit_regularized(self, method="coord_descent", maxiter=100, Notes ----- - The penalty is the"elastic net" penalty, which - is a convex combination of L1 and L2 penalties. + The penalty is the ``elastic net`` penalty, which is a + combination of L1 and L2 penalties. The function that is minimized is: ..math:: @@ -493,121 +480,37 @@ def fit_regularized(self, method="coord_descent", maxiter=100, Post-estimation results are based on the same data used to select variables, hence may be subject to overfitting biases. - """ - k_exog = self.exog.shape[1] - n_exog = self.exog.shape[0] + The elastic_net method uses the following keyword arguments: - if np.isscalar(alpha): - alpha = alpha * np.ones(k_exog, dtype=np.float64) + maxiter : int + Maximum number of iterations + L1_wt : float + Must be in [0, 1]. The L1 penalty has weight L1_wt and the + L2 penalty has weight 1 - L1_wt. + cnvrg_tol : float + Convergence threshold for line searches + zero_tol : float + Coefficients below this threshold are treated as zero. + """ - # regularization cannot be used with groups - self.groups = None + from statsmodels.base import elastic_net - # Define starting params - if start_params is None: - params = np.zeros(k_exog, dtype=np.float64) - else: - params = start_params.copy() + if method != "elastic_net": + raise ValueError("method for fit_regularied must be elastic_net") - # Maybe could be a shallow copy, but just in case... - import copy - surv = copy.deepcopy(self.surv) + defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, + "zero_tol" : 1e-10} + for ky in defaults: + if ky not in kwargs: + kwargs[ky] = defaults[ky] + + return elastic_net._fit(self, method=method, + alpha=alpha, + start_params=start_params, + return_object=True, + **kwargs) - # This is the base offset, onto which the effects of - # constrained variables are added. - if self.offset is None: - offset_s_base = [np.zeros(len(x)) for x in surv.stratum_rows] - surv.offset_s = [x.copy() for x in offset_s_base] - else: - offset_s_base = [x.copy() for x in surv.offset_s] - - # Create a model instance for optimizing a single variable - model_1var = copy.deepcopy(self) - model_1var.surv = surv - model_1var.ties = self.ties - - # All the negative penalized loglikeihood functions. - def gen_npfuncs(k): - def nploglike(params): - pen = alpha[k]*((1 - L1_wt)*params**2/2 + L1_wt*np.abs(params)) - return -model_1var.loglike(np.r_[params]) / n_exog + pen - def npscore(params): - pen_grad = alpha[k]*(1 - L1_wt)*params - return -model_1var.score(np.r_[params])[0] / n_exog + pen_grad - def nphess(params): - pen_hess = alpha[k]*(1 - L1_wt) - return -model_1var.hessian(np.r_[params])[0,0] / n_exog + pen_hess - return nploglike, npscore, nphess - nploglike_funcs = [gen_npfuncs(k) for k in range(len(params))] - - # 1-dimensional exog's - exog_s = [] - for k in range(k_exog): - ex = [x[:, k][:, None] for x in surv.exog_s] - exog_s.append(ex) - - converged = False - btol = 1e-8 - params_zero = np.zeros(len(params), dtype=bool) - - for itr in range(maxiter): - - # Sweep through the parameters - params_save = params.copy() - for k in range(k_exog): - - # Under the active set method, if a parameter becomes - # zero we don't try to change it again. - if params_zero[k]: - continue - - # Set exog to include only the variable whose effect - # is being estimated. 
- surv.exog_s = exog_s[k] - - # Set the offset to account for the variables that are - # being held fixed. - params0 = params.copy() - params0[k] = 0 - for stx in range(self.surv.nstrat): - v = np.dot(self.surv.exog_s[stx], params0) - surv.offset_s[stx] = offset_s_base[stx] + v - - params[k] = _opt_1d(nploglike_funcs[k], params[k], - alpha[k]*L1_wt, tol=btol) - - # Update the active set - if itr > 0 and np.abs(params[k]) < zero_tol: - params_zero[k] = True - params[k] = 0. - - # Check for convergence - pchange = np.max(np.abs(params - params_save)) - if pchange < cnvrg_tol: - converged = True - break - - # Set approximate zero coefficients to be exactly zero - params *= np.abs(params) >= zero_tol - - # Fit the reduced model to get standard errors and other - # post-estimation results. - ii = np.flatnonzero(params) - cov = np.zeros((k_exog, k_exog), dtype=np.float64) - if len(ii) > 0: - model = self.__class__(self.endog, self.exog[:, ii], - status=self.status, entry=self.entry, - strata=self.strata, offset=self.offset, - ties=self.ties, missing=self.missing) - rslt = model.fit() - cov[np.ix_(ii, ii)] = rslt.normalized_cov_params - - rfit = PHRegResults(self, params, cov_params=cov) - rfit.converged = converged - rfit.regularized = True - - return rfit def loglike(self, params): """ @@ -1462,13 +1365,16 @@ class PHRegResults(base.LikelihoodModelResults): statsmodels.LikelihoodModelResults ''' - def __init__(self, model, params, cov_params, covariance_type="naive"): + def __init__(self, model, params, cov_params, scale=1., covariance_type="naive"): + + # There is no scale parameter, but we need it for + # meta-procedures that work with results. self.covariance_type = covariance_type self.df_resid = model.df_resid self.df_model = model.df_model - super(PHRegResults, self).__init__(model, params, + super(PHRegResults, self).__init__(model, params, scale=1., normalized_cov_params=cov_params) @cache_readonly diff --git a/statsmodels/duration/tests/test_phreg.py b/statsmodels/duration/tests/test_phreg.py index 0bdb240a6e2..e27ebdefa56 100644 --- a/statsmodels/duration/tests/test_phreg.py +++ b/statsmodels/duration/tests/test_phreg.py @@ -350,7 +350,7 @@ def test_fit_regularized(self): rslt = mod.fit_regularized(alpha=s) # The agreement isn't very high, the issue may be on - # their side. They seem to use some approximations + # the R side. They seem to use some approximations # that we are not using. assert_allclose(rslt.params, coef, rtol=0.3) diff --git a/statsmodels/genmod/families/family.py b/statsmodels/genmod/families/family.py index c3d1d6a0357..d77e4aa2fdc 100644 --- a/statsmodels/genmod/families/family.py +++ b/statsmodels/genmod/families/family.py @@ -1117,7 +1117,7 @@ def resid_dev(self, endog, mu, scale=1.): ----- .. math:: - resid\_dev_i = sign(Y_i - \mu_i) * + resid\_dev_i = sign(Y_i - \mu_i) * \sqrt {(Y_i - \mu_i)^2 / (Y_i * \mu_i^2)} / scale """ return np.sign(endog-mu) * np.sqrt((endog-mu)**2/(endog*mu**2))/scale diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py index 8301ebe5e23..5e9249111de 100644 --- a/statsmodels/genmod/generalized_linear_model.py +++ b/statsmodels/genmod/generalized_linear_model.py @@ -269,6 +269,30 @@ def __init__(self, endog, exog, family=None, offset=None, # register kwds for __init__, offset and exposure are added by super self._init_keys.append('family') + self.nobs = len(endog) + + # Construct a combined offset/exposure term. Note that + # exposure has already been logged if present. 
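+        # The combined term is added to the linear predictor, i.e. the
+        # fitted mean is link^-1(dot(exog, params) + offset + exposure).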
+ offset_exposure = 0. + if hasattr(self, 'offset'): + offset_exposure = self.offset + if hasattr(self, 'exposure'): + offset_exposure = offset_exposure + self.exposure + self._offset_exposure = offset_exposure + + # Set here so the the likelihood, score and Hessian can be + # evaluated without fitting the model. + self.scaletype = None + + # Set the data weights + if endog.ndim > 1 and endog.shape[1] == 2: + data_weights = endog.sum(1) # weights are total trials + else: + data_weights = np.ones((endog.shape[0])) + self.data_weights = data_weights + if np.shape(self.data_weights) == () and self.data_weights > 1: + self.data_weights = self.data_weights * np.ones((endog.shape[0])) + def initialize(self): """ @@ -489,6 +513,7 @@ def hessian_factor(self, params, scale=None, observed=True): return oim_factor + def hessian(self, params, scale=None, observed=True): """Hessian, second derivative of loglikelihood function @@ -661,7 +686,7 @@ def estimate_scale(self, mu): def estimate_tweedie_power(self, mu, method='brentq', low=1.01, high=5.): """ Tweedie specific function to estimate scale and the variance parameter. - The variance parameter is also referred to as p, xi, or shape. + The variance parameter is also referred to as p, xi, or shape. Parameters ---------- @@ -733,7 +758,7 @@ def predict(self, params, exog=None, exposure=None, offset=None, # Use fit offset if appropriate if offset is None and exog is None and hasattr(self, 'offset'): - offset = getattr(self, 'offset') + offset = self.offset elif offset is None: offset = 0. @@ -744,7 +769,7 @@ def predict(self, params, exog=None, exposure=None, offset=None, # Use fit exposure if appropriate if exposure is None and exog is None and hasattr(self, 'exposure'): # Already logged - exposure = getattr(self, 'exposure') + exposure = self.exposure elif exposure is None: exposure = 0. else: @@ -806,7 +831,6 @@ def get_distribution(self, params, scale=1, exog=None, exposure=None, raise ValueError("get_distribution not implemented for %s" % self.family.name) - def fit(self, start_params=None, maxiter=100, method='IRLS', tol=1e-8, scale=None, cov_type='nonrobust', cov_kwds=None, use_t=None, full_output=True, disp=False, max_start_irls=3, **kwargs): @@ -869,15 +893,6 @@ def fit(self, start_params=None, maxiter=100, method='IRLS', tol=1e-8, self.scaletype = scale - # Construct a combined offset/exposure term. Note that - # exposure has already been logged if present. - offset_exposure = 0. - if hasattr(self, 'offset'): - offset_exposure = self.offset - if hasattr(self, 'exposure'): - offset_exposure = offset_exposure + self.exposure - self._offset_exposure = offset_exposure - if method.lower() == "irls": return self._fit_irls(start_params=start_params, maxiter=maxiter, tol=tol, scale=scale, cov_type=cov_type, @@ -999,6 +1014,77 @@ def _fit_irls(self, start_params=None, maxiter=100, tol=1e-8, return GLMResultsWrapper(glm_results) + def fit_regularized(self, method="elastic_net", alpha=0., + start_params=None, **kwargs): + """ + Return a regularized fit to a linear regression model. + + Parameters + ---------- + method : + Only the `elastic_net` approach is currently implemented. + alpha : scalar or array-like + The penalty weight. If a scalar, the same penalty weight + applies to all variables in the model. If a vector, it + must have the same length as `params`, and contains a + penalty weight for each coefficient. + start_params : array-like + Starting values for `params`. + + Returns + ------- + A GLMResults object, of the same type returned by `fit`. 
+ + Notes + ----- + The penalty is the ``elastic net`` penalty, which is a + combination of L1 and L2 penalties. + + The function that is minimized is: ..math:: + + -loglike/n + alpha*((1-L1_wt)*|params|_2^2/2 + L1_wt*|params|_1) + + where :math:`|*|_1` and :math:`|*|_2` are the L1 and L2 norms. + + Post-estimation results are based on the same data used to + select variables, hence may be subject to overfitting biases. + + The elastic_net method uses the following keyword arguments: + + maxiter : int + Maximum number of iterations + L1_wt : float + Must be in [0, 1]. The L1 penalty has weight L1_wt and the + L2 penalty has weight 1 - L1_wt. + cnvrg_tol : float + Convergence threshold for line searches + zero_tol : float + Coefficients below this threshold are treated as zero. + """ + + from statsmodels.base import elastic_net + + if method != "elastic_net": + raise ValueError("method for fit_regularied must be elastic_net") + + defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, + "zero_tol" : 1e-10} + for ky in defaults: + if ky not in kwargs: + kwargs[ky] = defaults[ky] + + result = elastic_net._fit(self, method=method, + alpha=alpha, + start_params=start_params, + return_object=True, + **kwargs) + + self.mu = self.predict(result.params) + self.scale = self.estimate_scale(self.mu) + + return result + + def fit_constrained(self, constraints, start_params=None, **fit_kwds): """fit the model subject to linear equality constraints @@ -1209,11 +1295,11 @@ def resid_working(self): @cache_readonly def resid_anscombe(self): - return self.family.resid_anscombe(self._endog, self.mu) + return self.family.resid_anscombe(self._endog, self.fittedvalues) @cache_readonly def resid_deviance(self): - return self.family.resid_dev(self._endog, self.mu) + return self.family.resid_dev(self._endog, self.fittedvalues) @cache_readonly def pearson_chi2(self): @@ -1224,8 +1310,11 @@ def pearson_chi2(self): @cache_readonly def fittedvalues(self): + if not hasattr(self, "mu"): + self.mu = self.model.predict(self.params) return self.mu + @cache_readonly def null(self): endog = self._endog @@ -1237,7 +1326,7 @@ def null(self): if hasattr(model, 'exposure'): kwargs['exposure'] = model.exposure if len(kwargs) > 0: - return GLM(endog, exog, family=self.family, **kwargs).fit().mu + return GLM(endog, exog, family=self.family, **kwargs).fit().fittedvalues else: wls_model = lm.WLS(endog, exog, weights=self._freq_weights * self._n_trials) diff --git a/statsmodels/genmod/tests/results/results_glm.py b/statsmodels/genmod/tests/results/results_glm.py index 713c83fb396..6103f8d282c 100644 --- a/statsmodels/genmod/tests/results/results_glm.py +++ b/statsmodels/genmod/tests/results/results_glm.py @@ -1104,6 +1104,22 @@ def __init__(self): 1.8473219, 2.8643241, 3.1211989, 3.3382067, 2.5269969, 0.8972542, 0.9793332, 0.5346209, 1.9790936]) + +class Cpunish_offset(Cpunish): + ''' + Same model as Cpunish but with offset of 100. Many things do not change. 
+ ''' + def __init__(self): + super(Cpunish_offset, self).__init__() + + self.params = (-1.140665e+01, 2.611017e-04, 7.781801e-02, + -9.493111e-02, 2.969349e-01, 2.301183e+00, + -1.872207e+01) + self.bse = (4.147e+00, 5.187e-05, 7.940e-02, 2.292e-02, + 4.375e-01, 4.284e-01, 4.284e+00) + + + class InvGauss(object): ''' Usef diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py index f46c801a9fc..f9b6b87398c 100644 --- a/statsmodels/genmod/tests/test_glm.py +++ b/statsmodels/genmod/tests/test_glm.py @@ -88,7 +88,7 @@ def test_aic_Stata(self): if isinstance(self.res1.model.family, (sm.families.Gamma, sm.families.InverseGaussian)): llf = self.res1.model.family.loglike(self.res1.model.endog, - self.res1.mu, self.res1.model.freq_weights, scale=1) + self.res1.mu, self.res1.model.freq_weights, scale=1) aic = (-2*llf+2*(self.res1.df_model+1))/self.res1.nobs else: aic = self.res1.aic/self.res1.nobs @@ -111,7 +111,7 @@ def test_loglike(self): if isinstance(self.res1.model.family, (sm.families.Gamma, sm.families.InverseGaussian)): llf = self.res1.model.family.loglike(self.res1.model.endog, - self.res1.mu, self.res1.model.freq_weights, scale=1) + self.res1.mu, self.res1.model.freq_weights, scale=1) else: llf = self.res1.llf assert_almost_equal(llf, self.res2.llf, self.decimal_loglike) @@ -629,19 +629,20 @@ def __init__(self): class TestGlmPoissonOffset(CheckModelResultsMixin): @classmethod def setupClass(cls): - from .results.results_glm import Cpunish + from .results.results_glm import Cpunish_offset from statsmodels.datasets.cpunish import load + cls.decimal_params = DECIMAL_4 + cls.decimal_bse = DECIMAL_4 + cls.decimal_aic_R = 3 data = load() data.exog[:,3] = np.log(data.exog[:,3]) - data.exog = add_constant(data.exog, prepend=False) + data.exog = add_constant(data.exog, prepend=True) exposure = [100] * len(data.endog) cls.data = data cls.exposure = exposure cls.res1 = GLM(data.endog, data.exog, family=sm.families.Poisson(), exposure=exposure).fit() - cls.res1.params[-1] += np.log(100) # add exposure back in to param - # to make the results the same - cls.res2 = Cpunish() + cls.res2 = Cpunish_offset() def test_missing(self): # make sure offset is dropped correctly @@ -1619,6 +1620,38 @@ def testTweediePowerEstimate(): # assert_allclose(res1.scale, np.exp(res2.params[0]), rtol=0.25) p = model1.estimate_tweedie_power(res1.mu) assert_allclose(p, res2.params[1], rtol=0.25) +======= +class TestRegularized(object): + + def test_regularized(self): + + import os + from . 
import glmnet_r_results + + for dtype in "binomial", "poisson": + + cur_dir = os.path.dirname(os.path.abspath(__file__)) + data = np.loadtxt(os.path.join(cur_dir, "results", "enet_%s.csv" % dtype), + delimiter=",") + + endog = data[:, 0] + exog = data[:, 1:] + + fam = {"binomial" : sm.families.Binomial, + "poisson" : sm.families.Poisson}[dtype] + + for j in range(9): + + vn = "rslt_%s_%d" % (dtype, j) + r_result = getattr(glmnet_r_results, vn) + L1_wt = r_result[0] + alpha = r_result[1] + params = r_result[2:] + + model = GLM(endog, exog, family=fam()) + sm_result = model.fit_regularized(L1_wt=L1_wt, alpha=alpha) + + assert_allclose(params, sm_result.params, atol=1e-2, rtol=0.3) if __name__=="__main__": diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py index aa1a5320c40..d5ac6db56bb 100644 --- a/statsmodels/regression/linear_model.py +++ b/statsmodels/regression/linear_model.py @@ -232,123 +232,6 @@ def fit(self, method="pinv", cov_type='nonrobust', cov_kwds=None, **kwargs) return RegressionResultsWrapper(lfit) - def fit_regularized(self, method="coord_descent", maxiter=1000, - alpha=0., L1_wt=1., start_params=None, - cnvrg_tol=1e-8, zero_tol=1e-8, **kwargs): - """ - Return a regularized fit to a linear regression model. - - Parameters - ---------- - method : string - Only the coordinate descent algorithm is implemented. - maxiter : integer - The maximum number of iteration cycles (an iteration cycle - involves running coordinate descent on all variables). - alpha : scalar or array-like - The penalty weight. If a scalar, the same penalty weight - applies to all variables in the model. If a vector, it - must have the same length as `params`, and contains a - penalty weight for each coefficient. - L1_wt : scalar - The fraction of the penalty given to the L1 penalty term. - Must be between 0 and 1 (inclusive). If 0, the fit is - ridge regression. If 1, the fit is the lasso. - start_params : array-like - Starting values for ``params``. - cnvrg_tol : scalar - If ``params`` changes by less than this amount (in sup-norm) - in once iteration cycle, the algorithm terminates with - convergence. - zero_tol : scalar - Any estimated coefficient smaller than this value is - replaced with zero. - - Returns - ------- - A RegressionResults object, of the same type returned by - ``fit``. - - Notes - ----- - The approach closely follows that implemented in the glmnet - package in R. The penalty is the "elastic net" penalty, which - is a convex combination of L1 and L2 penalties. - - The function that is minimized is: ..math:: - - 0.5*RSS/n + alpha*((1-L1_wt)*|params|_2^2/2 + L1_wt*|params|_1) - - where RSS is the usual regression sum of squares, n is the - sample size, and :math:`|*|_1` and :math:`|*|_2` are the L1 and L2 - norms. - - Post-estimation results are based on the same data used to - select variables, hence may be subject to overfitting biases. - - References - ---------- - Friedman, Hastie, Tibshirani (2008). Regularization paths for - generalized linear models via coordinate descent. Journal of - Statistical Software 33(1), 1-22 Feb 2010. - """ - - k_exog = self.wexog.shape[1] - - if np.isscalar(alpha): - alpha = alpha * np.ones(k_exog, dtype=np.float64) - - # Below we work with RSS + penalty, so we need to rescale. 
- alpha *= 2 * self.wexog.shape[0] - - if start_params is None: - params = np.zeros(k_exog, dtype=np.float64) - else: - params = start_params.copy() - - converged = False - xxprod = 2*(self.wexog**2).sum(0) - - # Coordinate descent - for itr in range(maxiter): - - params_save = params.copy() - for k in range(self.wexog.shape[1]): - - params[k] = 0. - wendog_adj = self.wendog - np.dot(self.wexog, params) - xyprod = 2*np.dot(self.wexog[:,k], wendog_adj) - den = xxprod[k] + alpha[k] * (1 - L1_wt) - a = alpha[k] * L1_wt - if a >= np.abs(xyprod): - params[k] = 0. - elif xyprod > 0: - params[k] = (xyprod - a) / den - else: - params[k] = (xyprod + a) / den - - # Check for convergence - pchange = np.max(np.abs(params - params_save)) - if pchange < cnvrg_tol: - converged = True - break - - # Set approximate zero coefficients to be exactly zero - params *= np.abs(params) >= zero_tol - - # Fit the reduced model to get standard errors and other - # post-estimation results. - ii = np.flatnonzero(params) - cov = np.zeros((k_exog, k_exog), dtype=np.float64) - if len(ii) > 0: - model = self.__class__(self.wendog, self.wexog[:,ii]) - rslt = model.fit() - cov[np.ix_(ii, ii)] = rslt.normalized_cov_params - - lfit = RegressionResults(self, params, - normalized_cov_params=cov) - lfit.converged = converged - return RegressionResultsWrapper(lfit) def predict(self, params, exog=None): """ @@ -749,25 +632,37 @@ def __init__(self, endog, exog=None, missing='none', hasconst=None, if "weights" in self._init_keys: self._init_keys.remove("weights") - def loglike(self, params): + def loglike(self, params, scale=None): """ - The likelihood function for the clasical OLS model. + The likelihood function for the OLS model. Parameters ---------- params : array-like The coefficients with which to estimate the log-likelihood. + scale : float or None + If None, return the profile (concentrated) log likelihood + (profiled over the scale parameter), else return the + log-likelihood using the given scale value. Returns ------- - The concentrated likelihood function evaluated at params. + The likelihood function evaluated at params. """ nobs2 = self.nobs / 2.0 - return -nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\ - np.dot(np.transpose(self.endog - - np.dot(self.exog, params)), - (self.endog - np.dot(self.exog,params)))) -\ - nobs2 + nobs = float(self.nobs) + resid = self.endog - np.dot(self.exog, params) + if hasattr(self, 'offset'): + resid -= self.offset + ssr = np.sum(resid**2) + if scale is None: + # profile log likelihood + llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2 + else: + # log-likelihood + llf = -nobs2 * np.log(2 * np.pi * scale) - ssr / (2*scale) + return llf + def whiten(self, Y): """ @@ -775,6 +670,182 @@ def whiten(self, Y): """ return Y + + def score(self, params, scale=None): + """ + Evaluate the score function at a given point. + + The score corresponds to the profile (concentrated) + log-likelihood in which the scale parameter has been profiled + out. + + Parameters + ---------- + params : array-like + The parameter vector at which the score function is + computed. + scale : float or None + If None, return the profile (concentrated) log likelihood + (profiled over the scale parameter), else return the + log-likelihood using the given scale value. + + Returns + ------- + The score vector. 
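+
+        Notes
+        -----
+        A quick numerical check of the profile score (an illustrative
+        sketch with arbitrary simulated data, not part of the test
+        suite)::
+
+            import numpy as np
+            from scipy.optimize import approx_fprime
+            import statsmodels.api as sm
+
+            x = np.random.normal(size=(50, 3))
+            y = x.sum(1) + np.random.normal(size=50)
+            model = sm.OLS(y, x)
+            b = np.array([0.5, 0.5, 0.5])
+
+            # both quantities are derivatives of the profile
+            # log-likelihood at b, so they should agree closely
+            analytic = model.score(b)
+            numeric = approx_fprime(b, model.loglike, 1e-7)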
+ """ + + if not hasattr(self, "_wexog_xprod"): + self._setup_score_hess() + + xtxb = np.dot(self._wexog_xprod, params) + sdr = -self._wexog_x_wendog + xtxb + + if scale is None: + ssr = self._wendog_xprod - 2 * np.dot(self._wexog_x_wendog.T, params) + ssr += np.dot(params, xtxb) + return -self.nobs * sdr / ssr + else: + return -sdr / scale + + + def _setup_score_hess(self): + y = self.wendog + if hasattr(self, 'offset'): + y = y - self.offset + self._wendog_xprod = np.sum(y * y) + self._wexog_xprod = np.dot(self.wexog.T, self.wexog) + self._wexog_x_wendog = np.dot(self.wexog.T, y) + + + def hessian(self, params, scale=None): + """ + Evaluate the Hessian function at a given point. + + Parameters + ---------- + params : array-like + The parameter vector at which the Hessian is computed. + scale : float or None + If None, return the profile (concentrated) log likelihood + (profiled over the scale parameter), else return the + log-likelihood using the given scale value. + + Returns + ------- + The Hessian matrix. + """ + + if not hasattr(self, "_wexog_xprod"): + self._setup_score_hess() + + xtxb = np.dot(self._wexog_xprod, params) + + if scale is None: + ssr = self._wendog_xprod - 2 * np.dot(self._wexog_x_wendog.T, params) + ssr += np.dot(params, xtxb) + ssrp = -2*self._wexog_x_wendog + 2*xtxb + hm = self._wexog_xprod / ssr - np.outer(ssrp, ssrp) / ssr**2 + return -self.nobs * hm / 2 + else: + return -self._wexog_xprod / scale + + return hess + + + def fit_regularized(self, method="elastic_net", alpha=0., + start_params=None, profile_scale=False, + **kwargs): + """ + Return a regularized fit to a linear regression model. + + Parameters + ---------- + method : string + Only the 'elastic_net' approach is currently implemented. + alpha : scalar or array-like + The penalty weight. If a scalar, the same penalty weight + applies to all variables in the model. If a vector, it + must have the same length as `params`, and contains a + penalty weight for each coefficient. + start_params : array-like + Starting values for ``params``. + profile_scale : bool + If True the penalized fit is computed using the profile + (concentrated) log-likelihood for the Gaussian model. + Otherwise the fit uses the residual sum of squares. + + Returns + ------- + A RegressionResults object, of the same type returned by + ``fit``. + + Notes + ----- + + The elastic net approach closely follows that implemented in + the glmnet package in R. The penalty is a combination of L1 + and L2 penalties. + + The function that is minimized is: ..math:: + + 0.5*RSS/n + alpha*((1-L1_wt)*|params|_2^2/2 + L1_wt*|params|_1) + + where RSS is the usual regression sum of squares, n is the + sample size, and :math:`|*|_1` and :math:`|*|_2` are the L1 and L2 + norms. + + Post-estimation results are based on the same data used to + select variables, hence may be subject to overfitting biases. + + The elastic_net method uses the following keyword arguments: + + maxiter : int + Maximum number of iterations + L1_wt : float + Must be in [0, 1]. The L1 penalty has weight L1_wt and the + L2 penalty has weight 1 - L1_wt. + cnvrg_tol : float + Convergence threshold for line searches + zero_tol : float + Coefficients below this threshold are treated as zero. + + References + ---------- + Friedman, Hastie, Tibshirani (2008). Regularization paths for + generalized linear models via coordinate descent. Journal of + Statistical Software 33(1), 1-22 Feb 2010. 
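+
+        Examples
+        --------
+        A minimal lasso fit; the arrays below are simulated for
+        illustration only and are not part of the statsmodels test
+        data::
+
+            import numpy as np
+            import statsmodels.api as sm
+
+            np.random.seed(0)
+            x = np.random.normal(size=(100, 5))
+            y = np.dot(x, [1., 0., 0., -1., 0.]) + np.random.normal(size=100)
+
+            # L1_wt=1 gives the lasso; alpha sets the penalty strength
+            result = sm.OLS(y, x).fit_regularized(alpha=0.1, L1_wt=1.)
+            print(result.params)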
+ """ + + from statsmodels.base import elastic_net + + if method != "elastic_net": + raise ValueError("method for fit_regularied must be elastic_net") + + defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, + "zero_tol" : 1e-10} + for ky in defaults: + if ky not in kwargs: + kwargs[ky] = defaults[ky] + + if profile_scale: + loglike_kwds = {} + score_kwds = {} + hess_kwds = {} + else: + loglike_kwds = {"scale": 1} + score_kwds = {"scale": 1} + hess_kwds = {"scale": 1} + + return elastic_net._fit(self, method=method, + alpha=alpha, + start_params=start_params, + return_object=True, + loglike_kwds=loglike_kwds, + score_kwds=score_kwds, + hess_kwds=hess_kwds, + **kwargs) + + class GLSAR(GLS): __doc__ = """ A regression model with an AR(p) covariance structure. diff --git a/statsmodels/regression/tests/lasso_r_results.R b/statsmodels/regression/tests/lasso_r_results.R index 8d7e3dd6e79..e45286ea59a 100644 --- a/statsmodels/regression/tests/lasso_r_results.R +++ b/statsmodels/regression/tests/lasso_r_results.R @@ -24,8 +24,6 @@ for (n in c(100, 200, 300)) { for (alpha in c(0, 0.5, 1)) { fit = glmnet(exog, endog, intercept=FALSE, standardize=FALSE, alpha=alpha) - ii = length(fit$lambda) * c(0.3, 0.5, 0.7) - ii = round(ii) for (q in c(0.3, 0.5, 0.7)) { ii = round(q * length(fit$lambda)) diff --git a/statsmodels/regression/tests/test_regression.py b/statsmodels/regression/tests/test_regression.py index 6589d0890fe..99f628e5fff 100644 --- a/statsmodels/regression/tests/test_regression.py +++ b/statsmodels/regression/tests/test_regression.py @@ -1028,6 +1028,9 @@ def test_regularized(self): # Smoke test for summary smry = rslt.summary() + # Smoke test for profile likeihood + result = mod.fit_regularized(L1_wt=L1_wt, alpha=lam, profile_scale=True) + def test_formula_missing_cat(): # gh-805 From 3c3c4128345d4d59913edb8059b99996770e5833 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Mon, 4 May 2015 23:24:08 -0400 Subject: [PATCH 02/27] Doc string edits --- statsmodels/base/elastic_net.py | 30 +++++++++++++++++++--------- statsmodels/genmod/tests/test_glm.py | 2 +- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index 1b3d340b018..e4fafa30df2 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -2,10 +2,16 @@ import statsmodels.base.wrapper as wrap -# All the negative smooth penalized log-likelihood functions. These -# functions only account for the likelihod and any smooth penalty. -# The non-smooth penalty is not accounted for here. def _gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds): + """ + Negative penalized log-likelihood functions. + + Returns the negative penalized log-likelihood, its derivative, and + its Hessian. The penalty only includes the smooth (L2) term. + + All three functions have arguments (x, model), where ``x`` is a + point in the parmeter space and ``model`` is an arbitrary model. + """ def nploglike(params, model): nobs = model.nobs @@ -37,8 +43,9 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., Parameters ---------- - model : ... - ... + model : model object + A statsmodels object implementing ``log-like``, ``score``, and + ``hessian``. method : Only the coordinate descent algorithm is implemented. maxiter : integer @@ -64,10 +71,18 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., replaced with zero. return_object : bool If False, only the parameter estimates are returned. 
+ loglike_kwds : dict-like or None + Keyword arguments for the log-likelihood function. + score_kwds : dict-like or None + Keyword arguments for the score function. + hess_kwds : dict-like or None + Keyword arguments for the Hessian function. Returns ------- - A results object of the same type returned by `fit`. + If `return_object` is true, a results object of the same type + returned by `model.fit`, otherise returns the estimated parameter + vector. Notes ----- @@ -87,9 +102,6 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., then optimize the L1 penalized version of this function along a coordinate axis. - - This is a generic implementation that may be reimplemented in - specific models for better performance. """ k_exog = model.exog.shape[1] diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py index f9b6b87398c..bb083755f52 100644 --- a/statsmodels/genmod/tests/test_glm.py +++ b/statsmodels/genmod/tests/test_glm.py @@ -625,7 +625,7 @@ def __init__(self): #class TestGlmNegbinomial_nbinom(CheckModelResultsMixin): # pass -#NOTE: hacked together version to test poisson offset + class TestGlmPoissonOffset(CheckModelResultsMixin): @classmethod def setupClass(cls): From 732227ca8c5c3167200ef09a1f8921c3a361bcbb Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 5 May 2015 00:06:07 -0400 Subject: [PATCH 03/27] Add tests --- statsmodels/duration/tests/test_phreg.py | 27 ++++++++++++++++++------ statsmodels/genmod/tests/test_glm.py | 14 ++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/statsmodels/duration/tests/test_phreg.py b/statsmodels/duration/tests/test_phreg.py index e27ebdefa56..1747ee800e7 100644 --- a/statsmodels/duration/tests/test_phreg.py +++ b/statsmodels/duration/tests/test_phreg.py @@ -329,6 +329,7 @@ def test_get_distribution(self): fitted_sd = dist.std() sample = dist.rvs() + def test_fit_regularized(self): # Data set sizes @@ -338,7 +339,7 @@ def test_fit_regularized(self): for js,s in enumerate([0,0.1]): coef_name = "coef_%d_%d_%d" % (n, p, js) - coef = getattr(survival_enet_r_results, coef_name) + params = getattr(survival_enet_r_results, coef_name) fname = "survival_data_%d_%d.csv" % (n, p) time, status, entry, exog = self.load_file(fname) @@ -346,16 +347,28 @@ def test_fit_regularized(self): exog -= exog.mean(0) exog /= exog.std(0, ddof=1) - mod = PHReg(time, exog, status=status, ties='breslow') - rslt = mod.fit_regularized(alpha=s) + model = PHReg(time, exog, status=status, ties='breslow') + sm_result = model.fit_regularized(alpha=s) # The agreement isn't very high, the issue may be on - # the R side. They seem to use some approximations - # that we are not using. - assert_allclose(rslt.params, coef, rtol=0.3) + # the R side. See below for further checks. + assert_allclose(sm_result.params, params, rtol=0.3) # Smoke test for summary - smry = rslt.summary() + smry = sm_result.summary() + + # The penalized log-likelihood that we are maximizing. + def plf(params): + llf = model.loglike(params) / len(time) + L1_wt = 1 + llf = llf - s * ((1 - L1_wt)*np.sum(params**2) / 2 + L1_wt*np.sum(np.abs(params))) + return llf + + # Confirm that we are doing better than glmnet. 
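+                # (the penalized log-likelihood at the statsmodels
+                # estimate should exceed its value at the glmnet estimate)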
+ from numpy.testing import assert_equal + llf_r = plf(params) + llf_sm = plf(sm_result.params) + assert_equal(np.sign(llf_sm - llf_r), 1) if __name__=="__main__": diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py index bb083755f52..372ea4ce0fb 100644 --- a/statsmodels/genmod/tests/test_glm.py +++ b/statsmodels/genmod/tests/test_glm.py @@ -1651,8 +1651,22 @@ def test_regularized(self): model = GLM(endog, exog, family=fam()) sm_result = model.fit_regularized(L1_wt=L1_wt, alpha=alpha) + # Agreement is OK, see below for further check assert_allclose(params, sm_result.params, atol=1e-2, rtol=0.3) + # The penalized log-likelihood that we are maximizing. + def plf(params): + llf = model.loglike(params) / len(endog) + llf = llf - alpha * ((1 - L1_wt)*np.sum(params**2) / 2 + L1_wt*np.sum(np.abs(params))) + return llf + + # Confirm that we are doing better than glmnet. + from numpy.testing import assert_equal + llf_r = plf(params) + llf_sm = plf(sm_result.params) + assert_equal(np.sign(llf_sm - llf_r), 1) + + if __name__=="__main__": #run_module_suite() From 95bee90b0312f24be3392fa3b43e07a4bba85f39 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 5 May 2015 00:09:14 -0400 Subject: [PATCH 04/27] Add missing utility script for tests --- statsmodels/genmod/tests/glmnet_r_results.py | 37 ++++ .../genmod/tests/results/elastic_net.R | 37 ++++ .../results/elastic_net_generate_tests.py | 24 +++ .../genmod/tests/results/enet_binomial.csv | 200 ++++++++++++++++++ .../genmod/tests/results/enet_poisson.csv | 200 ++++++++++++++++++ 5 files changed, 498 insertions(+) create mode 100644 statsmodels/genmod/tests/glmnet_r_results.py create mode 100644 statsmodels/genmod/tests/results/elastic_net.R create mode 100644 statsmodels/genmod/tests/results/elastic_net_generate_tests.py create mode 100644 statsmodels/genmod/tests/results/enet_binomial.csv create mode 100644 statsmodels/genmod/tests/results/enet_poisson.csv diff --git a/statsmodels/genmod/tests/glmnet_r_results.py b/statsmodels/genmod/tests/glmnet_r_results.py new file mode 100644 index 00000000000..3f33363ace4 --- /dev/null +++ b/statsmodels/genmod/tests/glmnet_r_results.py @@ -0,0 +1,37 @@ +import numpy as np + +rslt_binomial_0 = np.array([0,6.618737,0.004032037,0.01433665,0.01265635,0.006173346,0.01067706]) + +rslt_binomial_1 = np.array([0,1.029661,0.02180239,0.07769613,0.06756466,0.03156418,0.05851878]) + +rslt_binomial_2 = np.array([0,0.1601819,0.07111087,0.2544921,0.2110318,0.08577924,0.1984383]) + +rslt_binomial_3 = np.array([0.5,0.05343991,0.004990061,0.2838563,0.2167881,0.02370156,0.2096612]) + +rslt_binomial_4 = np.array([0.5,0.02313286,0.0708914,0.3791042,0.2938332,0.07506391,0.2982251]) + +rslt_binomial_5 = np.array([0.5,0.009124078,0.106681,0.4327268,0.3362166,0.1019452,0.3479955]) + +rslt_binomial_6 = np.array([1,0.02932512,0,0.3085764,0.2300801,0.01143652,0.2291531]) + +rslt_binomial_7 = np.array([1,0.01269414,0.07022348,0.396642,0.3044255,0.07151663,0.31301]) + +rslt_binomial_8 = np.array([1,0.005494992,0.1049623,0.4385186,0.3391729,0.09907393,0.3527401]) + +rslt_poisson_0 = np.array([0,23.5349,0.009251658,0.003730997,0.01266164,0.003439135,0.0141719]) + +rslt_poisson_1 = np.array([0,3.661269,0.04842557,0.02095708,0.06550316,0.02029514,0.07300782]) + +rslt_poisson_2 = np.array([0,0.5695749,0.1440462,0.07208017,0.182649,0.07511376,0.2018242]) + +rslt_poisson_3 = np.array([0.5,0.1577593,0.1247603,0.02857521,0.185693,0.03840622,0.2200925]) + +rslt_poisson_4 = 
np.array([0.5,0.05669575,0.187629,0.08842012,0.2348627,0.09736964,0.2628845]) + +rslt_poisson_5 = np.array([0.5,0.0185653,0.2118078,0.1121067,0.2534181,0.1204543,0.2784761]) + +rslt_poisson_6 = np.array([1,0.07887965,0.1339927,0.0322772,0.1969884,0.0439019,0.2339252]) + +rslt_poisson_7 = np.array([1,0.02834788,0.1927163,0.09160406,0.2398164,0.1010126,0.2682158]) + +rslt_poisson_8 = np.array([1,0.0101877,0.2126847,0.1123439,0.2544153,0.1208601,0.2796794]) diff --git a/statsmodels/genmod/tests/results/elastic_net.R b/statsmodels/genmod/tests/results/elastic_net.R new file mode 100644 index 00000000000..fd655177b12 --- /dev/null +++ b/statsmodels/genmod/tests/results/elastic_net.R @@ -0,0 +1,37 @@ +library(glmnet) +library(R2nparray) + +rslt = list() + +for (dtype in c("binomial", "poisson")) { + + ik = 0 + + data = read.csv(sprintf("enet_%s.csv", dtype)) + + endog = data[, 1] + exog = data[, 2:dim(data)[2]] + exog = as.matrix(exog) + + for (k in 1:dim(exog)[2]) { + exog[,k] = exog[,k] - mean(exog[,k]) + exog[,k] = exog[,k] / sd(exog[,k]) + } + + for (alpha in c(0, 0.5, 1)) { + + fit = glmnet(exog, endog, family=dtype, intercept=FALSE, + standardize=FALSE, alpha=alpha) + + for (q in c(0.3, 0.5, 0.7)) { + ii = round(q * length(fit$lambda)) + coefs = coef(fit, s=fit$lambda[ii]) + coefs = coefs[2:length(coefs)] + rname = sprintf("rslt_%s_%d", dtype, ik) + ik = ik + 1 + rslt[[rname]] = c(alpha, fit$lambda[ii], coefs) + } + } +} + +R2nparray(rslt, fname="../glmnet_r_results.py") diff --git a/statsmodels/genmod/tests/results/elastic_net_generate_tests.py b/statsmodels/genmod/tests/results/elastic_net_generate_tests.py new file mode 100644 index 00000000000..753803e798d --- /dev/null +++ b/statsmodels/genmod/tests/results/elastic_net_generate_tests.py @@ -0,0 +1,24 @@ +import numpy as np + +""" +Generate data sets for testing elastic net fits of GLMs. 
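+
+The generated files are read by elastic_net.R, which fits the same
+models with glmnet and writes the reference parameter estimates to
+glmnet_r_results.py for use by test_glm.py.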
+""" + +n = 200 +p = 5 + +# Logistic +exog = np.random.normal(size=(n, p)) +lin_pred = exog.sum(1) * 0.2 +exp_val = 1 / (1 + np.exp(-lin_pred)) +endog = 1 * (np.random.uniform(size=n) < exp_val) +mat = np.concatenate((endog[:, None], exog), axis=1) +np.savetxt("enet_binomial.csv", mat, fmt="%.2f", delimiter=",") + +# Poisson +exog = np.random.normal(size=(n, p)) +lin_pred = exog.sum(1) * 0.2 +exp_val = np.exp(lin_pred) +endog = np.random.poisson(exp_val) +mat = np.concatenate((endog[:, None], exog), axis=1) +np.savetxt("enet_poisson.csv", mat, fmt="%.2f", delimiter=",") diff --git a/statsmodels/genmod/tests/results/enet_binomial.csv b/statsmodels/genmod/tests/results/enet_binomial.csv new file mode 100644 index 00000000000..3884d20d460 --- /dev/null +++ b/statsmodels/genmod/tests/results/enet_binomial.csv @@ -0,0 +1,200 @@ +0.00,0.42,0.45,0.24,-0.28,0.68 +0.00,0.62,0.12,0.94,-1.49,-0.53 +1.00,-1.32,-0.69,-0.30,0.50,0.82 +0.00,0.25,-1.27,0.28,2.00,-0.61 +1.00,0.51,0.08,0.21,1.00,-1.36 +1.00,1.88,-0.63,-0.53,-2.08,0.16 +1.00,-0.67,1.09,1.43,0.26,0.73 +0.00,-1.61,-1.07,-1.37,-0.17,0.10 +0.00,1.08,1.04,0.78,-0.62,-2.36 +1.00,-1.32,-0.69,1.24,-1.05,0.29 +0.00,-0.26,1.46,-0.92,1.75,-1.52 +1.00,-0.67,0.63,0.18,1.80,0.70 +0.00,0.12,-0.91,-1.25,-1.49,1.57 +0.00,-1.31,-0.79,0.45,-0.30,-0.98 +1.00,-0.33,1.97,-0.99,-0.23,-1.76 +0.00,1.50,0.95,0.47,-1.14,-0.92 +0.00,1.21,0.22,-0.79,-0.54,0.23 +1.00,-0.12,0.38,0.34,-0.79,-0.66 +1.00,0.32,-0.02,-0.10,1.46,-1.05 +1.00,1.44,-0.47,1.08,0.05,-0.78 +0.00,0.10,-0.57,-0.09,-0.24,-0.19 +0.00,1.30,-0.99,-1.25,-0.36,-0.20 +0.00,-0.48,-0.64,0.56,0.48,1.40 +0.00,0.28,0.39,-2.01,-1.74,-0.47 +1.00,0.51,1.81,0.81,-0.86,0.68 +1.00,-3.04,0.21,-0.67,-0.09,0.56 +1.00,1.44,0.74,0.55,1.14,0.35 +1.00,2.15,-0.07,1.84,0.75,0.03 +1.00,-3.68,1.72,1.68,0.18,-0.23 +0.00,-0.93,0.01,-0.62,0.11,-0.59 +0.00,-0.04,1.09,0.31,0.84,1.33 +0.00,0.69,0.37,-1.40,-0.12,-1.20 +1.00,-0.66,0.48,0.68,-1.03,-1.74 +0.00,0.00,1.83,-0.07,0.35,-0.92 +0.00,1.32,-2.05,1.24,1.00,-0.39 +0.00,-1.17,-1.28,0.31,1.45,1.00 +0.00,-0.64,0.35,-0.86,1.26,-2.51 +1.00,-1.16,-1.45,-0.83,-0.23,1.59 +1.00,1.34,0.57,-0.38,0.14,-0.90 +0.00,-0.01,-0.96,1.03,-2.48,-1.42 +1.00,0.06,0.19,-0.76,-0.15,0.15 +0.00,-1.00,-1.96,-1.18,-0.75,0.74 +0.00,-1.27,0.15,-0.86,-0.20,-0.39 +1.00,-0.90,2.16,1.72,-0.69,-0.68 +1.00,-1.34,1.88,-0.47,-0.28,-1.31 +1.00,-1.13,-0.74,0.09,0.52,0.06 +1.00,1.03,0.17,-1.08,1.79,-0.47 +0.00,-0.16,-0.93,0.45,-1.58,-0.38 +1.00,-0.51,0.02,-0.84,-0.68,1.24 +1.00,0.58,-0.65,0.17,1.20,-0.09 +1.00,0.65,0.42,-0.14,1.14,-0.62 +1.00,0.58,1.51,-0.10,1.55,0.99 +0.00,1.15,-0.47,-0.85,0.63,0.04 +1.00,-1.32,-0.95,0.23,2.30,0.71 +0.00,0.37,-0.04,-1.43,-0.05,-0.10 +1.00,0.50,-0.28,0.69,2.27,1.33 +0.00,-1.05,-2.08,-1.63,0.57,-0.24 +1.00,0.70,0.40,0.35,0.20,-0.11 +1.00,-0.14,-1.21,-0.04,-0.52,0.21 +1.00,-1.38,-2.20,-0.43,0.44,-0.44 +1.00,-0.77,0.23,-0.12,0.11,1.24 +0.00,0.17,-1.04,-1.90,0.26,-0.77 +1.00,1.36,0.67,0.47,0.61,1.16 +1.00,-1.70,1.42,0.77,1.77,1.68 +0.00,-0.91,-0.25,-0.40,-0.24,1.15 +0.00,-1.43,-1.82,-1.24,-1.42,-0.74 +0.00,-0.62,-0.71,-3.27,0.86,0.73 +0.00,-0.66,-0.50,0.34,-0.41,-1.24 +0.00,-0.65,-2.60,0.80,0.64,0.10 +1.00,0.63,-0.29,1.92,0.50,0.51 +0.00,-0.27,0.12,-1.15,0.19,-1.93 +1.00,-1.27,-1.13,-1.23,1.63,1.30 +0.00,-0.68,0.47,-0.27,1.25,-0.29 +0.00,-1.88,-1.73,-0.32,0.67,-0.94 +1.00,0.73,0.62,-1.24,0.16,-0.24 +0.00,-0.54,-1.90,-0.45,1.56,1.27 +1.00,1.28,-0.12,-0.53,1.03,0.91 +0.00,-0.07,-0.11,-2.01,-0.34,-0.61 +1.00,-0.21,-0.02,1.21,-0.16,0.34 +1.00,1.10,-1.87,-0.51,-0.63,0.81 
+1.00,2.25,-1.74,-0.88,-0.53,0.53 +1.00,-1.34,0.60,0.03,0.35,0.85 +0.00,-1.04,-0.84,-0.24,-1.17,-1.57 +0.00,1.88,-0.66,0.87,-0.46,0.17 +0.00,-2.24,-0.81,-0.36,1.00,0.12 +0.00,-1.28,0.77,-0.64,-1.20,-0.24 +1.00,1.31,-1.58,0.15,-0.83,0.91 +1.00,0.56,-0.89,-0.01,-0.05,1.59 +0.00,-0.17,-0.18,-0.20,0.87,-0.53 +0.00,0.54,0.58,0.41,1.58,0.77 +0.00,-0.53,-0.24,-0.49,-0.35,-0.04 +1.00,0.79,0.54,-0.27,0.28,-0.26 +0.00,-1.20,-1.04,-0.69,-1.32,0.76 +1.00,0.21,0.06,-0.37,0.87,-0.75 +0.00,0.70,1.24,0.75,-0.17,-0.78 +1.00,0.20,1.47,-0.70,-2.32,-0.76 +1.00,-0.20,-1.14,1.11,-2.40,-0.38 +0.00,0.43,0.35,0.43,-1.37,-0.40 +0.00,-0.27,0.64,0.54,-0.65,-0.48 +1.00,1.07,0.77,-0.44,-0.05,-0.41 +1.00,-0.51,-1.70,-0.46,-1.45,0.54 +0.00,-1.23,-0.34,-1.83,-1.21,-1.57 +0.00,0.11,-0.68,0.65,0.25,0.14 +0.00,-0.04,-0.81,1.11,0.41,0.47 +1.00,-0.90,0.57,-0.92,0.51,-0.58 +1.00,1.34,-1.06,0.33,-0.50,-0.22 +0.00,-0.91,-1.85,0.02,-1.67,-0.73 +0.00,-1.43,-0.42,1.75,-0.49,0.08 +1.00,0.75,0.14,-0.55,0.03,-0.82 +0.00,2.21,-0.48,-2.20,-1.09,1.67 +1.00,0.71,-1.50,-0.71,0.05,2.14 +1.00,-2.32,0.26,-0.69,-1.01,0.27 +1.00,-0.38,0.59,1.06,0.74,-0.03 +1.00,-0.99,-0.17,-0.08,-0.76,-0.11 +0.00,1.29,-1.86,0.33,0.16,-0.06 +0.00,0.58,-1.56,-0.97,0.42,1.93 +0.00,-0.96,-0.20,-1.60,-0.47,0.53 +0.00,0.82,0.12,-0.56,1.02,0.85 +0.00,-1.38,-0.13,0.94,0.40,0.70 +0.00,-0.58,-1.20,-1.64,0.03,-0.85 +0.00,-0.52,-0.76,0.44,1.26,0.55 +1.00,-1.28,0.61,-0.35,-1.13,-0.12 +1.00,-1.20,2.10,0.62,-1.67,-0.33 +0.00,-0.90,-0.69,-0.98,-1.63,0.84 +1.00,1.75,2.48,-0.04,0.00,0.25 +0.00,-0.48,1.86,-2.13,0.87,-0.20 +1.00,0.22,0.66,0.59,0.79,2.09 +1.00,0.52,1.85,-0.32,-0.78,-0.77 +0.00,1.00,-0.59,-1.02,1.88,0.60 +1.00,-1.33,0.05,-0.07,0.96,1.66 +0.00,-0.13,0.88,0.25,-0.20,1.22 +1.00,-0.55,-0.62,-0.81,0.16,0.75 +1.00,-0.60,1.66,-0.90,1.04,1.07 +1.00,2.19,0.83,0.03,-2.03,0.19 +1.00,1.27,-2.35,1.26,-0.56,-0.79 +1.00,1.20,-0.71,-0.37,-0.30,2.33 +0.00,0.17,-1.56,1.28,0.16,-0.06 +1.00,0.04,-0.52,1.09,-1.00,0.46 +1.00,0.75,-0.58,0.15,-1.85,-0.36 +1.00,0.68,1.69,1.54,0.60,0.47 +1.00,-0.38,0.66,1.25,0.18,0.59 +1.00,-0.28,1.11,1.86,0.37,-0.53 +0.00,1.25,-1.66,1.22,-0.69,-0.47 +0.00,-1.14,-0.42,0.88,0.64,0.41 +0.00,0.39,0.91,0.15,0.30,0.34 +0.00,0.96,-0.68,1.63,-0.30,0.01 +0.00,-0.39,-1.36,0.71,-1.62,0.65 +0.00,0.04,-0.98,0.63,0.61,0.33 +0.00,-0.27,1.83,-1.77,0.91,0.79 +0.00,-0.80,-0.01,-0.81,-0.73,0.51 +0.00,-0.86,-1.16,-0.98,-0.54,1.07 +1.00,0.12,-0.35,0.97,1.27,-0.65 +1.00,-0.50,-1.27,0.53,2.28,1.71 +1.00,0.22,-0.10,0.28,0.10,1.48 +0.00,-0.10,-0.56,0.34,-0.25,0.85 +0.00,0.05,-0.97,-2.22,0.72,-0.69 +0.00,-0.20,0.19,-1.82,1.34,-1.21 +1.00,-0.36,0.23,-0.33,0.13,0.21 +1.00,-0.39,-1.46,-0.82,0.27,0.57 +1.00,-0.12,-1.23,-1.97,0.81,-0.08 +0.00,0.54,1.61,-0.27,-0.97,-0.70 +0.00,-0.20,-0.82,0.61,-2.51,-0.53 +1.00,-0.23,0.68,1.84,2.21,0.30 +1.00,-0.80,0.63,-0.12,-1.27,-0.17 +1.00,0.81,-0.14,1.02,0.20,-0.16 +1.00,1.52,-1.64,-0.31,0.84,-0.41 +1.00,-0.14,0.63,0.39,-0.01,-0.17 +0.00,0.62,1.66,0.60,-1.59,-1.81 +0.00,-1.14,0.27,0.16,-0.94,-0.11 +1.00,-1.13,-0.45,1.02,-1.19,-0.45 +0.00,0.38,0.04,-0.26,0.71,0.42 +1.00,1.35,0.23,-1.55,-1.19,-2.59 +1.00,0.87,0.57,-0.51,-0.70,-0.29 +0.00,-0.60,0.06,-0.34,0.43,-0.34 +0.00,0.80,-0.07,1.16,-0.54,-2.22 +0.00,0.70,-1.24,-1.00,-1.43,-0.92 +1.00,-1.11,-0.92,-0.51,-0.93,-0.18 +1.00,-0.29,-0.69,0.41,0.84,1.10 +1.00,0.45,0.92,-0.83,1.22,0.64 +0.00,-0.83,-0.74,1.30,-0.79,0.96 +1.00,-0.41,0.24,-0.12,-0.96,-0.51 +0.00,0.52,-1.95,-0.44,1.06,-0.69 +1.00,-0.54,1.29,1.56,1.16,-1.01 +1.00,-0.39,-0.41,0.25,-0.22,0.45 +0.00,-0.97,0.90,-0.17,0.01,0.03 
+1.00,-2.52,1.53,-3.83,2.25,-0.13 +1.00,0.42,-0.44,3.15,-1.38,-1.99 +1.00,1.38,-0.39,-0.54,-0.37,1.21 +1.00,-1.21,1.33,0.99,-1.83,0.80 +0.00,-0.39,-0.50,0.68,-0.39,-0.13 +0.00,-1.36,-0.00,0.35,-1.09,0.60 +0.00,-0.22,-0.02,0.92,0.35,0.07 +1.00,-0.58,-0.34,-1.03,-1.53,0.81 +1.00,-0.07,-1.50,-0.04,0.86,0.78 +0.00,-0.89,-1.18,-0.21,-1.15,0.40 +0.00,0.31,-0.97,-0.09,0.67,0.33 +0.00,1.15,0.43,-0.43,-0.54,-1.09 +1.00,-0.78,0.44,0.44,0.54,1.40 +1.00,-1.08,-0.68,0.26,-0.26,-2.14 +0.00,0.27,2.21,-0.58,-0.37,0.84 diff --git a/statsmodels/genmod/tests/results/enet_poisson.csv b/statsmodels/genmod/tests/results/enet_poisson.csv new file mode 100644 index 00000000000..ae86b453acc --- /dev/null +++ b/statsmodels/genmod/tests/results/enet_poisson.csv @@ -0,0 +1,200 @@ +0.00,-0.55,0.38,1.26,-1.33,-0.74 +1.00,0.74,0.89,-1.55,-0.26,-0.81 +1.00,0.71,0.64,2.57,-2.68,0.50 +0.00,-0.98,0.47,0.17,0.32,-2.49 +2.00,0.18,-0.52,-1.01,0.04,-0.87 +1.00,0.39,0.54,0.19,0.76,-0.91 +0.00,-0.35,0.30,-2.02,-0.16,-1.16 +0.00,-2.13,-0.60,-1.42,0.47,0.13 +1.00,0.04,-0.81,1.61,0.91,1.04 +1.00,-1.06,0.40,-1.11,1.76,-0.65 +0.00,-1.82,0.20,-1.87,-0.38,-1.54 +1.00,-0.11,-0.15,-0.99,2.31,-1.11 +1.00,-0.17,-0.37,-0.85,0.07,2.76 +1.00,1.28,-1.07,-0.26,0.62,0.96 +3.00,-0.59,1.92,1.04,-0.48,0.69 +1.00,0.42,0.05,1.47,-0.74,0.91 +3.00,0.76,1.23,1.53,-1.09,3.17 +0.00,0.23,0.28,0.60,1.13,0.05 +3.00,1.10,-1.38,0.27,0.82,0.47 +0.00,-0.87,-0.78,-0.47,1.55,-1.25 +1.00,0.22,-0.46,-0.10,-2.41,2.14 +1.00,-2.31,0.31,-1.17,-0.52,1.06 +1.00,-1.42,1.02,2.48,-0.11,1.07 +1.00,0.28,1.25,0.83,0.48,-1.04 +0.00,-0.48,0.24,-0.44,1.00,1.06 +3.00,-0.30,-0.92,0.96,0.33,2.13 +0.00,1.20,-0.39,0.03,-0.55,-1.21 +1.00,-0.10,-0.68,0.11,-1.40,-0.35 +1.00,-1.16,0.73,-0.53,0.14,0.08 +1.00,-1.74,-0.43,-0.84,0.35,-2.00 +2.00,0.48,0.33,0.46,0.34,-0.14 +2.00,-0.41,0.13,0.65,-0.76,1.13 +3.00,0.61,-0.48,1.36,1.34,0.68 +0.00,1.12,-2.13,-0.99,-1.13,-0.29 +2.00,-1.71,0.91,-0.63,0.48,-0.48 +1.00,-1.19,0.02,-0.21,0.51,0.67 +3.00,1.09,-0.63,0.82,-0.27,-1.37 +3.00,1.46,1.03,-0.84,-0.66,-0.38 +5.00,1.84,1.02,1.67,-0.97,0.69 +2.00,0.30,1.62,-1.31,2.28,-0.43 +0.00,0.50,-0.04,-0.99,0.24,-0.86 +0.00,-0.75,1.09,0.22,1.85,0.77 +1.00,1.36,-0.40,-0.52,0.15,-0.18 +2.00,-1.14,0.19,0.36,0.60,-2.80 +1.00,0.54,-1.31,-0.34,-1.65,1.17 +0.00,1.82,-0.44,-0.76,0.99,-0.24 +0.00,-0.19,-0.41,1.63,1.14,-0.50 +4.00,1.01,0.72,0.97,0.90,1.68 +1.00,-1.40,0.12,1.25,1.86,-0.14 +0.00,-0.36,0.64,0.23,-0.71,-2.03 +2.00,0.00,0.92,-0.16,0.08,0.47 +1.00,-0.38,-2.33,1.56,-0.86,-0.31 +0.00,-1.03,-1.43,0.72,0.97,-1.50 +4.00,0.97,1.11,-0.89,0.29,0.89 +0.00,0.66,-1.89,-1.69,1.10,-1.02 +1.00,0.24,0.47,-1.71,1.10,-1.43 +0.00,-0.14,-0.69,0.38,-0.50,-0.11 +2.00,0.02,0.88,-1.96,-0.12,0.94 +2.00,-0.38,-0.27,1.16,0.48,1.98 +1.00,0.22,1.57,-0.77,0.53,0.88 +4.00,0.13,0.49,1.35,-0.23,1.48 +2.00,0.41,-2.89,1.12,-0.00,1.79 +1.00,-1.47,1.51,0.11,0.41,0.68 +2.00,-0.58,1.42,0.43,0.62,1.34 +1.00,0.35,0.71,0.30,-2.30,1.33 +2.00,0.42,-0.55,2.37,0.76,0.70 +1.00,-0.65,0.19,-1.47,-0.08,-0.63 +1.00,-0.75,-1.08,-0.14,1.10,-1.14 +3.00,1.48,-1.65,0.97,-0.25,-0.48 +4.00,-0.42,-1.86,-1.33,-0.63,0.28 +2.00,1.61,-1.08,-0.18,-1.78,0.49 +2.00,-0.45,0.02,1.47,0.80,0.58 +1.00,-1.11,1.13,0.17,1.12,0.49 +1.00,1.61,0.16,1.27,-1.95,-0.00 +4.00,-0.67,0.76,1.21,2.99,-0.07 +0.00,-0.16,-0.75,-0.38,-0.71,-1.44 +2.00,-0.29,0.61,0.88,-0.11,0.31 +0.00,-0.14,0.03,-0.63,-1.06,-0.01 +0.00,0.68,0.53,-0.84,1.04,-1.03 +1.00,-0.96,1.24,0.57,-0.01,0.34 +2.00,-0.01,-0.11,0.19,0.46,0.23 +0.00,1.66,-1.70,-0.25,1.15,-0.84 +0.00,-1.43,0.61,-0.84,2.37,0.35 
+0.00,-0.08,-1.17,1.60,-0.85,-2.67 +1.00,0.85,0.70,1.40,-0.46,-0.87 +0.00,-0.22,-0.88,-1.94,0.37,1.33 +2.00,-0.84,1.32,-1.02,-0.21,0.93 +3.00,-1.25,0.65,0.75,1.01,-1.05 +0.00,0.84,-1.04,0.08,-0.24,1.48 +0.00,0.32,-0.69,0.83,-0.29,-1.36 +0.00,0.72,-0.44,-1.80,-0.37,0.33 +0.00,-2.29,-0.09,0.28,-0.99,0.85 +2.00,1.01,-1.89,0.66,-0.11,-0.19 +1.00,0.07,-0.78,-0.65,0.87,1.86 +1.00,0.13,-1.87,0.38,-0.84,-0.38 +1.00,1.14,-0.59,1.22,2.11,1.43 +0.00,-1.29,1.29,0.87,-1.83,1.59 +0.00,-1.14,-2.02,0.17,0.08,-2.81 +1.00,-0.02,-1.71,2.18,0.66,1.10 +1.00,-0.69,-0.79,0.27,0.14,-1.79 +2.00,-0.22,-0.65,-0.09,1.15,-0.47 +0.00,-0.35,0.12,0.36,-0.00,1.00 +0.00,-0.47,1.60,-1.96,0.11,0.88 +2.00,0.29,0.33,0.73,-0.55,0.34 +0.00,-0.25,-1.42,0.76,-1.71,-0.60 +2.00,-0.48,0.83,0.19,-1.23,-0.61 +1.00,-2.01,0.45,-0.78,0.64,1.34 +1.00,-0.34,-2.13,1.11,0.30,-1.44 +0.00,-1.29,1.85,1.03,2.08,-0.67 +1.00,1.44,-0.45,-0.58,0.76,1.95 +1.00,-0.07,-0.15,0.19,2.36,0.60 +1.00,0.37,1.25,-0.67,1.64,-1.62 +4.00,0.51,-0.72,0.82,-0.02,1.77 +1.00,-0.26,-0.14,0.05,1.64,0.72 +2.00,1.88,0.38,-0.50,0.67,0.55 +2.00,-1.71,-1.24,0.44,1.06,1.13 +1.00,0.30,1.03,-0.50,0.88,0.78 +2.00,-0.15,1.82,-0.96,0.14,-0.31 +0.00,0.88,-0.33,-1.65,-1.40,-0.09 +1.00,0.97,-0.95,-0.94,-1.24,-0.31 +1.00,1.29,-1.08,0.01,0.39,0.49 +2.00,-0.22,0.27,1.00,0.47,0.25 +0.00,-0.11,-1.03,-1.13,-2.08,-0.15 +1.00,0.31,-1.72,0.23,1.15,0.74 +3.00,0.04,0.92,1.23,-0.51,-0.76 +0.00,-0.79,0.91,-1.62,-0.01,-0.79 +5.00,1.37,-0.86,2.21,0.87,0.23 +2.00,1.35,-0.51,-2.27,0.46,1.89 +0.00,1.17,-0.37,0.60,1.29,-0.44 +2.00,-0.46,-1.17,1.10,1.61,0.38 +0.00,0.82,0.66,-0.48,-0.52,0.36 +0.00,-1.04,-0.38,-2.27,-0.51,-1.08 +4.00,0.18,-0.20,0.48,1.22,1.19 +6.00,1.58,0.59,0.56,0.04,0.34 +2.00,1.99,0.31,0.77,0.23,-0.12 +1.00,-0.89,1.15,-0.97,0.29,0.60 +0.00,0.08,0.18,1.04,1.09,0.21 +1.00,0.13,-1.82,1.29,1.21,1.16 +1.00,1.60,0.76,-0.37,0.69,-1.22 +0.00,1.43,-0.60,-1.89,-1.00,0.78 +0.00,-0.37,-0.40,0.17,-0.64,0.31 +0.00,0.39,0.90,-0.97,-0.47,0.87 +2.00,0.98,-0.64,-1.86,0.68,0.60 +0.00,1.05,-0.40,0.24,-0.65,0.69 +2.00,1.28,-0.66,1.18,-0.64,0.90 +0.00,-0.03,-1.60,-0.76,-0.46,-0.57 +3.00,1.81,-0.33,-0.25,1.47,1.59 +3.00,0.94,-1.43,0.54,0.31,0.03 +1.00,-0.85,1.07,0.98,-2.15,-0.79 +3.00,1.60,-1.28,1.15,0.10,-0.33 +1.00,2.01,-0.29,1.21,0.50,-1.70 +1.00,-1.23,1.40,-1.12,-1.08,0.25 +0.00,0.01,2.06,0.57,-0.42,-0.56 +1.00,-0.45,0.75,1.28,-0.39,0.30 +1.00,-0.08,-1.03,0.64,-0.02,0.78 +0.00,1.13,0.08,0.16,0.73,-0.09 +2.00,-2.34,0.61,0.44,0.90,0.65 +0.00,-2.00,1.51,-1.33,1.11,-0.59 +0.00,-0.50,-0.01,0.67,-0.19,0.50 +1.00,-0.89,0.61,0.36,1.33,0.69 +0.00,0.92,1.10,-0.47,-1.66,0.64 +2.00,0.80,-0.24,-0.91,1.46,1.74 +2.00,0.69,-0.20,-0.91,-0.91,0.21 +0.00,-0.46,0.43,0.47,0.40,-0.58 +1.00,1.24,-0.76,-0.41,0.88,-0.27 +0.00,-0.15,-1.04,0.63,0.51,-2.06 +2.00,-1.55,-0.59,1.03,-0.31,1.49 +1.00,0.23,-0.45,0.60,0.22,-1.05 +2.00,-0.21,-1.30,1.60,-0.72,1.09 +2.00,0.35,-1.89,1.42,-1.77,-0.14 +0.00,-2.33,0.16,-1.60,-1.17,-0.16 +0.00,0.37,-1.10,0.43,1.19,-1.41 +4.00,-0.32,1.94,0.49,0.14,1.12 +1.00,-1.03,1.27,0.49,-0.55,0.07 +2.00,-0.83,0.66,1.13,0.16,-1.13 +0.00,-1.17,-0.10,-0.09,-0.09,-0.49 +0.00,-1.17,-0.07,-0.10,1.42,-1.77 +0.00,1.66,-1.07,-0.96,1.12,-1.98 +0.00,2.75,1.14,-0.05,0.58,0.14 +1.00,-1.06,0.66,-0.98,1.17,0.02 +0.00,-1.02,-0.36,0.06,0.67,-0.29 +2.00,0.28,0.73,-0.62,-0.56,1.98 +0.00,-0.17,-2.83,-1.42,-0.19,-0.77 +2.00,1.64,-0.91,-1.54,-2.04,1.41 +1.00,1.48,0.46,-0.46,-0.71,-1.07 +0.00,0.02,0.93,2.02,-0.84,1.44 +1.00,0.13,-0.27,1.43,0.22,-1.15 +1.00,0.26,-1.04,0.11,0.18,0.46 +2.00,0.51,-1.22,-0.03,0.11,0.86 
+3.00,-1.07,-1.32,-0.11,-0.03,-0.11 +4.00,-1.67,1.23,0.01,-0.85,-0.45 +1.00,0.99,1.07,-0.37,0.38,0.95 +1.00,-1.77,-0.72,-0.54,-0.05,1.08 +0.00,-0.80,-0.03,-0.00,0.92,1.12 +2.00,-0.77,-0.08,-0.41,2.02,-0.62 +0.00,0.80,0.03,0.90,-1.62,-0.40 +0.00,1.06,1.06,-0.73,-0.11,0.04 +1.00,0.25,-0.36,-0.44,0.40,-0.13 +1.00,0.38,-0.47,-1.65,0.47,-0.13 +5.00,1.12,0.34,-0.87,1.43,0.07 From 3c6121ec852c7704094487c5b44ffe37736b6d0a Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 5 May 2015 08:22:03 -0400 Subject: [PATCH 05/27] Create accessor method with caching for mu --- statsmodels/base/elastic_net.py | 4 ++++ statsmodels/genmod/generalized_linear_model.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index e4fafa30df2..9bcc56d3a5d 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -1,6 +1,10 @@ import numpy as np import statsmodels.base.wrapper as wrap +""" +Elastic net regularization. +""" + def _gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds): """ diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py index 5e9249111de..d81259b6d78 100644 --- a/statsmodels/genmod/generalized_linear_model.py +++ b/statsmodels/genmod/generalized_linear_model.py @@ -1308,13 +1308,19 @@ def pearson_chi2(self): chisqsum = np.sum(chisq) return chisqsum + @cache_readonly def fittedvalues(self): - if not hasattr(self, "mu"): - self.mu = self.model.predict(self.params) return self.mu + @cache_readonly + def mu(self): + if not hasattr(self, "_mu"): + self._mu = self.model.predict(self.params) + return self._mu + + @cache_readonly def null(self): endog = self._endog From e5e1f23df5720a01ec5d2ee8a475af07d2480b9b Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 5 May 2015 18:30:48 -0400 Subject: [PATCH 06/27] Fix cache wiping for pickle --- statsmodels/base/model.py | 19 +++++++++++++++---- .../genmod/generalized_linear_model.py | 9 ++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py index 84be7f0ea20..be1036f847d 100644 --- a/statsmodels/base/model.py +++ b/statsmodels/base/model.py @@ -1705,10 +1705,20 @@ def remove_data(self): Not fully tested for time series models, tsa, and might delete too much for prediction or not all that would be possible. - The list of arrays to delete is maintained as an attribute of the - result and model instance, except for cached values. These lists could - be changed before calling remove_data. + The lists of arrays to delete are maintained as attributes of + the result and model instance, except for cached values. These + lists could be changed before calling remove_data. + The attributes to remove are named in: + + model._data_attr : arrays attached to both the model instance + and the results instance with the same attribute name. + + result.data_in_cache : arrays that may exist as values in + result._cache (TODO : should privatize name) + + result._data_attr_model : arrays attached to the model + instance but not to the results instance ''' def wipe(obj, att): #get to last element in attribute path @@ -1725,8 +1735,9 @@ def wipe(obj, att): except AttributeError: pass + model_only = ['model.' + i for i in getattr(self, "_data_attr_model", [])] model_attr = ['model.' 
+ i for i in self.model._data_attr] - for att in self._data_attr + model_attr: + for att in self._data_attr + model_attr + model_only: #print('removing', att) wipe(self, att) diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py index d81259b6d78..81400ed99b5 100644 --- a/statsmodels/genmod/generalized_linear_model.py +++ b/statsmodels/genmod/generalized_linear_model.py @@ -1141,7 +1141,6 @@ def fit_constrained(self, constraints, start_params=None, **fit_kwds): res._results.results_constrained = res_constr # TODO: the next is not the best. history should bin in results res._results.model.history = res_constr.model.history - res._results.mu = res_constr.mu return res @@ -1257,7 +1256,9 @@ def __init__(self, model, params, normalized_cov_params, scale, # for remove data and pickle without large arrays self._data_attr.extend(['results_constrained', '_freq_weights']) self.data_in_cache = getattr(self, 'data_in_cache', []) - self.data_in_cache.extend(['null']) + self.data_in_cache.extend(['null', 'mu']) + self._data_attr_model = getattr(self, '_data_attr_model', []) + self._data_attr_model.append('mu') # robust covariance from statsmodels.base.covtype import get_robustcov_results @@ -1316,9 +1317,7 @@ def fittedvalues(self): @cache_readonly def mu(self): - if not hasattr(self, "_mu"): - self._mu = self.model.predict(self.params) - return self._mu + return self.model.predict(self.params) @cache_readonly From ee366033a4031e4cf4beaf93ac78494c7fb918b4 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 5 May 2015 22:10:47 -0400 Subject: [PATCH 07/27] Remove dict comprehension for py2.6 compatibility --- statsmodels/base/elastic_net.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index 9bcc56d3a5d..62029b835a1 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -128,8 +128,8 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., btol = 1e-8 params_zero = np.zeros(len(params), dtype=bool) - init_args = {k : getattr(model, k) for k in model._init_keys - if k != "offset" and hasattr(model, k)} + init_args = dict([(k, getattr(model, k)) for k in model._init_keys + if k != "offset" and hasattr(model, k)]) fgh_list = [_gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds) for k in range(k_exog)] From 6cba1c99c33e2b6c723583a7abcadc3945821ff2 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 5 May 2015 23:48:51 -0400 Subject: [PATCH 08/27] Mostly docstring and renaming work. --- statsmodels/base/elastic_net.py | 49 +++++++++++++------ statsmodels/duration/hazard_regression.py | 10 ++-- .../genmod/generalized_linear_model.py | 12 ++--- statsmodels/regression/linear_model.py | 21 +++++--- 4 files changed, 59 insertions(+), 33 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index 62029b835a1..a8de772b9a5 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -3,6 +3,20 @@ """ Elastic net regularization. + +Routines for fitting regression models using elastic net +regularization. 
The elastic net minimizes the objective function + +-llf / nobs + alpha((1 - L1_wt) * sum(params**2) / 2 + L1_wt * sum(abs(params))) + +The algorithm implemented here closely follows the implementation in +the R glmnet package, documented here: + +http://cran.r-project.org/web/packages/glmnet/index.html + +and here: + +http://www.jstatsoft.org/v33/i01/paper """ @@ -13,21 +27,22 @@ def _gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds): Returns the negative penalized log-likelihood, its derivative, and its Hessian. The penalty only includes the smooth (L2) term. - All three functions have arguments (x, model), where ``x`` is a - point in the parmeter space and ``model`` is an arbitrary model. + All three functions have argument signature (x, model), where + ``x`` is a point in the parameter space and ``model`` is an + arbitrary statsmodels regression model. """ def nploglike(params, model): nobs = model.nobs - pen = alpha[k] * (1 - L1_wt) * np.sum(params**2) / 2 + pen_llf = alpha[k] * (1 - L1_wt) * np.sum(params**2) / 2 llf = model.loglike(np.r_[params], **loglike_kwds) - return - llf / nobs + pen + return - llf / nobs + pen_llf def npscore(params, model): nobs = model.nobs - l2_grad = alpha[k] * (1 - L1_wt) * params + pen_grad = alpha[k] * (1 - L1_wt) * params gr = -model.score(np.r_[params], **score_kwds)[0] / nobs - return gr + l2_grad + return gr + pen_grad def nphess(params, model): nobs = model.nobs @@ -38,12 +53,12 @@ def nphess(params, model): -def _fit(model, method="coord_descent", maxiter=100, alpha=0., +def fit(model, method="coord_descent", maxiter=100, alpha=0., L1_wt=1., start_params=None, cnvrg_tol=1e-7, zero_tol=1e-8, return_object=False, loglike_kwds=None, score_kwds=None, - hess_kwds=None, **kwargs): + hess_kwds=None): """ - Return a regularized fit to a regression model. + Return an elastic net regularized fit to a regression model. Parameters ---------- @@ -104,8 +119,8 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., -loglike/n + alpha*(1-L1_wt)*|params|_2^2/2 - then optimize the L1 penalized version of this function along - a coordinate axis. + then repeatedly optimize the L1 penalized version of this function + along coordinate axes. """ k_exog = model.exog.shape[1] @@ -142,6 +157,7 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., # Under the active set method, if a parameter becomes # zero we don't try to change it again. + # TODO : give the user the option to switch this off if params_zero[k]: continue @@ -158,6 +174,7 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., model_1var = model.__class__(model.endog, model.exog[:, k], offset=offset, **init_args) + # Do the one-dimensional optimization. func, grad, hess = fgh_list[k] params[k] = _opt_1d(func, grad, hess, model_1var, params[k], alpha[k]*L1_wt, tol=btol) @@ -182,15 +199,18 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., # post-estimation results. ii = np.flatnonzero(params) cov = np.zeros((k_exog, k_exog)) + init_args = dict([(k, getattr(model, k)) for k in model._init_keys]) if len(ii) > 0: model1 = model.__class__(model.endog, model.exog[:, ii], - **kwargs) + **init_args) rslt = model1.fit() cov[np.ix_(ii, ii)] = rslt.normalized_cov_params else: - model1 = model.__class__(model.endog, model.exog[:, 0], **kwargs) + # Hack: no variables were selected but we need to run fit in + # order to get the correct results class. So just fit a model + # with one variable. 
+ model1 = model.__class__(model.endog, model.exog[:, 0], **init_args) rslt = model1.fit() - cov[np.ix_(ii, ii)] = rslt.normalized_cov_params # fit may return a results or a results wrapper if issubclass(rslt.__class__, wrap.ResultsWrapper): @@ -204,6 +224,7 @@ def _fit(model, method="coord_descent", maxiter=100, alpha=0., else: scale = 1. + # Assuming a standard signature for creating results classes. refit = klass(model, params, cov, scale=scale) refit.regularized = True diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py index de3312ced62..0e41f6113e2 100644 --- a/statsmodels/duration/hazard_regression.py +++ b/statsmodels/duration/hazard_regression.py @@ -505,11 +505,11 @@ def fit_regularized(self, method="elastic_net", alpha=0., if ky not in kwargs: kwargs[ky] = defaults[ky] - return elastic_net._fit(self, method=method, - alpha=alpha, - start_params=start_params, - return_object=True, - **kwargs) + return elastic_net.fit(self, method=method, + alpha=alpha, + start_params=start_params, + return_object=True, + **kwargs) def loglike(self, params): diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py index 81400ed99b5..59c9e334c38 100644 --- a/statsmodels/genmod/generalized_linear_model.py +++ b/statsmodels/genmod/generalized_linear_model.py @@ -258,9 +258,9 @@ def __init__(self, endog, exog, family=None, offset=None, self.freq_weights) if offset is None: delattr(self, 'offset') + self._init_keys.remove('offset') if exposure is None: delattr(self, 'exposure') - self.nobs = self.endog.shape[0] #things to remove_data @@ -1073,11 +1073,11 @@ def fit_regularized(self, method="elastic_net", alpha=0., if ky not in kwargs: kwargs[ky] = defaults[ky] - result = elastic_net._fit(self, method=method, - alpha=alpha, - start_params=start_params, - return_object=True, - **kwargs) + result = elastic_net.fit(self, method=method, + alpha=alpha, + start_params=start_params, + return_object=True, + **kwargs) self.mu = self.predict(result.params) self.scale = self.estimate_scale(self.mu) diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py index d5ac6db56bb..6cde82ae38c 100644 --- a/statsmodels/regression/linear_model.py +++ b/statsmodels/regression/linear_model.py @@ -818,15 +818,20 @@ def fit_regularized(self, method="elastic_net", alpha=0., from statsmodels.base import elastic_net + # In the future we could add support for other penalties, e.g. SCAD. if method != "elastic_net": raise ValueError("method for fit_regularied must be elastic_net") + # Set default parameters. defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, "zero_tol" : 1e-10} for ky in defaults: if ky not in kwargs: kwargs[ky] = defaults[ky] + # If a scale parameter is passed in, the non-profile + # likelihood (residual sum of squares divided by -2) is used, + # otherwise the profile likelihood is used. 
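# A sketch of what the two branches below optimize, assuming the Gaussian
# loglike/score/hessian accept a ``scale`` keyword as in the other hunks of
# this series: with scale fixed at 1, the log-likelihood equals -RSS/2 up to
# an additive constant, so the smooth objective handed to the coordinate
# descent is the penalized least-squares criterion
#     RSS / (2 * nobs) + alpha * (1 - L1_wt) * ||params||_2^2 / 2,
# which matches glmnet; with profile_scale=True the concentrated (profile)
# log-likelihood -(nobs/2) * (1 + log(2*pi*RSS/nobs)) is used instead, a
# monotone but nonlinear transformation of RSS.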
if profile_scale: loglike_kwds = {} score_kwds = {} @@ -836,14 +841,14 @@ def fit_regularized(self, method="elastic_net", alpha=0., score_kwds = {"scale": 1} hess_kwds = {"scale": 1} - return elastic_net._fit(self, method=method, - alpha=alpha, - start_params=start_params, - return_object=True, - loglike_kwds=loglike_kwds, - score_kwds=score_kwds, - hess_kwds=hess_kwds, - **kwargs) + return elastic_net.fit(self, method=method, + alpha=alpha, + start_params=start_params, + return_object=True, + loglike_kwds=loglike_kwds, + score_kwds=score_kwds, + hess_kwds=hess_kwds, + **kwargs) class GLSAR(GLS): From ab1724f53e645427f571b5a64d026c39343d34ba Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Wed, 6 May 2015 00:13:20 -0400 Subject: [PATCH 09/27] Minor change to support generic results creation --- statsmodels/base/_constraints.py | 5 ++++- statsmodels/base/elastic_net.py | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/statsmodels/base/_constraints.py b/statsmodels/base/_constraints.py index 054c8e3f4ce..f3ac6bc7eaf 100644 --- a/statsmodels/base/_constraints.py +++ b/statsmodels/base/_constraints.py @@ -250,7 +250,10 @@ def fit_constrained(model, constraint_matrix, constraint_values, #need copy, because we don't want to change it, we don't need deepcopy import copy init_kwds = copy.copy(self._get_init_kwds()) - del init_kwds['offset'] # TODO: refactor to combine with above or offset_all + + # TODO: refactor to combine with above or offset_all + if 'offset' in init_kwds: + del init_kwds['offset'] # using offset as keywords is not supported in all modules mod_constr = self.__class__(endog, exogp_st, offset=offset, **init_kwds) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index a8de772b9a5..cf2ec717a84 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -17,6 +17,9 @@ and here: http://www.jstatsoft.org/v33/i01/paper + +This routine should work for any regression model that implements +loglike, score, and hess. """ @@ -255,9 +258,9 @@ def _opt_1d(func, grad, hess, model, start, L1_wt, tol): Notes ----- - ``func``, ``grad``, and ``hess`` have arguments (x, model), where - ``x`` is a point in the parameter space and ``model`` is the model - being fit. + ``func``, ``grad``, and ``hess`` have argument signature (x, + model), where ``x`` is a point in the parameter space and + ``model`` is the model being fit. If the log-likelihood for the model is exactly quadratic, the global minimum is returned in one step. Otherwise numerical From 613461321d3910f444dab671fbfdb18d0efa3fcf Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 8 May 2015 22:38:51 -0400 Subject: [PATCH 10/27] Don't attach things to model that aren't needed --- statsmodels/base/elastic_net.py | 4 ++-- statsmodels/genmod/generalized_linear_model.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index cf2ec717a84..9afb50d00e9 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -66,7 +66,7 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., Parameters ---------- model : model object - A statsmodels object implementing ``log-like``, ``score``, and + A statsmodels object implementing ``loglike``, ``score``, and ``hessian``. method : Only the coordinate descent algorithm is implemented. 
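For orientation, a minimal usage sketch of the ``fit_regularized`` entry points wired up in the hunks above (synthetic data only; ``alpha`` and ``L1_wt`` carry the meanings given in the docstring, so ``L1_wt=1`` is a pure lasso fit):

import numpy as np
import statsmodels.api as sm

np.random.seed(0)
n, p = 100, 5
exog = np.random.normal(size=(n, p))
beta = np.r_[1.0, -1.0, 0.0, 0.0, 0.5]
endog = np.dot(exog, beta) + np.random.normal(size=n)

# Elastic net fit; alpha is the overall penalty weight and L1_wt the
# share of the penalty given to the L1 term.
res = sm.OLS(endog, exog).fit_regularized(method="elastic_net",
                                          alpha=0.1, L1_wt=1.0)
print(res.params)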
@@ -213,7 +213,7 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., # order to get the correct results class. So just fit a model # with one variable. model1 = model.__class__(model.endog, model.exog[:, 0], **init_args) - rslt = model1.fit() + rslt = model1.fit(maxiter=0) # fit may return a results or a results wrapper if issubclass(rslt.__class__, wrap.ResultsWrapper): diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py index 59c9e334c38..b3653b53342 100644 --- a/statsmodels/genmod/generalized_linear_model.py +++ b/statsmodels/genmod/generalized_linear_model.py @@ -928,12 +928,12 @@ def _fit_gradient(self, start_params=None, method="newton", maxiter=maxiter, full_output=full_output, method=method, disp=disp, **kwargs) - self.mu = self.predict(rslt.params) - self.scale = self.estimate_scale(self.mu) + mu = self.predict(rslt.params) + scale = self.estimate_scale(mu) glm_results = GLMResults(self, rslt.params, - rslt.normalized_cov_params / self.scale, - self.scale, + rslt.normalized_cov_params / scale, + scale, cov_type=cov_type, cov_kwds=cov_kwds, use_t=use_t) From 44f13cc1022deaa5ca52dcef9e40b7619566d8e9 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 1 Sep 2015 03:38:16 -0400 Subject: [PATCH 11/27] Rename return_object as refit and make behavior more consistent --- statsmodels/duration/hazard_regression.py | 14 ++++++++++---- statsmodels/genmod/generalized_linear_model.py | 17 +++++++++++++---- statsmodels/regression/linear_model.py | 15 ++++++++++----- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py index 0e41f6113e2..a351d482cd2 100644 --- a/statsmodels/duration/hazard_regression.py +++ b/statsmodels/duration/hazard_regression.py @@ -447,7 +447,7 @@ def fit(self, groups=None, **args): def fit_regularized(self, method="elastic_net", alpha=0., - start_params=None, **kwargs): + start_params=None, refit=True, **kwargs): """ Return a regularized fit to a linear regression model. @@ -462,10 +462,17 @@ def fit_regularized(self, method="elastic_net", alpha=0., penalty weight for each coefficient. start_params : array-like Starting values for `params`. + refit : bool + If True, the model is refit using only the variables that + have non-zero coefficients in the regularized fit and + returns a results object. The refitted model is not + regularized. If False, only the array of coefficients + from the regularized fit are returned. + Returns ------- - A PHregResults object, of the same type returned by `fit`. + An array of coefficient estimates, or a PHregResults object. 
Notes ----- @@ -500,7 +507,7 @@ def fit_regularized(self, method="elastic_net", alpha=0., raise ValueError("method for fit_regularied must be elastic_net") defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, - "zero_tol" : 1e-10} + "zero_tol" : 1e-10, "refit" : True} for ky in defaults: if ky not in kwargs: kwargs[ky] = defaults[ky] @@ -508,7 +515,6 @@ def fit_regularized(self, method="elastic_net", alpha=0., return elastic_net.fit(self, method=method, alpha=alpha, start_params=start_params, - return_object=True, **kwargs) diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py index b3653b53342..742ea68eb26 100644 --- a/statsmodels/genmod/generalized_linear_model.py +++ b/statsmodels/genmod/generalized_linear_model.py @@ -1015,7 +1015,7 @@ def _fit_irls(self, start_params=None, maxiter=100, tol=1e-8, def fit_regularized(self, method="elastic_net", alpha=0., - start_params=None, **kwargs): + start_params=None, refit=True, **kwargs): """ Return a regularized fit to a linear regression model. @@ -1030,10 +1030,16 @@ def fit_regularized(self, method="elastic_net", alpha=0., penalty weight for each coefficient. start_params : array-like Starting values for `params`. + refit : bool + If True, the model is refit using only the variables that + have non-zero coefficients in the regularized fit and + returns a results object. The refitted model is not + regularized. If False, only the array of coefficients + from the regularized fit are returned. Returns ------- - A GLMResults object, of the same type returned by `fit`. + An array, or a GLMResults object of the same type returned by `fit`. Notes ----- @@ -1068,7 +1074,7 @@ def fit_regularized(self, method="elastic_net", alpha=0., raise ValueError("method for fit_regularied must be elastic_net") defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, - "zero_tol" : 1e-10} + "zero_tol" : 1e-10, "refit" : refit} for ky in defaults: if ky not in kwargs: kwargs[ky] = defaults[ky] @@ -1076,9 +1082,12 @@ def fit_regularized(self, method="elastic_net", alpha=0., result = elastic_net.fit(self, method=method, alpha=alpha, start_params=start_params, - return_object=True, **kwargs) + if not refit: + import pandas as pd + return pd.Series(index=self.exog_names, data=result) + self.mu = self.predict(result.params) self.scale = self.estimate_scale(self.mu) diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py index 6cde82ae38c..c1d33783101 100644 --- a/statsmodels/regression/linear_model.py +++ b/statsmodels/regression/linear_model.py @@ -754,7 +754,7 @@ def hessian(self, params, scale=None): def fit_regularized(self, method="elastic_net", alpha=0., start_params=None, profile_scale=False, - **kwargs): + refit=True, **kwargs): """ Return a regularized fit to a linear regression model. @@ -773,11 +773,17 @@ def fit_regularized(self, method="elastic_net", alpha=0., If True the penalized fit is computed using the profile (concentrated) log-likelihood for the Gaussian model. Otherwise the fit uses the residual sum of squares. + refit : bool + If True, the model is refit using only the variables that + have non-zero coefficients in the regularized fit and + returns a results object. The refitted model is not + regularized. If False, only the array of coefficients + from the regularized fit are returned. Returns ------- - A RegressionResults object, of the same type returned by - ``fit``. 
+ An array of coefficients, or a RegressionResults object of the + same type returned by ``fit``. Notes ----- @@ -824,7 +830,7 @@ def fit_regularized(self, method="elastic_net", alpha=0., # Set default parameters. defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, - "zero_tol" : 1e-10} + "zero_tol" : 1e-10, "refit" : True} for ky in defaults: if ky not in kwargs: kwargs[ky] = defaults[ky] @@ -844,7 +850,6 @@ def fit_regularized(self, method="elastic_net", alpha=0., return elastic_net.fit(self, method=method, alpha=alpha, start_params=start_params, - return_object=True, loglike_kwds=loglike_kwds, score_kwds=score_kwds, hess_kwds=hess_kwds, From 9ddb8406c5d865f58696ceb26063b50b7a66596e Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 1 Sep 2015 03:39:35 -0400 Subject: [PATCH 12/27] Rename return_object to refit --- statsmodels/base/elastic_net.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index 9afb50d00e9..6522cc0cd3f 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -58,7 +58,7 @@ def nphess(params, model): def fit(model, method="coord_descent", maxiter=100, alpha=0., L1_wt=1., start_params=None, cnvrg_tol=1e-7, zero_tol=1e-8, - return_object=False, loglike_kwds=None, score_kwds=None, + refit=False, loglike_kwds=None, score_kwds=None, hess_kwds=None): """ Return an elastic net regularized fit to a regression model. @@ -91,8 +91,12 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., zero_tol : scalar Any estimated coefficient smaller than this value is replaced with zero. - return_object : bool - If False, only the parameter estimates are returned. + refit : bool + If True, the model is refit using only the variables that have + non-zero coefficients in the regularized fit and returns a + results object. The refitted model is not regularized. If + False, only the array of coefficients from the regularized fit + is returned. loglike_kwds : dict-like or None Keyword arguments for the log-likelihood function. score_kwds : dict-like or None @@ -102,9 +106,8 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., Returns ------- - If `return_object` is true, a results object of the same type - returned by `model.fit`, otherise returns the estimated parameter - vector. + If `refit` is true, a results object of the same type returned by + `model.fit`, otherise returns the estimated parameter vector. Notes ----- @@ -195,14 +198,14 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., # Set approximate zero coefficients to be exactly zero params *= np.abs(params) >= zero_tol - if not return_object: + if not refit: return params # Fit the reduced model to get standard errors and other # post-estimation results. ii = np.flatnonzero(params) cov = np.zeros((k_exog, k_exog)) - init_args = dict([(k, getattr(model, k)) for k in model._init_keys]) + init_args = dict([(k, getattr(model, k, None)) for k in model._init_keys]) if len(ii) > 0: model1 = model.__class__(model.endog, model.exog[:, ii], **init_args) @@ -230,6 +233,8 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., # Assuming a standard signature for creating results classes. 
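# (Here ``klass`` is assumed to follow the LikelihoodModelResults
#  convention of being constructible as
#  klass(model, params, normalized_cov_params, scale=scale); a results
#  class with a different __init__ signature would need special handling.)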
refit = klass(model, params, cov, scale=scale) refit.regularized = True + refit.method = method + refit.fit_history = {'iteration' : itr + 1} return refit From 054df32dd47a767799079189f6cece0e611ecd21 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 1 Sep 2015 05:41:54 -0400 Subject: [PATCH 13/27] Return results even with no refit --- statsmodels/base/elastic_net.py | 12 +++++------ statsmodels/base/model.py | 4 ++++ statsmodels/duration/hazard_regression.py | 19 +++++++---------- .../genmod/generalized_linear_model.py | 21 +++++++------------ statsmodels/regression/linear_model.py | 17 +++++++-------- .../regression/tests/test_regression.py | 1 - 6 files changed, 31 insertions(+), 43 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index 6522cc0cd3f..b99557672b5 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -1,5 +1,6 @@ import numpy as np import statsmodels.base.wrapper as wrap +from statsmodels.base.model import Results """ Elastic net regularization. @@ -93,10 +94,8 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., replaced with zero. refit : bool If True, the model is refit using only the variables that have - non-zero coefficients in the regularized fit and returns a - results object. The refitted model is not regularized. If - False, only the array of coefficients from the regularized fit - is returned. + non-zero coefficients in the regularized fit. The refitted + model is not regularized. loglike_kwds : dict-like or None Keyword arguments for the log-likelihood function. score_kwds : dict-like or None @@ -106,8 +105,7 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., Returns ------- - If `refit` is true, a results object of the same type returned by - `model.fit`, otherise returns the estimated parameter vector. + A results object. Notes ----- @@ -199,7 +197,7 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., params *= np.abs(params) >= zero_tol if not refit: - return params + return Results(model, params) # Fit the reduced model to get standard errors and other # post-estimation results. diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py index be1036f847d..f3b3fb887e5 100644 --- a/statsmodels/base/model.py +++ b/statsmodels/base/model.py @@ -770,6 +770,10 @@ def predict(self, exog=None, transform=True, *args, **kwargs): return predict_results + def summary(self): + pass + + #TODO: public method? class LikelihoodModelResults(Results): """ diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py index a351d482cd2..3c073b3763c 100644 --- a/statsmodels/duration/hazard_regression.py +++ b/statsmodels/duration/hazard_regression.py @@ -447,7 +447,7 @@ def fit(self, groups=None, **args): def fit_regularized(self, method="elastic_net", alpha=0., - start_params=None, refit=True, **kwargs): + start_params=None, refit=False, **kwargs): """ Return a regularized fit to a linear regression model. @@ -464,15 +464,13 @@ def fit_regularized(self, method="elastic_net", alpha=0., Starting values for `params`. refit : bool If True, the model is refit using only the variables that - have non-zero coefficients in the regularized fit and - returns a results object. The refitted model is not - regularized. If False, only the array of coefficients - from the regularized fit are returned. + have non-zero coefficients in the regularized fit. The + refitted model is not regularized. 
Returns ------- - An array of coefficient estimates, or a PHregResults object. + A results object. Notes ----- @@ -507,15 +505,14 @@ def fit_regularized(self, method="elastic_net", alpha=0., raise ValueError("method for fit_regularied must be elastic_net") defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, - "zero_tol" : 1e-10, "refit" : True} - for ky in defaults: - if ky not in kwargs: - kwargs[ky] = defaults[ky] + "zero_tol" : 1e-10} + defaults.update(kwargs) return elastic_net.fit(self, method=method, alpha=alpha, start_params=start_params, - **kwargs) + refit=refit, + **defaults) def loglike(self, params): diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py index 742ea68eb26..5aad0890d8c 100644 --- a/statsmodels/genmod/generalized_linear_model.py +++ b/statsmodels/genmod/generalized_linear_model.py @@ -1015,7 +1015,7 @@ def _fit_irls(self, start_params=None, maxiter=100, tol=1e-8, def fit_regularized(self, method="elastic_net", alpha=0., - start_params=None, refit=True, **kwargs): + start_params=None, refit=False, **kwargs): """ Return a regularized fit to a linear regression model. @@ -1032,10 +1032,8 @@ def fit_regularized(self, method="elastic_net", alpha=0., Starting values for `params`. refit : bool If True, the model is refit using only the variables that - have non-zero coefficients in the regularized fit and - returns a results object. The refitted model is not - regularized. If False, only the array of coefficients - from the regularized fit are returned. + have non-zero coefficients in the regularized fit. The + refitted model is not regularized. Returns ------- @@ -1074,19 +1072,14 @@ def fit_regularized(self, method="elastic_net", alpha=0., raise ValueError("method for fit_regularied must be elastic_net") defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, - "zero_tol" : 1e-10, "refit" : refit} - for ky in defaults: - if ky not in kwargs: - kwargs[ky] = defaults[ky] + "zero_tol" : 1e-10} + defaults.update(kwargs) result = elastic_net.fit(self, method=method, alpha=alpha, start_params=start_params, - **kwargs) - - if not refit: - import pandas as pd - return pd.Series(index=self.exog_names, data=result) + refit=refit, + **defaults) self.mu = self.predict(result.params) self.scale = self.estimate_scale(self.mu) diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py index c1d33783101..2ba9f8009c5 100644 --- a/statsmodels/regression/linear_model.py +++ b/statsmodels/regression/linear_model.py @@ -754,7 +754,7 @@ def hessian(self, params, scale=None): def fit_regularized(self, method="elastic_net", alpha=0., start_params=None, profile_scale=False, - refit=True, **kwargs): + refit=False, **kwargs): """ Return a regularized fit to a linear regression model. @@ -775,10 +775,8 @@ def fit_regularized(self, method="elastic_net", alpha=0., Otherwise the fit uses the residual sum of squares. refit : bool If True, the model is refit using only the variables that - have non-zero coefficients in the regularized fit and - returns a results object. The refitted model is not - regularized. If False, only the array of coefficients - from the regularized fit are returned. + have non-zero coefficients in the regularized fit. The + refitted model is not regularized. Returns ------- @@ -830,10 +828,8 @@ def fit_regularized(self, method="elastic_net", alpha=0., # Set default parameters. 
defaults = {"maxiter" : 50, "L1_wt" : 1, "cnvrg_tol" : 1e-10, - "zero_tol" : 1e-10, "refit" : True} - for ky in defaults: - if ky not in kwargs: - kwargs[ky] = defaults[ky] + "zero_tol" : 1e-10} + defaults.update(kwargs) # If a scale parameter is passed in, the non-profile # likelihood (residual sum of squares divided by -2) is used, @@ -853,7 +849,8 @@ def fit_regularized(self, method="elastic_net", alpha=0., loglike_kwds=loglike_kwds, score_kwds=score_kwds, hess_kwds=hess_kwds, - **kwargs) + refit=refit, + **defaults) class GLSAR(GLS): diff --git a/statsmodels/regression/tests/test_regression.py b/statsmodels/regression/tests/test_regression.py index 99f628e5fff..441e93787c7 100644 --- a/statsmodels/regression/tests/test_regression.py +++ b/statsmodels/regression/tests/test_regression.py @@ -989,7 +989,6 @@ def test_empty_model(self): result = model.fit_regularized(alpha=1000) assert_equal(result.params, 0.) - assert_equal(result.bse, 0.) def test_regularized(self): From ae6f8957075513bb3cd9a992a9a2bb771545c037 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 1 Sep 2015 06:06:11 -0400 Subject: [PATCH 14/27] Add RegularizedResults class and wrapper --- statsmodels/base/elastic_net.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index b99557672b5..36a983bd3f4 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -1,6 +1,7 @@ import numpy as np import statsmodels.base.wrapper as wrap from statsmodels.base.model import Results +import statsmodels.base.wrapper as wrap """ Elastic net regularization. @@ -197,7 +198,8 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., params *= np.abs(params) >= zero_tol if not refit: - return Results(model, params) + results = RegularizedResults(model, params) + return RegularizedResultsWrapper(results) # Fit the reduced model to get standard errors and other # post-estimation results. @@ -296,3 +298,24 @@ def _opt_1d(func, grad, hess, model, start, L1_wt, tol): x_opt = brent(func, args=(model,), brack=(x-1, x+1), tol=tol) return x_opt + + +class RegularizedResults(Results): + + def __init__(self, model, params): + super(RegularizedResults, self).__init__(model, params) + + +class RegularizedResultsWrapper(wrap.ResultsWrapper): + _attrs = { + 'params': 'columns', + 'resid': 'rows', + 'fittedvalues': 'rows', + } + + _wrap_attrs = _attrs + _wrap_methods = { + } + +wrap.populate_wrapper(RegularizedResultsWrapper, + RegularizedResults) From 949a02d6935e87b2b86c729ce65a8f8b946c9a96 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Tue, 1 Sep 2015 09:16:24 -0400 Subject: [PATCH 15/27] Fix wrapper issue --- statsmodels/base/elastic_net.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index 36a983bd3f4..8f0d7435914 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -2,6 +2,7 @@ import statsmodels.base.wrapper as wrap from statsmodels.base.model import Results import statsmodels.base.wrapper as wrap +from statsmodels.tools.decorators import cache_readonly """ Elastic net regularization. 
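The next hunk touches ``_opt_1d``, whose fast path is the closed-form minimizer of the quadratic approximation to the smooth objective plus an L1 term, i.e. a soft-thresholded Newton step. A small self-contained check of that identity, independent of the patch (here ``b`` and ``c`` stand in for the gradient and Hessian evaluated at the current point ``x``):

import numpy as np
from scipy.optimize import minimize_scalar

def soft_threshold(u, t):
    # Proximal operator of t*|.|: shrink u toward zero by t.
    return np.sign(u) * max(abs(u) - t, 0.0)

# Quadratic model around x: q(z) = b*(z - x) + c*(z - x)**2 / 2, plus lam*|z|.
x, b, c, lam = 0.7, -1.3, 2.0, 0.5
z_closed = soft_threshold(x - b / c, lam / c)

obj = lambda z: b * (z - x) + 0.5 * c * (z - x) ** 2 + lam * abs(z)
z_num = minimize_scalar(obj, bounds=(-10.0, 10.0), method="bounded").x
assert abs(z_closed - z_num) < 1e-4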
@@ -291,6 +292,8 @@ def _opt_1d(func, grad, hess, model, start, L1_wt, tol): h = (L1_wt - b) / c elif d < 0: h = -(L1_wt + b) / c + else: + return np.nan f1 = func(x + h, model) + L1_wt*np.abs(x + h) if f1 <= f + L1_wt*np.abs(x) + 1e-10: @@ -305,6 +308,10 @@ class RegularizedResults(Results): def __init__(self, model, params): super(RegularizedResults, self).__init__(model, params) + @cache_readonly + def fittedvalues(self): + return self.model.predict(self.params) + class RegularizedResultsWrapper(wrap.ResultsWrapper): _attrs = { @@ -314,8 +321,6 @@ class RegularizedResultsWrapper(wrap.ResultsWrapper): } _wrap_attrs = _attrs - _wrap_methods = { - } wrap.populate_wrapper(RegularizedResultsWrapper, RegularizedResults) From 941e2101c3860fc1b3c4fb3691dd592fcdc835a9 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 29 Jan 2016 16:17:36 -0500 Subject: [PATCH 16/27] remove unused code --- statsmodels/duration/hazard_regression.py | 50 ----------------------- 1 file changed, 50 deletions(-) diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py index 3c073b3763c..91ce1100a28 100644 --- a/statsmodels/duration/hazard_regression.py +++ b/statsmodels/duration/hazard_regression.py @@ -1725,53 +1725,3 @@ def std(self): """ return np.sqrt(self.var()) - - -def _opt_1d(funcs, start, L1_wt, tol): - """ - Optimize a L1-penalized smooth one-dimensional function of a - single variable. - - Parameters - ---------- - funcs : tuple of functions - funcs[0] is the objective function to be minimized. funcs[1] - and funcs[2] are, respectively, the first and second - derivatives of the smooth part of funcs[0] (i.e. excluding the - L1 penalty). - start : real - A starting value for the function argument - L1_wt : non-negative real - The weight for the L1 penalty function. - tol : non-negative real - A convergence threshold. - - Returns - ------- - The argmin of the objective function. - """ - - # TODO: can we detect failures without calling funcs[0] twice? - - x = start - f = funcs[0](x) - - b = funcs[1](x) - c = funcs[2](x) - d = b - c*x - - if L1_wt > np.abs(d): - return 0. - elif d >= 0: - x += (L1_wt - b) / c - elif d < 0: - x -= (L1_wt + b) / c - - f1 = funcs[0](x) - - # This is an expensive fall-back if the quadratic - # approximation is poor and sends us far off-course. - if f1 > f + 1e-10: - return brent(funcs[0], brack=(x-0.2, x+0.2), tol=tol) - - return x From b837ef2b26aab96f4b35f8074044794334ae480f Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 29 Jan 2016 23:11:37 -0500 Subject: [PATCH 17/27] Improve test coverage --- statsmodels/compat/numpy.py | 163 ++++++++++++++++++++++ statsmodels/duration/hazard_regression.py | 12 +- 2 files changed, 167 insertions(+), 8 deletions(-) diff --git a/statsmodels/compat/numpy.py b/statsmodels/compat/numpy.py index ac69b9e446b..285066fd111 100644 --- a/statsmodels/compat/numpy.py +++ b/statsmodels/compat/numpy.py @@ -44,7 +44,45 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +np_new_unique +------------- +Optionally provides the count of the number of occurences of each +unique element. + +Copied from Numpy source, under license: + +Copyright (c) 2005-2015, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +* Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ + from __future__ import absolute_import from .scipy import NumpyVersion import numpy as np @@ -287,3 +325,128 @@ def nanmean(a, axis=None): return avg + +if NumpyVersion(np.__version__) >= '1.9.0': + np_new_unique = np.unique +else: + def np_new_unique(ar, return_index=False, return_inverse=False, return_counts=False): + """ + Find the unique elements of an array. + + Returns the sorted unique elements of an array. There are three optional + outputs in addition to the unique elements: the indices of the input array + that give the unique values, the indices of the unique array that + reconstruct the input array, and the number of times each unique value + comes up in the input array. + + Parameters + ---------- + ar : array_like + Input array. This will be flattened if it is not already 1-D. + return_index : bool, optional + If True, also return the indices of `ar` that result in the unique + array. + return_inverse : bool, optional + If True, also return the indices of the unique array that can be used + to reconstruct `ar`. + return_counts : bool, optional + If True, also return the number of times each unique value comes up + in `ar`. + + .. versionadded:: 1.9.0 + + Returns + ------- + unique : ndarray + The sorted unique values. + unique_indices : ndarray, optional + The indices of the first occurrences of the unique values in the + (flattened) original array. Only provided if `return_index` is True. + unique_inverse : ndarray, optional + The indices to reconstruct the (flattened) original array from the + unique array. Only provided if `return_inverse` is True. + unique_counts : ndarray, optional + The number of times each of the unique values comes up in the + original array. Only provided if `return_counts` is True. + + .. versionadded:: 1.9.0 + + See Also + -------- + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. 
+ + Examples + -------- + >>> np.unique([1, 1, 2, 2, 3, 3]) + array([1, 2, 3]) + >>> a = np.array([[1, 1], [2, 3]]) + >>> np.unique(a) + array([1, 2, 3]) + + Return the indices of the original array that give the unique values: + + >>> a = np.array(['a', 'b', 'b', 'c', 'a']) + >>> u, indices = np.unique(a, return_index=True) + >>> u + array(['a', 'b', 'c'], + dtype='|S1') + >>> indices + array([0, 1, 3]) + >>> a[indices] + array(['a', 'b', 'c'], + dtype='|S1') + + Reconstruct the input array from the unique values: + + >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) + >>> u, indices = np.unique(a, return_inverse=True) + >>> u + array([1, 2, 3, 4, 6]) + >>> indices + array([0, 1, 4, 3, 1, 2, 1]) + >>> u[indices] + array([1, 2, 6, 4, 2, 3, 2]) + + """ + ar = np.asanyarray(ar).flatten() + + optional_indices = return_index or return_inverse + optional_returns = optional_indices or return_counts + + if ar.size == 0: + if not optional_returns: + ret = ar + else: + ret = (ar,) + if return_index: + ret += (np.empty(0, np.bool),) + if return_inverse: + ret += (np.empty(0, np.bool),) + if return_counts: + ret += (np.empty(0, np.intp),) + return ret + + if optional_indices: + perm = ar.argsort(kind='mergesort' if return_index else 'quicksort') + aux = ar[perm] + else: + ar.sort() + aux = ar + flag = np.concatenate(([True], aux[1:] != aux[:-1])) + + if not optional_returns: + ret = aux[flag] + else: + ret = (aux[flag],) + if return_index: + ret += (perm[flag],) + if return_inverse: + iflag = np.cumsum(flag) - 1 + inv_idx = np.empty(ar.shape, dtype=np.intp) + inv_idx[perm] = iflag + ret += (inv_idx,) + if return_counts: + idx = np.concatenate(np.nonzero(flag) + ([ar.size],)) + ret += (np.diff(idx),) + return ret diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py index 91ce1100a28..0e67bba3822 100644 --- a/statsmodels/duration/hazard_regression.py +++ b/statsmodels/duration/hazard_regression.py @@ -4,6 +4,7 @@ from statsmodels.tools.decorators import cache_readonly from scipy.optimize import brent from statsmodels.compat.numpy import np_matrix_rank +from statsmodels.compat.numpy import np_new_unique """ Implementation of proportional hazards regression models for duration @@ -1432,14 +1433,9 @@ def _group_stats(self, groups): """ Descriptive statistics of the groups. 
""" - # better handled with np.unique(..., return_counts=True) - gsize = {} - for x in groups: - if x not in gsize: - gsize[x] = 0 - gsize[x] += 1 - gsize = np.asarray(list(gsize.values())) - return gsize.min(), gsize.max(), gsize.mean(), len(gsize) + gsizes = np_new_unique(groups, return_counts=True) + gsizes = gsizes[1] + return gsizes.min(), gsizes.max(), gsizes.mean() @cache_readonly def weighted_covariate_averages(self): From 60c40f3c6dbe45a6a6757de4255a93a1a9230f71 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 29 Jan 2016 23:49:50 -0500 Subject: [PATCH 18/27] varfunc derivatives should be analytic --- statsmodels/genmod/families/varfuncs.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/statsmodels/genmod/families/varfuncs.py b/statsmodels/genmod/families/varfuncs.py index af66d8c3215..1349fc7a1ea 100644 --- a/statsmodels/genmod/families/varfuncs.py +++ b/statsmodels/genmod/families/varfuncs.py @@ -50,9 +50,7 @@ def deriv(self, mu): """ Derivative of the variance function v'(mu) """ - from statsmodels.tools.numdiff import approx_fprime_cs - # TODO: diag workaround proplem with numdiff for 1d - return np.diag(approx_fprime_cs(mu, self)) + return np.zeros_like(mu) constant = VarianceFunction() @@ -111,11 +109,21 @@ def __call__(self, mu): def deriv(self, mu): """ Derivative of the variance function v'(mu) + + May be undefined at zero. """ - from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime + + der = self.power * np.fabs(mu) ** (self.power - 1) + ii = np.flatnonzero(mu < 0) + der[ii] *= -1 + + #from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime #return approx_fprime_cs(mu, self) # TODO fix breaks in `fabs # TODO: diag is workaround problem with numdiff for 1d - return np.diag(approx_fprime(mu, self)) + #dd = np.diag(approx_fprime(mu, self)) + + return der + mu = Power() @@ -201,9 +209,7 @@ def deriv(self, mu): """ Derivative of the variance function v'(mu) """ - from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime - # TODO: diag workaround proplem with numdiff for 1d - return np.diag(approx_fprime_cs(mu, self)) + return 1 - 2*mu binary = Binomial() From 89439d6587f54f85b40b90567c1fd78dd7b55e5a Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 29 Jan 2016 23:53:18 -0500 Subject: [PATCH 19/27] Remove cruft --- statsmodels/genmod/families/varfuncs.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/statsmodels/genmod/families/varfuncs.py b/statsmodels/genmod/families/varfuncs.py index 1349fc7a1ea..4d4a45ca7a4 100644 --- a/statsmodels/genmod/families/varfuncs.py +++ b/statsmodels/genmod/families/varfuncs.py @@ -117,11 +117,6 @@ def deriv(self, mu): ii = np.flatnonzero(mu < 0) der[ii] *= -1 - #from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime - #return approx_fprime_cs(mu, self) # TODO fix breaks in `fabs - # TODO: diag is workaround problem with numdiff for 1d - #dd = np.diag(approx_fprime(mu, self)) - return der From b27df8a10584bb5f94d90ffdd10924cf0a2a88ad Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 29 Jan 2016 23:54:34 -0500 Subject: [PATCH 20/27] Minor formatting --- statsmodels/genmod/families/varfuncs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/statsmodels/genmod/families/varfuncs.py b/statsmodels/genmod/families/varfuncs.py index 4d4a45ca7a4..0a2814aa1dc 100644 --- a/statsmodels/genmod/families/varfuncs.py +++ b/statsmodels/genmod/families/varfuncs.py @@ -45,7 +45,6 @@ def __call__(self, mu): mu = np.asarray(mu) return 
np.ones(mu.shape, np.float64) - def deriv(self, mu): """ Derivative of the variance function v'(mu) @@ -105,7 +104,6 @@ def __call__(self, mu): """ return np.power(np.fabs(mu), self.power) - def deriv(self, mu): """ Derivative of the variance function v'(mu) @@ -116,11 +114,9 @@ def deriv(self, mu): der = self.power * np.fabs(mu) ** (self.power - 1) ii = np.flatnonzero(mu < 0) der[ii] *= -1 - return der - mu = Power() mu.__doc__ = """ Returns np.fabs(mu) From 295deab0a952b766c7b207ee0dd3106e5fe68cf7 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Sat, 30 Jan 2016 10:53:32 -0500 Subject: [PATCH 21/27] Improve test coverage --- statsmodels/duration/hazard_regression.py | 22 +++++++++++----------- statsmodels/duration/tests/test_phreg.py | 10 +++++++--- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py index 0e67bba3822..def8deb39cf 100644 --- a/statsmodels/duration/hazard_regression.py +++ b/statsmodels/duration/hazard_regression.py @@ -1260,8 +1260,8 @@ def get_distribution(self, params): Parameters ---------- - params : arrayh-like - The model proportional hazards model parameters. + params : array-like + The proportional hazards model parameters. Returns ------- @@ -1271,7 +1271,7 @@ def get_distribution(self, params): ----- The distributions are obtained from a simple discrete estimate of the survivor function that puts all mass on the observed - failure times wihtin a stratum. + failure times within a stratum. """ # TODO: this returns a Python list of rv_discrete objects, so @@ -1306,8 +1306,8 @@ def get_distribution(self, params): # The individual survival functions. usurv = np.exp(-ichaz) - usurv = np.concatenate((usurv, np.zeros((usurv.shape[0], 1))), - axis=1) + z = np.zeros((usurv.shape[0], 1)) + usurv = np.concatenate((usurv, z), axis=1) # The individual survival probability masses. 
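# (Illustration of the step below with made-up numbers: for one subject a
#  row of ``usurv`` might be [0.9, 0.6, 0.25, 0.0] once the zero column has
#  been appended; -np.diff(..., 1) then gives [0.3, 0.35, 0.25], the mass
#  placed on each successive observed failure time in the stratum.)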
probs = -np.diff(usurv, 1) @@ -1319,15 +1319,15 @@ def get_distribution(self, params): mxc = max([x.shape[1] for x in xk]) for k in range(self.surv.nstrat): if xk[k].shape[1] < mxc: - xk1 = np.zeros((xk.shape[0], mxc)) - pk1 = np.zeros((pk.shape[0], mxc)) - xk1[:, -mxc:] = xk - pk1[:, -mxc:] = pk + xk1 = np.zeros((xk[k].shape[0], mxc)) + pk1 = np.zeros((pk[k].shape[0], mxc)) + xk1[:, 0:xk[k].shape[1]] = xk[k] + pk1[:, 0:pk[k].shape[1]] = pk[k] xk[k], pk[k] = xk1, pk1 - xka = np.nan * np.zeros((len(self.endog), mxc), dtype=np.float64) + # Put the support points and probabilities into single matrices + xka = np.nan * np.ones((len(self.endog), mxc)) pka = np.ones((len(self.endog), mxc), dtype=np.float64) / mxc - for stx in range(self.surv.nstrat): ix = self.surv.stratum_rows[stx] xka[ix, :] = xk[stx] diff --git a/statsmodels/duration/tests/test_phreg.py b/statsmodels/duration/tests/test_phreg.py index 1747ee800e7..4afe02af0bb 100644 --- a/statsmodels/duration/tests/test_phreg.py +++ b/statsmodels/duration/tests/test_phreg.py @@ -313,12 +313,16 @@ def test_predict(self): def test_get_distribution(self): # Smoke test np.random.seed(34234) - exog = np.random.normal(size=(200, 2)) + n = 200 + exog = np.random.normal(size=(n, 2)) lin_pred = exog.sum(1) elin_pred = np.exp(-lin_pred) - time = -elin_pred * np.log(np.random.uniform(size=200)) + time = -elin_pred * np.log(np.random.uniform(size=n)) + status = np.ones(n) + status[0:20] = 0 + strata = np.kron(range(5), np.ones(n // 5)) - mod = PHReg(time, exog) + mod = PHReg(time, exog, status=status, strata=strata) rslt = mod.fit() dist = rslt.get_distribution() From 91bb6ec4a92075579537d42069c80b2066eafd49 Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Thu, 5 May 2016 09:43:18 -0400 Subject: [PATCH 22/27] renaming per Josef's code review --- statsmodels/base/elastic_net.py | 6 +++--- statsmodels/duration/hazard_regression.py | 12 ++++++------ statsmodels/genmod/generalized_linear_model.py | 12 ++++++------ statsmodels/regression/linear_model.py | 18 +++++++++--------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py index 8f0d7435914..0d18757b065 100644 --- a/statsmodels/base/elastic_net.py +++ b/statsmodels/base/elastic_net.py @@ -59,7 +59,7 @@ def nphess(params, model): -def fit(model, method="coord_descent", maxiter=100, alpha=0., +def fit_elasticnet(model, method="coord_descent", maxiter=100, alpha=0., L1_wt=1., start_params=None, cnvrg_tol=1e-7, zero_tol=1e-8, refit=False, loglike_kwds=None, score_kwds=None, hess_kwds=None): @@ -89,7 +89,7 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0., Starting values for `params`. cnvrg_tol : scalar If `params` changes by less than this amount (in sup-norm) - in once iteration cycle, the algorithm terminates with + in one iteration cycle, the algorithm terminates with convergence. 
    zero_tol : scalar
        Any estimated coefficient smaller than this value is
@@ -196,7 +196,7 @@ def fit(model, method="coord_descent", maxiter=100, alpha=0.,
             break

     # Set approximate zero coefficients to be exactly zero
-    params *= np.abs(params) >= zero_tol
+    params[np.abs(params) < zero_tol] = 0

     if not refit:
         results = RegularizedResults(model, params)
diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py
index def8deb39cf..ec57d8ce084 100644
--- a/statsmodels/duration/hazard_regression.py
+++ b/statsmodels/duration/hazard_regression.py
@@ -500,7 +500,7 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
            Coefficients below this threshold are treated as zero.
         """

-        from statsmodels.base import elastic_net
+        from statsmodels.base.elastic_net import fit_elasticnet

         if method != "elastic_net":
             raise ValueError("method for fit_regularied must be elastic_net")
@@ -509,11 +509,11 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
                     "zero_tol" : 1e-10}
         defaults.update(kwargs)

-        return elastic_net.fit(self, method=method,
-                               alpha=alpha,
-                               start_params=start_params,
-                               refit=refit,
-                               **defaults)
+        return fit_elasticnet(self, method=method,
+                              alpha=alpha,
+                              start_params=start_params,
+                              refit=refit,
+                              **defaults)

     def loglike(self, params):
diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py
index 5aad0890d8c..8ea9eb56e87 100644
--- a/statsmodels/genmod/generalized_linear_model.py
+++ b/statsmodels/genmod/generalized_linear_model.py
@@ -1066,7 +1066,7 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
            Coefficients below this threshold are treated as zero.
         """

-        from statsmodels.base import elastic_net
+        from statsmodels.base.elastic_net import fit_elasticnet

         if method != "elastic_net":
             raise ValueError("method for fit_regularied must be elastic_net")
@@ -1075,11 +1075,11 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
                     "zero_tol" : 1e-10}
         defaults.update(kwargs)

-        result = elastic_net.fit(self, method=method,
-                                 alpha=alpha,
-                                 start_params=start_params,
-                                 refit=refit,
-                                 **defaults)
+        result = fit_elasticnet(self, method=method,
+                                alpha=alpha,
+                                start_params=start_params,
+                                refit=refit,
+                                **defaults)

         self.mu = self.predict(result.params)
         self.scale = self.estimate_scale(self.mu)
diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py
index 2ba9f8009c5..280ab717f6e 100644
--- a/statsmodels/regression/linear_model.py
+++ b/statsmodels/regression/linear_model.py
@@ -820,7 +820,7 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
            Statistical Software 33(1), 1-22 Feb 2010.
         """

-        from statsmodels.base import elastic_net
+        from statsmodels.base.elastic_net import fit_elasticnet

         # In the future we could add support for other penalties, e.g. SCAD.
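        # Illustrative usage sketch (not part of the patch; `sm.OLS`,
        # `statsmodels.api`, and the keyword names are assumed from the
        # public statsmodels API): the renaming above is internal, so the
        # user-facing call path for an elastic net fit stays the same, e.g.
        #
        #     import numpy as np
        #     import statsmodels.api as sm
        #     x = np.random.normal(size=(100, 5))
        #     y = x[:, 0] + np.random.normal(size=100)
        #     res = sm.OLS(y, x).fit_regularized(method="elastic_net",
        #                                        alpha=0.1, L1_wt=0.5)
        #     print(res.params)
        #
        # which routes through the shared fit_elasticnet helper in
        # statsmodels.base.elastic_net.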
        if method != "elastic_net":
@@ -843,14 +843,14 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
            score_kwds = {"scale": 1}
            hess_kwds = {"scale": 1}

-        return elastic_net.fit(self, method=method,
-                               alpha=alpha,
-                               start_params=start_params,
-                               loglike_kwds=loglike_kwds,
-                               score_kwds=score_kwds,
-                               hess_kwds=hess_kwds,
-                               refit=refit,
-                               **defaults)
+        return fit_elasticnet(self, method=method,
+                              alpha=alpha,
+                              start_params=start_params,
+                              loglike_kwds=loglike_kwds,
+                              score_kwds=score_kwds,
+                              hess_kwds=hess_kwds,
+                              refit=refit,
+                              **defaults)


 class GLSAR(GLS):

From b106e099411454c1241a6e30ff1bd13ada3d60f1 Mon Sep 17 00:00:00 2001
From: Kerby Shedden
Date: Thu, 5 May 2016 09:48:21 -0400
Subject: [PATCH 23/27] Replace hasattr with getattr for simplicity

---
 statsmodels/genmod/generalized_linear_model.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py
index 8ea9eb56e87..255038b712d 100644
--- a/statsmodels/genmod/generalized_linear_model.py
+++ b/statsmodels/genmod/generalized_linear_model.py
@@ -273,9 +273,7 @@ def __init__(self, endog, exog, family=None, offset=None,

         # Construct a combined offset/exposure term.  Note that
         # exposure has already been logged if present.
-        offset_exposure = 0.
-        if hasattr(self, 'offset'):
-            offset_exposure = self.offset
+        offset_exposure = getattr(self, 'offset', 0)
         if hasattr(self, 'exposure'):
             offset_exposure = offset_exposure + self.exposure
         self._offset_exposure = offset_exposure

From e74696a76a82eafd8724b285278f69737e141604 Mon Sep 17 00:00:00 2001
From: Kerby Shedden
Date: Thu, 5 May 2016 10:11:31 -0400
Subject: [PATCH 24/27] Remove conflict markers

---
 statsmodels/genmod/tests/test_glm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py
index 372ea4ce0fb..cf573602d45 100644
--- a/statsmodels/genmod/tests/test_glm.py
+++ b/statsmodels/genmod/tests/test_glm.py
@@ -1620,7 +1620,7 @@ def testTweediePowerEstimate():
     # assert_allclose(res1.scale, np.exp(res2.params[0]), rtol=0.25)
     p = model1.estimate_tweedie_power(res1.mu)
     assert_allclose(p, res2.params[1], rtol=0.25)
-=======
+

 class TestRegularized(object):

     def test_regularized(self):

From 3863f41ee71b16a17e5cbeebeedbe01ca214ac9c Mon Sep 17 00:00:00 2001
From: Kerby Shedden
Date: Thu, 5 May 2016 21:51:12 -0400
Subject: [PATCH 25/27] fix group_stats

---
 statsmodels/duration/hazard_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py
index ec57d8ce084..db2e855215f 100644
--- a/statsmodels/duration/hazard_regression.py
+++ b/statsmodels/duration/hazard_regression.py
@@ -1435,7 +1435,7 @@ def _group_stats(self, groups):
         """
         gsizes = np_new_unique(groups, return_counts=True)
         gsizes = gsizes[1]
-        return gsizes.min(), gsizes.max(), gsizes.mean()
+        return gsizes.min(), gsizes.max(), gsizes.mean(), len(gsizes)

     @cache_readonly
     def weighted_covariate_averages(self):

From 3c6c8b5831669dc7b943190343de15c5956869a5 Mon Sep 17 00:00:00 2001
From: Kerby Shedden
Date: Thu, 5 May 2016 23:25:12 -0400
Subject: [PATCH 26/27] Fix argument order issue in binomial loglike

---
 statsmodels/base/elastic_net.py          |  3 ++-
 .../genmod/generalized_linear_model.py   | 26 +++++++++----------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/statsmodels/base/elastic_net.py b/statsmodels/base/elastic_net.py
index 0d18757b065..183a49bb169 100644
--- a/statsmodels/base/elastic_net.py
+++ b/statsmodels/base/elastic_net.py
@@ -53,7 +53,8 @@ def npscore(params, model):
     def nphess(params, model):
         nobs = model.nobs
         pen_hess = alpha[k] * (1 - L1_wt)
-        return -model.hessian(np.r_[params], **hess_kwds)[0,0] / nobs + pen_hess
+        h = -model.hessian(np.r_[params], **hess_kwds)[0,0] / nobs + pen_hess
+        return h

     return nploglike, npscore, nphess

diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py
index 255038b712d..8591ac55d5c 100644
--- a/statsmodels/genmod/generalized_linear_model.py
+++ b/statsmodels/genmod/generalized_linear_model.py
@@ -290,6 +290,7 @@ def __init__(self, endog, exog, family=None, offset=None,
         self.data_weights = data_weights
         if np.shape(self.data_weights) == () and self.data_weights > 1:
             self.data_weights = self.data_weights * np.ones((endog.shape[0]))
+        self._setup_binomial()

     def initialize(self):
@@ -369,8 +370,9 @@ def loglike(self, params, scale=None):
         expval = self.family.link.inverse(lin_pred)
         if scale is None:
             scale = self.estimate_scale(expval)
-        return self.family.loglike(expval, self.endog, self.freq_weights,
-                                   scale)
+        llf = self.family.loglike(self.endog, expval, self.freq_weights,
+                                  scale)
+        return llf

     def score_obs(self, params, scale=None):
         """score first derivative of the loglikelihood for each observation.
@@ -828,6 +830,15 @@ def get_distribution(self, params, scale=1, exog=None, exposure=None,
         else:
             raise ValueError("get_distribution not implemented for %s" %
                              self.family.name)

+    def _setup_binomial(self):
+        # this checks what kind of data is given for Binomial.
+        # family will need a reference to endog if this is to be removed from
+        # preprocessing
+        self.n_trials = np.ones((self.endog.shape[0]))  # For binomial
+        if isinstance(self.family, families.Binomial):
+            tmp = self.family.initialize(self.endog, self.freq_weights)
+            self.endog = tmp[0]
+            self.n_trials = tmp[1]

     def fit(self, start_params=None, maxiter=100, method='IRLS', tol=1e-8,
             scale=None, cov_type='nonrobust', cov_kwds=None, use_t=None,
@@ -880,15 +891,6 @@ def fit(self, start_params=None, maxiter=100, method='IRLS', tol=1e-8,
         -----
         This method does not take any extra undocumented ``kwargs``.
         """
-        # this checks what kind of data is given for Binomial.
-        # family will need a reference to endog if this is to be removed from
-        # preprocessing
-        self.n_trials = np.ones((self.endog.shape[0]))  # For binomial
-        if isinstance(self.family, families.Binomial):
-            tmp = self.family.initialize(self.endog, self.freq_weights)
-            self.endog = tmp[0]
-            self.n_trials = tmp[1]
-
         self.scaletype = scale

         if method.lower() == "irls":
@@ -1063,7 +1065,6 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
         zero_tol : float
             Coefficients below this threshold are treated as zero.
         """
-
         from statsmodels.base.elastic_net import fit_elasticnet

         if method != "elastic_net":
@@ -1240,7 +1241,6 @@ def __init__(self, model, params, normalized_cov_params, scale,
         self.family = model.family
         self._endog = model.endog
         self.nobs = model.endog.shape[0]
-        self.mu = model.mu
         self._freq_weights = model.freq_weights
         if isinstance(self.family, families.Binomial):
             self._n_trials = self.model.n_trials

From f40c7f28ac9b961143e03e0d02b225a79d47ac66 Mon Sep 17 00:00:00 2001
From: Kerby Shedden
Date: Thu, 5 May 2016 23:40:38 -0400
Subject: [PATCH 27/27] Need to be able to calculate scale before calling fit

---
 .../genmod/generalized_linear_model.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py
index 8591ac55d5c..0e1e7f7141f 100644
--- a/statsmodels/genmod/generalized_linear_model.py
+++ b/statsmodels/genmod/generalized_linear_model.py
@@ -258,9 +258,9 @@ def __init__(self, endog, exog, family=None, offset=None,
                                            self.freq_weights)
         if offset is None:
             delattr(self, 'offset')
-            self._init_keys.remove('offset')
         if exposure is None:
             delattr(self, 'exposure')
+        self.nobs = self.endog.shape[0]

         #things to remove_data
@@ -269,29 +269,19 @@ def __init__(self, endog, exog, family=None, offset=None,

         # register kwds for __init__, offset and exposure are added by super
         self._init_keys.append('family')
-        self.nobs = len(endog)
+        self._setup_binomial()

         # Construct a combined offset/exposure term.  Note that
         # exposure has already been logged if present.
-        offset_exposure = getattr(self, 'offset', 0)
+        offset_exposure = 0.
+        if hasattr(self, 'offset'):
+            offset_exposure = self.offset
         if hasattr(self, 'exposure'):
             offset_exposure = offset_exposure + self.exposure
         self._offset_exposure = offset_exposure

-        # Set here so the the likelihood, score and Hessian can be
-        # evaluated without fitting the model.
         self.scaletype = None

-        # Set the data weights
-        if endog.ndim > 1 and endog.shape[1] == 2:
-            data_weights = endog.sum(1)  # weights are total trials
-        else:
-            data_weights = np.ones((endog.shape[0]))
-        self.data_weights = data_weights
-        if np.shape(self.data_weights) == () and self.data_weights > 1:
-            self.data_weights = self.data_weights * np.ones((endog.shape[0]))
-        self._setup_binomial()
-
     def initialize(self):
         """