ENH: add lbfgs for fitting #1147

Merged: 18 commits, Oct 28, 2013 (the diff below shows changes from the first 9 commits)

Commits
75ee789
ENH: add lbfgs for fitting, but testing shows arima errors
alexbrc Oct 24, 2013
73991e5
MAINT: improve compatibility with old scipy versions, but some comple…
alexbrc Oct 25, 2013
1b1b709
MAINT: more flexibility in setting parameters for fitting
alexbrc Oct 25, 2013
cb6bbb4
MAINT: for model fitting, let lbfgs use its own fprime approximation …
alexbrc Oct 25, 2013
5fe2726
MAINT: change another fitting default solver from None to 'lbfgs'
alexbrc Oct 25, 2013
4610c28
MAINT: add keyword to the kalman filter log likelihood calculation fu…
alexbrc Oct 26, 2013
3c763ae
MAINT: avoid side effects when computing log likelihood for the estim…
alexbrc Oct 26, 2013
f3e27c8
MAINT: use a mysterious args=(False,) argument instead of using neste…
alexbrc Oct 26, 2013
9ed378d
MAINT: add conditional code to support varying versions of scipy fmin…
alexbrc Oct 26, 2013
3b2c104
MAINT: default maxiter 35 -> 50
alexbrc Oct 27, 2013
2c6ac4c
MAINT: prune dead conditional path
alexbrc Oct 27, 2013
79fa713
MAINT: track changes to maxiter among layers of code
alexbrc Oct 27, 2013
6f47919
MAINT: add untested bounds for lbfgsb fitting, and change names from …
alexbrc Oct 27, 2013
b9908f1
MAINT: change the name back
alexbrc Oct 27, 2013
060248b
MAINT: finish changing name back
alexbrc Oct 27, 2013
7697b6d
MAINT: remove a warning
alexbrc Oct 27, 2013
713acdf
MAINT: explicitly check scipy version instead of checking for TypeError
alexbrc Oct 27, 2013
c8dc2a2
MAINT: avoid storing complex sigma2 during finite-differences score a…
alexbrc Oct 27, 2013
117 changes: 100 additions & 17 deletions statsmodels/base/model.py
@@ -186,33 +186,39 @@ def fit(self, start_params=None, method='newton', maxiter=100,
start_params : array-like, optional
Initial guess of the solution for the loglikelihood maximization.
The default is an array of zeros.
method : str {'newton','nm','bfgs','powell','cg','ncg','basinhopping'}
Method can be 'newton' for Newton-Raphson, 'nm' for Nelder-Mead,
'bfgs' for Broyden-Fletcher-Goldfarb-Shanno, 'powell' for modified
Powell's method, 'cg' for conjugate gradient, 'ncg' for Newton-
conjugate gradient or 'basinhopping' for global basin-hopping
solver, if available. `method` determines which solver from
scipy.optimize is used. The explicit arguments in `fit` are passed
to the solver, with the exception of the basin-hopping solver. Each
method : str, optional
The `method` determines which solver from `scipy.optimize`
is used, and it can be chosen from among the following strings:

- 'newton' for Newton-Raphson, 'nm' for Nelder-Mead
- 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS)
- 'lbfgs' for limited-memory BFGS
- 'powell' for modified Powell's method
- 'cg' for conjugate gradient
- 'ncg' for Newton-conjugate gradient
- 'basinhopping' for global basin-hopping solver

The explicit arguments in `fit` are passed to the solver,
with the exception of the basin-hopping solver. Each
solver has several optional arguments that are not the same across
solvers. See the notes section below (or scipy.optimize) for the
available arguments and for the list of explicit arguments that the
basin-hopping solver supports..
maxiter : int
basin-hopping solver supports.
maxiter : int, optional
The maximum number of iterations to perform.
full_output : bool
full_output : bool, optional
Set to True to have all available output in the Results object's
mle_retvals attribute. The output is dependent on the solver.
See LikelihoodModelResults notes section for more information.
disp : bool
disp : bool, optional
Set to True to print convergence messages.
fargs : tuple
fargs : tuple, optional
Extra arguments passed to the likelihood function, i.e.,
loglike(x,*args)
callback : callable callback(xk)
callback : callable callback(xk), optional
Called after each iteration, as callback(xk), where xk is the
current parameter vector.
retall : bool
retall : bool, optional
Set to True to return list of solutions at each iteration.
Available in Results object's mle_retvals attribute.

@@ -242,6 +248,18 @@ def fit(self, start_params=None, method='newton', maxiter=100,
epsilon
If fprime is approximated, use this value for the step
size. Only relevant if LikelihoodModel.score is None.
'lbfgs'
m : int
This many terms are used for the Hessian approximation.
factr : float
A stop condition that is a variant of relative error.
pgtol : float
A stop condition that uses the projected gradient.
epsilon
If fprime is approximated, use this value for the step
size. Only relevant if LikelihoodModel.score is None.
maxfun : int
Maximum number of function evaluations to make.
'cg'
gtol : float
Stop when norm of gradient is less than gtol.
@@ -303,7 +321,7 @@ def fit(self, start_params=None, method='newton', maxiter=100,
cov_params_func = kwargs.setdefault('cov_params_func', None)

Hinv = None # JP error if full_output=0, Hinv not defined
methods = ['newton', 'nm', 'bfgs', 'powell', 'cg', 'ncg',
methods = ['newton', 'nm', 'bfgs', 'lbfgs', 'powell', 'cg', 'ncg',
'basinhopping']
methods += extra_fit_funcs.keys()
if start_params is None:
@@ -337,6 +355,7 @@ def fit(self, start_params=None, method='newton', maxiter=100,
'newton': _fit_mle_newton,
'nm': _fit_mle_nm, # Nelder-Mead
'bfgs': _fit_mle_bfgs,
'lbfgs': _fit_mle_lbfgs,
'cg': _fit_mle_cg,
'ncg': _fit_mle_ncg,
'powell': _fit_mle_powell,
@@ -471,6 +490,54 @@ def _fit_mle_bfgs(f, score, start_params, fargs, kwargs, disp=True,
return xopt, retvals


def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
maxiter=100, callback=None, retall=False,
full_output=True, hess=None):

# Pass the following keyword argument names through to fmin_l_bfgs_b
# if they are present in kwargs, otherwise use the fmin_l_bfgs_b
# default values.
names = ('m', 'pgtol', 'factr', 'maxfun', 'approx_grad')
extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs)

if extra_kwargs.get('approx_grad', False):
score = None

epsilon = kwargs.setdefault('epsilon', 1e-8)
bounds = [(None, None)] * len(start_params)
Member: Given that we now include a constrained solver, we should let users take advantage of it, e.g. check if 'bounds' not in kwargs.

Author: l_bfgs_b includes another feature which I would like to take advantage of -- the combined evaluation of a function and its derivative -- but I am putting this off until a later PR. Maybe that later PR could also include optional bounds? Also, maybe the solver name should be 'lbfgsb' instead of 'lbfgs' if bounds are allowed :)

Member: That's a different issue, for computational efficiency. I don't see a need to postpone the check here that the user hasn't already defined bounds in kwargs.

Member: I would prefer to also leave the bounds to a later PR. It's a different feature than the sparse Hessian we get here. I plan to handle this separately and more generally in #1121. The current constrained solver is not general yet either.
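For reference, the combined evaluation the author mentions is scipy's documented behavior: when fprime is None and approx_grad is False, fmin_l_bfgs_b expects the objective itself to return the pair (f, grad), so one pass computes both. A minimal sketch (the toy objective is an assumption):

import numpy as np
from scipy import optimize

def f_and_grad(x):
    # a single pass returns both the value and the gradient
    return np.sum(x ** 2), 2.0 * x

xopt, fopt, d = optimize.fmin_l_bfgs_b(f_and_grad, np.ones(3))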

try:
retvals = optimize.fmin_l_bfgs_b(f, start_params,
fprime=score, args=fargs,
maxiter=maxiter, callback=callback,
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs)
except TypeError:
if maxiter is not None or callback is not None:
Member: maxiter will always be not None, because it is set to maxiter=100 in the function signature.

Author: Right, this is what I was referring to in #1147 (comment).

Member: There's nothing to change though, right? Users should be warned that it has no effect when using old scipy.

Member: But it always raises the warning with old scipy if the user uses the defaults. I would set maxiter=None in the signature, copy it (maxiter_ = maxiter), and then assign the default (if maxiter is None: maxiter = 100). More code, but then the warning fires only if the user sets maxiter.

Member: Yes. The warning is then "your version of scipy is so old you're missing features that we take for granted in the defaults. You need to do work on your code to set maxiter=None, if you don't want to be reminded."

from warnings import warn
warn("fmin_l_bfgs_b does not support maxiter or callback arguments. "
"Update your scipy, otherwise they have no effect",
UserWarning)
retvals = optimize.fmin_l_bfgs_b(f, start_params,
fprime=score, args=fargs,
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs)
if full_output:
xopt, fopt, d = retvals
# The warnflag is
# 0 if converged
# 1 if too many function evaluations or too many iterations
# 2 if stopped for another reason, given in d['task']
warnflag = d['warnflag']
converged = (warnflag == 0)
gopt = d['grad']
fcalls = d['funcalls']
retvals = {'fopt': fopt, 'gopt': gopt,
'fcalls':fcalls, 'warnflag': warnflag,
'converged': converged}
else:
xopt = None

return xopt, retvals
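For orientation, a minimal sketch of the call this wrapper makes on a recent scipy (one whose fmin_l_bfgs_b accepts maxiter and callback; the toy objective is an assumption):

import numpy as np
from scipy import optimize

def negloglike(params):
    # toy stand-in for the negative loglikelihood
    return np.sum((params - 3.0) ** 2)

xopt, fopt, d = optimize.fmin_l_bfgs_b(
    negloglike, np.zeros(2),
    approx_grad=True,            # no analytic score; difference internally
    bounds=[(None, None)] * 2,   # unbounded, i.e. plain L-BFGS
    m=12, pgtol=1e-8, factr=1e2, maxiter=50)
# d['warnflag'] == 0 means converged; d['grad'] and d['funcalls'] are
# what the wrapper above repackages into the retvals dict.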


def _fit_mle_nm(f, score, start_params, fargs, kwargs, disp=True,
maxiter=100, callback=None, retall=False,
full_output=True, hess=None):
@@ -923,7 +990,7 @@ class LikelihoodModelResults(Results):
--------
The covariance of params is given by scale times normalized_cov_params.

Return values by solver if full_ouput is True during fit:
Return values by solver if full_output is True during fit:

'newton'
fopt : float
@@ -974,6 +1041,22 @@ class LikelihoodModelResults(Results):
True: converged. False: did not converge.
allvecs : list
Results at each iteration.
'lbfgs'
fopt : float
Value of the (negative) loglikelihood at its minimum.
gopt : ndarray
Value of the gradient at the minimum, which should be near 0.
fcalls : int
Number of calls to loglike.
warnflag : int
Warning flag:

- 0 if converged
- 1 if too many function evaluations or too many iterations
- 2 if stopped for another reason

converged : bool
True: converged. False: did not converge.
'powell'
fopt : float
Value of the (negative) loglikelihood at its minimum.
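A hedged usage sketch of the new solver (the model construction here is an assumption; any LikelihoodModel subclass applies):

import statsmodels.api as sm
# y, X assumed to be a binary response and a design matrix
model = sm.Logit(y, X)
res = model.fit(method='lbfgs', m=12, pgtol=1e-8, factr=1e2,
                maxiter=50, full_output=True, disp=False)
print(res.mle_retvals['converged'], res.mle_retvals['warnflag'])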
32 changes: 16 additions & 16 deletions statsmodels/tsa/arima_model.py
@@ -485,11 +485,7 @@ def score(self, params):
-----
This is a numerical approximation.
"""
loglike = self.loglike
#if self.transparams:
# params = self._invtransparams(params)
#return approx_fprime(params, loglike, epsilon=1e-5)
return approx_fprime_cs(params, loglike)
return approx_fprime_cs(params, self.loglike, args=(False,))

def hessian(self, params):
"""
@@ -499,10 +495,7 @@ def hessian(self, params):
-----
This is a numerical approximation.
"""
loglike = self.loglike
#if self.transparams:
# params = self._invtransparams(params)
return approx_hess_cs(params, loglike)
return approx_hess_cs(params, self.loglike, args=(False,))

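Both methods now thread args=(False,) through to loglike, i.e. set_sigma2=False during differentiation. The flag matters because approx_fprime_cs and approx_hess_cs use complex-step differentiation, which evaluates the loglikelihood at complex parameter values; any attribute assigned during such an evaluation (here sigma2) would silently become complex. A self-contained sketch of the scheme, on an assumed toy function:

import numpy as np

def complex_step_derivative(f, x, h=1e-20):
    # f'(x) ~= Im(f(x + i*h)) / h; there is no subtractive cancellation,
    # so h can be tiny and the result is accurate to machine precision
    return f(x + 1j * h).imag / h

print(complex_step_derivative(np.exp, 1.0))  # ~2.718281828459045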
def _transparams(self, params):
"""
@@ -665,7 +658,7 @@ def predict(self, start=None, end=None, exog=None, dynamic=False):
return predictedvalues
predict.__doc__ = _arma_predict

def loglike(self, params):
def loglike(self, params, set_sigma2=True):
"""
Compute the log-likelihood for ARMA(p,q) model

@@ -675,17 +668,17 @@
"""
method = self.method
if method in ['mle', 'css-mle']:
return self.loglike_kalman(params)
return self.loglike_kalman(params, set_sigma2)
elif method == 'css':
return self.loglike_css(params)
else:
raise ValueError("Method %s not understood" % method)

def loglike_kalman(self, params):
def loglike_kalman(self, params, set_sigma2=True):
"""
Compute exact loglikelihood for ARMA(p,q) model using the Kalman Filter.
"""
return KalmanFilter.loglike(params, self)
return KalmanFilter.loglike(params, self, set_sigma2)

def loglike_css(self, params):
"""
@@ -717,7 +710,7 @@ def loglike_css(self, params):
return llf

def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
transparams=True, solver=None, maxiter=35, full_output=1,
transparams=True, solver='lbfgs', maxiter=35, full_output=1,
disp=5, callback=None, **kwargs):
"""
Fits ARMA(p,q) model using exact maximum likelihood via Kalman filter.
@@ -741,7 +734,7 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
`start_params` as starting parameters. See above for more
information.
trend : str {'c','nc'}
Whehter to include a constant or not. 'c' includes constant,
Whether to include a constant or not. 'c' includes constant,
'nc' no constant.
solver : str or None, optional
Solver to be used. The default is 'lbfgs' (limited memory
@@ -846,6 +839,8 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
if transparams: # transform initial parameters to ensure invertibility
start_params = self._invtransparams(start_params)

# NOTE: after having added 'lbfgs' to the list of fitting methods,
# the solver-is-None branch should no longer be necessary
if solver is None: # use default limited memory bfgs
Member: solver is None will also not be true anymore, since the default is now solver='lbfgs'.

Member: OK, that's fine, just redundant for now -- except: why is bounds set here but not below if solver='bfgs'?

Author: Yes, I was keeping this section only for reference, and I'll delete it before merging.

Member: bounds is set in the lbfgs fit function now for all models.

Author: It's here:
https://github.com/argriffing/statsmodels/blob/9ed378d6836e347c89b2f2c2487f445b09db29ee/statsmodels/base/model.py#L507
The bounds are always set to Nones because we only want L-BFGS, dropping the -B (boundedness) from L-BFGS-B.

bounds = [(None,)*2]*(k_ar+k_ma+k)
pgtol = kwargs.get('pgtol', 1e-8)
Member: I would set these defaults here, and not in the fmin_bfgs wrapper function. The defaults in the optimizer are probably good enough; I just found these to work in most cases for ARIMA.

Author: This is a dumb python question, but how would I let fmin_l_bfgs_b use its internal defaults while also allowing these args to be optionally specified by the caller through kwargs?

Member: Check if solver == 'lbfgs' in the code above, and then just remove the setdefault stuff that is in _fit_mle_lbfgs.

Member: You're essentially doing this now. I just want the defaults for ARIMA to be different from the default defaults. Right now it's written so that the defaults are always what they are in ARIMA now. Does that make sense?

Author: I did something that might address this. I am a little bit confused about the function-call chains that modify and pass through the **kwargs. I guess this is common to any code that works with options which can be overridden at multiple levels.
@@ -858,6 +853,11 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
params = mlefit[0]

else: # call the solver from LikelihoodModel
if solver == 'lbfgs':
kwargs.setdefault('pgtol', 1e-8)
kwargs.setdefault('factr', 1e2)
kwargs.setdefault('m', 12)
kwargs.setdefault('approx_grad', True)
mlefit = super(ARMA, self).fit(start_params, method=solver,
maxiter=maxiter, full_output=full_output, disp=disp,
callback = callback, **kwargs)
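The resulting user-facing behavior, as a hedged sketch (the data, the order, and the exact constructor signature are assumptions): the ARIMA-tuned defaults apply unless the caller supplies values of their own.

import numpy as np
from statsmodels.tsa.arima_model import ARMA

y = np.random.randn(200).cumsum()  # toy series
res = ARMA(y, order=(1, 1)).fit(disp=0)   # solver='lbfgs' with pgtol=1e-8, m=12, ...
res2 = ARMA(y, order=(1, 1)).fit(disp=0, pgtol=1e-7, factr=10.0)  # caller's values win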
@@ -946,7 +946,7 @@ def _get_predict_end(self, end, dynamic=False):
return end - self.k_diff, out_of_sample

def fit(self, start_params=None, trend='c', method = "css-mle",
transparams=True, solver=None, maxiter=35, full_output=1,
transparams=True, solver='lbfgs', maxiter=35, full_output=1,
disp=5, callback=None, **kwargs):
"""
Fits ARIMA(p,d,q) model by exact maximum likelihood via Kalman filter.
10 changes: 8 additions & 2 deletions statsmodels/tsa/kalmanf/kalmanfilter.py
@@ -611,7 +611,7 @@ def _init_kalman_state(cls, params, arma_model):
newparams, Z_mat, m, R_mat, T_mat, paramsdtype)

@classmethod
def loglike(cls, params, arma_model):
def loglike(cls, params, arma_model, set_sigma2=True):
"""
The loglikelihood for an ARMA model using the Kalman Filter recursions.

@@ -623,6 +623,10 @@ def loglike(cls, params, arma_model):
coefficients, then the `q` MA coefficients.
arma_model : `statsmodels.tsa.arima.ARMA` instance
A reference to the ARMA model instance.
set_sigma2 : bool, optional
True if arma_model.sigma2 should be set.
Note that sigma2 will be computed in any case,
but it will be discarded if set_sigma2 is False.

Notes
-----
@@ -647,7 +651,9 @@
else:
raise TypeError("This dtype %s is not supported. "
"Please file a bug report." % paramsdtype)
arma_model.sigma2 = sigma2
if set_sigma2:
arma_model.sigma2 = sigma2

return loglike.item() # return a scalar not a 0d array
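Downstream, this flag is what lets the complex-step score in arima_model.py stay side-effect free: evaluated at complex params, the loglikelihood is complex, but sigma2 on the model instance is left untouched. A sketch (params, e_k, and arma_mod are assumed names for illustration):

h = 1e-20
# e_k: unit vector along parameter k; arma_mod: an ARMA model instance
ll = KalmanFilter.loglike(params + 1j * h * e_k, arma_mod, False)
score_k = ll.imag / h  # derivative along e_k; arma_mod.sigma2 unchanged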

