New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add a fitting interface for simultaneous log likelihood and score, for lbfgs, tested with MNLogit #1161
add a fitting interface for simultaneous log likelihood and score, for lbfgs, tested with MNLogit #1161
Changes from 2 commits
aa12688
0474015
81a6f52
907122b
3b14099
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -501,6 +501,21 @@ def _fit_mle_bfgs(f, score, start_params, fargs, kwargs, disp=True, | |
def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True, | ||
maxiter=None, callback=None, retall=False, | ||
full_output=True, hess=None): | ||
""" | ||
Parameters | ||
---------- | ||
f : function | ||
Returns negative log likelihood given parameters. | ||
score : function | ||
Returns gradient of negative log likelihood with respect to params. | ||
|
||
Notes | ||
----- | ||
Within the mle part of statsmodels, the log likelihood function and | ||
its gradient with respect to the parameters do not have notationally | ||
consistent sign. | ||
|
||
""" | ||
|
||
# The maxiter may be set at multiple points throughout statsmodels. | ||
# In the following lines of code, we track how its value changes | ||
|
@@ -509,31 +524,53 @@ def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True, | |
if maxiter is None: | ||
maxiter = 100 | ||
|
||
# Use unconstrained optimization by default. | ||
bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params)) | ||
|
||
# Pass the following keyword argument names through to fmin_l_bfgs_b | ||
# if they are present in kwargs, otherwise use the fmin_l_bfgs_b | ||
# default values. | ||
names = ('m', 'pgtol', 'factr', 'maxfun', 'approx_grad') | ||
names = ('m', 'pgtol', 'factr', 'maxfun', 'epsilon', 'approx_grad') | ||
extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs) | ||
|
||
if extra_kwargs.get('approx_grad', False): | ||
score = None | ||
|
||
epsilon = kwargs.setdefault('epsilon', 1e-8) | ||
bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params)) | ||
# Extract values for the options related to the gradient. | ||
approx_grad = extra_kwargs.get('approx_grad', False) | ||
loglike_and_score = kwargs.get('loglike_and_score', None) | ||
epsilon = kwargs.get('epsilon', None) | ||
|
||
# Choose among three options for dealing with the gradient (the gradient | ||
# of a log likelihood function with respect to its parameters | ||
# is more specifically called the score in statistics terminology). | ||
# The first option is to use the finite-differences | ||
# approximation that is built into the fmin_l_bfgs_b optimizer. | ||
# The second option is to use the provided score function. | ||
# The third option is to use the score component of a provided | ||
# function that simultaneously evaluates the log likelihood and score. | ||
if epsilon and not approx_grad: | ||
raise ValueError('a finite-differences epsilon was provided ' | ||
'even though we are not using approx_grad') | ||
if approx_grad and (score or loglike_and_score): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now we raise, which would actually happen in ARIMA, since it has a score function (because of LikelihoodModel.fit) together with approx_grad=True — as far as I can see, though this is not very clear yet. |
||
raise ValueError('gradient approximation was requested ' | ||
'even though an analytic score function was given') | ||
if loglike_and_score: | ||
func = lambda p, *a : tuple(-x for x in loglike_and_score(p, *a)) | ||
elif score: | ||
func = f | ||
extra_kwargs['fprime'] = score | ||
elif approx_grad: | ||
func = f | ||
|
||
# Customize the fmin_l_bfgs_b call according to the scipy version. | ||
# Old scipy does not support maxiter and callback. | ||
scipy_version_curr = distutils.version.LooseVersion(scipy.__version__) | ||
scipy_version_12 = distutils.version.LooseVersion('0.12.0') | ||
if scipy_version_curr < scipy_version_12: | ||
retvals = optimize.fmin_l_bfgs_b(f, start_params, | ||
fprime=score, args=fargs, | ||
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs) | ||
retvals = optimize.fmin_l_bfgs_b(func, start_params, | ||
args=fargs, bounds=bounds, disp=disp, **extra_kwargs) | ||
else: | ||
retvals = optimize.fmin_l_bfgs_b(f, start_params, | ||
fprime=score, args=fargs, | ||
retvals = optimize.fmin_l_bfgs_b(func, start_params, | ||
maxiter=maxiter, callback=callback, | ||
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs) | ||
args=fargs, bounds=bounds, disp=disp, **extra_kwargs) | ||
|
||
if full_output: | ||
xopt, fopt, d = retvals | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1588,6 +1588,21 @@ def score(self, params): | |
#NOTE: might need to switch terms if params is reshaped | ||
return np.dot(firstterm.T, self.exog).flatten() | ||
|
||
def loglike_and_score(self, params): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update: maybe not such a good idea, because your _fit_mle_lbfgs covers several cases of loglike and score Since this is mainly for optimization and doesn't affect other statistical results, I would define it in terms of the negative loglikelihood in statsmodels.base.model.GenericLikelihoodModel and subclasses, I used There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is GenericLikelihoodModel even used? I've run across swaths of statsmodels code that is either commented out or has comments like 'dead code below here' related to the generic likelihood model. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. GenericLikelihoodModel is the rapid prototyping and development version. Most likelihood models grow up and then integrate into the LikelihoodModel hierarchy and don't subclass GenericLikelihoodModel. |
||
""" | ||
Returns log likelihood and score, efficiently reusing calculations. | ||
|
||
Note that both of these returned quantities will need to be negated | ||
before being minimized by the maximum likelihood fitting machinery. | ||
|
||
""" | ||
params = params.reshape(self.K, -1, order='F') | ||
cdf_dot_exog_params = self.cdf(np.dot(self.exog, params)) | ||
loglike_value = np.sum(self.wendog * np.log(cdf_dot_exog_params)) | ||
firstterm = self.wendog[:, 1:] - cdf_dot_exog_params[:, 1:] | ||
score_array = np.dot(firstterm.T, self.exog).flatten() | ||
return loglike_value, score_array | ||
|
||
def jac(self, params): | ||
""" | ||
Jacobian matrix for multinomial logit model log-likelihood | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here we removed the score if approx_grad was requested.