From aa12688b5ed96b113b320862b27359c52e31349d Mon Sep 17 00:00:00 2001
From: alex
Date: Tue, 29 Oct 2013 09:58:42 -0400
Subject: [PATCH 1/5] ENH: add MNLogit loglike_and_score

---
 statsmodels/discrete/discrete_model.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/statsmodels/discrete/discrete_model.py b/statsmodels/discrete/discrete_model.py
index 2b65a03bdb6..f60f5cd5ea6 100644
--- a/statsmodels/discrete/discrete_model.py
+++ b/statsmodels/discrete/discrete_model.py
@@ -1588,6 +1588,21 @@ def score(self, params):
         #NOTE: might need to switch terms if params is reshaped
         return np.dot(firstterm.T, self.exog).flatten()
 
+    def loglike_and_score(self, params):
+        """
+        Returns the log likelihood and score, efficiently reusing calculations.
+
+        Note that both returned quantities must be negated before use by
+        the fitting machinery, which minimizes the negative log likelihood.
+
+        """
+        params = params.reshape(self.K, -1, order='F')
+        cdf_dot_exog_params = self.cdf(np.dot(self.exog, params))
+        loglike_value = np.sum(self.wendog * np.log(cdf_dot_exog_params))
+        firstterm = self.wendog[:, 1:] - cdf_dot_exog_params[:, 1:]
+        score_array = np.dot(firstterm.T, self.exog).flatten()
+        return loglike_value, score_array
+
     def jac(self, params):
         """
         Jacobian matrix for multinomial logit model log-likelihood
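For context: scipy's fmin_l_bfgs_b accepts a single callable that returns the
objective value and its gradient together, which is what makes the combined
loglike_and_score above worthwhile — the cdf(np.dot(exog, params)) evaluation
is shared by both quantities and is computed once per iteration instead of
twice. A minimal standalone sketch of the same idea for a binary logit (all
names here are illustrative, not part of the patch):

    import numpy as np
    from scipy.optimize import fmin_l_bfgs_b

    def negloglike_and_grad(params, exog, endog):
        # The fitted probabilities are shared by the value and the gradient,
        # so computing them once saves the dominant cost of each iteration.
        prob = 1.0 / (1.0 + np.exp(-exog.dot(params)))
        loglike = np.sum(endog * np.log(prob) + (1 - endog) * np.log(1 - prob))
        score = exog.T.dot(endog - prob)
        return -loglike, -score  # negate both: fmin_l_bfgs_b minimizes

    rng = np.random.RandomState(0)
    exog = np.column_stack([np.ones(200), rng.randn(200)])
    endog = (rng.uniform(size=200) < 0.5).astype(float)
    # With no fprime and approx_grad left False, fmin_l_bfgs_b expects the
    # callable to return the (value, gradient) pair.
    xopt, fopt, info = fmin_l_bfgs_b(negloglike_and_grad, np.zeros(2),
                                     args=(exog, endog))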
From 0474015ad015aa2e92ea26860178deb6e24fd31b Mon Sep 17 00:00:00 2001
From: alex
Date: Tue, 29 Oct 2013 14:06:35 -0400
Subject: [PATCH 2/5] ENH: mle lbfgs now uses simultaneous loglike and score,
 tested with MNLogit

---
 statsmodels/base/model.py                   | 61 +++++++++++++++++----
 statsmodels/discrete/tests/test_discrete.py | 47 ++++++++++++----
 2 files changed, 84 insertions(+), 24 deletions(-)

diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py
index 9d0f5e3432d..c7a704f0f15 100644
--- a/statsmodels/base/model.py
+++ b/statsmodels/base/model.py
@@ -501,6 +501,21 @@ def _fit_mle_bfgs(f, score, start_params, fargs, kwargs, disp=True,
 def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
                    maxiter=None, callback=None, retall=False,
                    full_output=True, hess=None):
+    """
+    Parameters
+    ----------
+    f : function
+        Returns negative log likelihood given parameters.
+    score : function
+        Returns gradient of negative log likelihood with respect to params.
+
+    Notes
+    -----
+    Within the MLE part of statsmodels, the log likelihood function and its
+    gradient do not follow a single sign convention; signs are normalized
+    here so that fmin_l_bfgs_b always minimizes the negative log likelihood.
+
+    """
 
     # The maxiter may be set at multiple points throughout statsmodels.
     # In the following lines of code, we track how its value changes
@@ -509,31 +524,53 @@ def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
     if maxiter is None:
         maxiter = 100
 
+    # Use unconstrained optimization by default.
+    bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params))
+
     # Pass the following keyword argument names through to fmin_l_bfgs_b
     # if they are present in kwargs, otherwise use the fmin_l_bfgs_b
     # default values.
-    names = ('m', 'pgtol', 'factr', 'maxfun', 'approx_grad')
+    names = ('m', 'pgtol', 'factr', 'maxfun', 'epsilon', 'approx_grad')
     extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs)
 
-    if extra_kwargs.get('approx_grad', False):
-        score = None
-
-    epsilon = kwargs.setdefault('epsilon', 1e-8)
-    bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params))
+    # Extract values for the options related to the gradient.
+    approx_grad = extra_kwargs.get('approx_grad', False)
+    loglike_and_score = kwargs.get('loglike_and_score', None)
+    epsilon = kwargs.get('epsilon', None)
+
+    # Choose among three options for dealing with the gradient (the gradient
+    # of a log likelihood function with respect to its parameters
+    # is more specifically called the score in statistics terminology).
+    # The first option is to use the finite-differences
+    # approximation that is built into the fmin_l_bfgs_b optimizer.
+    # The second option is to use the provided score function.
+    # The third option is to use the score component of a provided
+    # function that simultaneously evaluates the log likelihood and score.
+    if epsilon and not approx_grad:
+        raise ValueError('a finite-differences epsilon was provided '
+                         'even though we are not using approx_grad')
+    if approx_grad and (score or loglike_and_score):
+        raise ValueError('gradient approximation was requested '
+                         'even though an analytic score function was given')
+    if loglike_and_score:
+        func = lambda p, *a: tuple(-x for x in loglike_and_score(p, *a))
+    elif score:
+        func = f
+        extra_kwargs['fprime'] = score
+    elif approx_grad:
+        func = f
 
     # Customize the fmin_l_bfgs_b call according to the scipy version.
     # Old scipy does not support maxiter and callback.
     scipy_version_curr = distutils.version.LooseVersion(scipy.__version__)
     scipy_version_12 = distutils.version.LooseVersion('0.12.0')
     if scipy_version_curr < scipy_version_12:
-        retvals = optimize.fmin_l_bfgs_b(f, start_params,
-                                         fprime=score, args=fargs,
-                                         bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs)
+        retvals = optimize.fmin_l_bfgs_b(func, start_params,
-                                         args=fargs, bounds=bounds, disp=disp, **extra_kwargs)
     else:
-        retvals = optimize.fmin_l_bfgs_b(f, start_params,
-                                         fprime=score, args=fargs,
+        retvals = optimize.fmin_l_bfgs_b(func, start_params,
                                          maxiter=maxiter, callback=callback,
-                                         bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs)
+                                         args=fargs, bounds=bounds, disp=disp, **extra_kwargs)
 
     if full_output:
         xopt, fopt, d = retvals

diff --git a/statsmodels/discrete/tests/test_discrete.py b/statsmodels/discrete/tests/test_discrete.py
index 672f97f1823..2a91b2dd312 100644
--- a/statsmodels/discrete/tests/test_discrete.py
+++ b/statsmodels/discrete/tests/test_discrete.py
@@ -1062,18 +1062,7 @@ def test_bse(self):
 
     test_jac = no_info
 
-class TestMNLogitNewtonBaseZero(CheckModelResults):
-    @classmethod
-    def setupClass(cls):
-        from results.results_discrete import Anes
-        data = sm.datasets.anes96.load()
-        cls.data = data
-        exog = data.exog
-        exog = sm.add_constant(exog, prepend=False)
-        cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
-        res2 = Anes()
-        res2.mnlogit_basezero()
-        cls.res2 = res2
+class CheckMNLogitBaseZero(CheckModelResults):
 
     def test_margeff_overall(self):
         me = self.res1.get_margeff()
@@ -1178,6 +1167,40 @@ def test_pred_table(self):
     def test_resid(self):
         assert_array_equal(self.res1.resid_misclassified, self.res2.resid)
 
+
+class TestMNLogitNewtonBaseZero(CheckMNLogitBaseZero):
+    @classmethod
+    def setupClass(cls):
+        from results.results_discrete import Anes
+        data = sm.datasets.anes96.load()
+        cls.data = data
+        exog = data.exog
+        exog = sm.add_constant(exog, prepend=False)
+        cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
+        res2 = Anes()
+        res2.mnlogit_basezero()
+        cls.res2 = res2
+
+
+class TestMNLogitLBFGSBaseZero(CheckMNLogitBaseZero):
+    @classmethod
+    def setupClass(cls):
+        from results.results_discrete import Anes
+        data = sm.datasets.anes96.load()
+        cls.data = data
+        exog = data.exog
+        exog = sm.add_constant(exog, prepend=False)
+        mymodel = MNLogit(data.endog, exog)
+        cls.res1 = mymodel.fit(method="lbfgs", disp=0, maxiter=50000,
+                               #m=12, pgtol=1e-7, factr=1e3,  # 5 failures
+                               #m=20, pgtol=1e-8, factr=1e2,  # 3 failures
+                               #m=30, pgtol=1e-9, factr=1e1,  # 1 failure
+                               m=40, pgtol=1e-10, factr=5e0,
+                               loglike_and_score=mymodel.loglike_and_score)
+        res2 = Anes()
+        res2.mnlogit_basezero()
+        cls.res2 = res2
+
+
 def test_perfect_prediction():
     cur_dir = os.path.dirname(os.path.abspath(__file__))
     iris_dir = os.path.join(cur_dir, '..', '..', 'genmod', 'tests', 'results')
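The three-way dispatch above maps onto the three calling conventions that
fmin_l_bfgs_b supports. A condensed standalone illustration with a toy
quadratic objective (hypothetical names, not statsmodels code). One caveat
visible in the patch: if a caller supplies neither a score, approx_grad, nor
loglike_and_score, none of the three branches assigns func and the subsequent
call raises NameError.

    import numpy as np
    from scipy.optimize import fmin_l_bfgs_b

    def neg_loglike(p):            # objective: negative log likelihood
        return np.sum((p - 3.0) ** 2)

    def neg_score(p):              # its analytic gradient
        return 2.0 * (p - 3.0)

    def loglike_and_score(p):      # positive-sign pair, as MNLogit provides
        return -np.sum((p - 3.0) ** 2), -2.0 * (p - 3.0)

    x0 = np.zeros(2)

    # Option 1: finite differences inside the optimizer.
    x1, f1, _ = fmin_l_bfgs_b(neg_loglike, x0, approx_grad=True)

    # Option 2: objective and analytic gradient as separate callables.
    x2, f2, _ = fmin_l_bfgs_b(neg_loglike, x0, fprime=neg_score)

    # Option 3: one callable returns (value, gradient); negate both pieces
    # so the minimizer sees the negative log likelihood and its gradient.
    func = lambda p: tuple(-v for v in loglike_and_score(p))
    x3, f3, _ = fmin_l_bfgs_b(func, x0)

All three runs converge to p = [3, 3]; they differ only in how the gradient
reaches the optimizer.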
From 81a6f528cdb0af21c65c25587380d74ad15c124d Mon Sep 17 00:00:00 2001
From: alex
Date: Tue, 29 Oct 2013 14:59:32 -0400
Subject: [PATCH 3/5] MAINT: change default maxiter to match docstring

---
 statsmodels/tsa/arima_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index 0ef4d1f82cd..6d8c8fd2a98 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -954,7 +954,7 @@ def _get_predict_end(self, end, dynamic=False):
         return end - self.k_diff, out_of_sample
 
     def fit(self, start_params=None, trend='c', method = "css-mle",
-            transparams=True, solver='lbfgs', maxiter=35, full_output=1,
+            transparams=True, solver='lbfgs', maxiter=50, full_output=1,
             disp=5, callback=None, **kwargs):
         """
         Fits ARIMA(p,d,q) model by exact maximum likelihood via Kalman filter.

From 907122b797b4bf19ad3cf5e25686bc40c4464c51 Mon Sep 17 00:00:00 2001
From: alex
Date: Tue, 29 Oct 2013 15:03:26 -0400
Subject: [PATCH 4/5] MAINT: do not tell lbfgs to approximate the gradient if
 we are already passing it a score function

---
 statsmodels/tsa/arima_model.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index 6d8c8fd2a98..e902b7f9077 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -861,10 +861,9 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
         kwargs.setdefault('pgtol', 1e-8)
         kwargs.setdefault('factr', 1e2)
         kwargs.setdefault('m', 12)
-        kwargs.setdefault('approx_grad', True)
         mlefit = super(ARMA, self).fit(start_params, method=solver,
                        maxiter=maxiter, full_output=full_output, disp=disp,
-                       callback = callback, **kwargs)
+                       callback=callback, **kwargs)
         self.mlefit = mlefit
         params = mlefit.params
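Patches 4 and 5 hinge on how dict.setdefault interacts with caller-supplied
options: it only fills a key that is absent, so restoring the approx_grad
default in patch 5 still lets an explicit caller value win. A quick
illustration with hypothetical values:

    kwargs = {'factr': 1e3, 'approx_grad': False}  # caller's choices
    kwargs.setdefault('factr', 1e2)          # ignored: key already present
    kwargs.setdefault('m', 12)               # filled in: key was absent
    kwargs.setdefault('approx_grad', True)   # ignored: caller said False
    assert kwargs == {'factr': 1e3, 'approx_grad': False, 'm': 12}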
From 3b14099a1969d55400d9898e5336a1eb9b1649ba Mon Sep 17 00:00:00 2001
From: alex
Date: Sat, 2 Nov 2013 10:35:44 -0400
Subject: [PATCH 5/5] MAINT: restore approx_grad flag superpowers

---
 statsmodels/base/model.py      | 19 ++++++++-----------
 statsmodels/tsa/arima_model.py |  1 +
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py
index c7a704f0f15..b63a1bc9f12 100644
--- a/statsmodels/base/model.py
+++ b/statsmodels/base/model.py
@@ -499,7 +499,7 @@ def _fit_mle_bfgs(f, score, start_params, fargs, kwargs, disp=True,
 
 
 def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
-                   maxiter=None, callback=None, retall=False,
+                   maxiter=100, callback=None, retall=False,
                    full_output=True, hess=None):
     """
     Parameters
@@ -517,13 +517,6 @@ def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
 
     """
 
-    # The maxiter may be set at multiple points throughout statsmodels.
-    # In the following lines of code, we track how its value changes
-    # between layers.
-    maxiter_ = maxiter
-    if maxiter is None:
-        maxiter = 100
-
     # Use unconstrained optimization by default.
     bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params))
 
@@ -534,10 +527,14 @@ def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
     extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs)
 
     # Extract values for the options related to the gradient.
-    approx_grad = extra_kwargs.get('approx_grad', False)
+    approx_grad = kwargs.get('approx_grad', False)
     loglike_and_score = kwargs.get('loglike_and_score', None)
     epsilon = kwargs.get('epsilon', None)
 
+    # The approx_grad flag has superpowers: it nullifies the score argument.
+    if approx_grad:
+        score = None
+
     # Choose among three options for dealing with the gradient (the gradient
     # of a log likelihood function with respect to its parameters
     # is more specifically called the score in statistics terminology).
@@ -549,9 +546,9 @@ def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
     if epsilon and not approx_grad:
         raise ValueError('a finite-differences epsilon was provided '
                          'even though we are not using approx_grad')
-    if approx_grad and (score or loglike_and_score):
+    if approx_grad and loglike_and_score:
         raise ValueError('gradient approximation was requested '
-                         'even though an analytic score function was given')
+                         'even though an analytic loglike_and_score function was given')
     if loglike_and_score:
         func = lambda p, *a: tuple(-x for x in loglike_and_score(p, *a))
     elif score:

diff --git a/statsmodels/tsa/arima_model.py b/statsmodels/tsa/arima_model.py
index e902b7f9077..fbf977df720 100644
--- a/statsmodels/tsa/arima_model.py
+++ b/statsmodels/tsa/arima_model.py
@@ -861,6 +861,7 @@ def fit(self, order=None, start_params=None, trend='c', method = "css-mle",
         kwargs.setdefault('pgtol', 1e-8)
         kwargs.setdefault('factr', 1e2)
         kwargs.setdefault('m', 12)
+        kwargs.setdefault('approx_grad', True)
         mlefit = super(ARMA, self).fit(start_params, method=solver,
                        maxiter=maxiter, full_output=full_output, disp=disp,
                        callback=callback, **kwargs)
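Taken together, the series enables an MNLogit fit via lbfgs with the analytic
combined objective, mirroring the new TestMNLogitLBFGSBaseZero test above (a
sketch; uses the anes96 dataset shipped with statsmodels):

    import statsmodels.api as sm
    from statsmodels.discrete.discrete_model import MNLogit

    data = sm.datasets.anes96.load()
    exog = sm.add_constant(data.exog, prepend=False)
    model = MNLogit(data.endog, exog)
    # loglike_and_score is forwarded through kwargs to _fit_mle_lbfgs, which
    # negates both outputs and hands the pair to fmin_l_bfgs_b.
    res = model.fit(method="lbfgs", disp=0, maxiter=50000,
                    m=40, pgtol=1e-10, factr=5e0,
                    loglike_and_score=model.loglike_and_score)
    print(res.params)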