New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add a fitting interface for simultaneous log likelihood and score, for lbfgs, tested with MNLogit #1161
add a fitting interface for simultaneous log likelihood and score, for lbfgs, tested with MNLogit #1161
Changes from 2 commits
aa12688
0474015
81a6f52
907122b
3b14099
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -501,6 +501,21 @@ def _fit_mle_bfgs(f, score, start_params, fargs, kwargs, disp=True, | |
def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True, | ||
maxiter=None, callback=None, retall=False, | ||
full_output=True, hess=None): | ||
""" | ||
Parameters | ||
---------- | ||
f : function | ||
Returns negative log likelihood given parameters. | ||
score : function | ||
Returns gradient of negative log likelihood with respect to params. | ||
|
||
Notes | ||
----- | ||
Within the mle part of statsmodels, the log likelihood function and | ||
its gradient with respect to the parameters do not have notationally | ||
consistent sign. | ||
|
||
""" | ||
|
||
# The maxiter may be set at multiple points throughout statsmodels. | ||
# In the following lines of code, we track how its value changes | ||
|
@@ -509,31 +524,53 @@ def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True, | |
if maxiter is None: | ||
maxiter = 100 | ||
|
||
# Use unconstrained optimization by default. | ||
bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params)) | ||
|
||
# Pass the following keyword argument names through to fmin_l_bfgs_b | ||
# if they are present in kwargs, otherwise use the fmin_l_bfgs_b | ||
# default values. | ||
names = ('m', 'pgtol', 'factr', 'maxfun', 'approx_grad') | ||
names = ('m', 'pgtol', 'factr', 'maxfun', 'epsilon', 'approx_grad') | ||
extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs) | ||
|
||
if extra_kwargs.get('approx_grad', False): | ||
score = None | ||
|
||
epsilon = kwargs.setdefault('epsilon', 1e-8) | ||
bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params)) | ||
# Extract values for the options related to the gradient. | ||
approx_grad = extra_kwargs.get('approx_grad', False) | ||
loglike_and_score = kwargs.get('loglike_and_score', None) | ||
epsilon = kwargs.get('epsilon', None) | ||
|
||
# Choose among three options for dealing with the gradient (the gradient | ||
# of a log likelihood function with respect to its parameters | ||
# is more specifically called the score in statistics terminology). | ||
# The first option is to use the finite-differences | ||
# approximation that is built into the fmin_l_bfgs_b optimizer. | ||
# The second option is to use the provided score function. | ||
# The third option is to use the score component of a provided | ||
# function that simultaneously evaluates the log likelihood and score. | ||
if epsilon and not approx_grad: | ||
raise ValueError('a finite-differences epsilon was provided ' | ||
'even though we are not using approx_grad') | ||
if approx_grad and (score or loglike_and_score): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now we raise, which would actually happen in ARIMA, since it has a score function (because of LikelihoodModel.fit) together with approx_grad=True — as far as I can see, though this is not very clear yet. |
||
raise ValueError('gradient approximation was requested ' | ||
'even though an analytic score function was given') | ||
if loglike_and_score: | ||
func = lambda p, *a : tuple(-x for x in loglike_and_score(p, *a)) | ||
elif score: | ||
func = f | ||
extra_kwargs['fprime'] = score | ||
elif approx_grad: | ||
func = f | ||
|
||
# Customize the fmin_l_bfgs_b call according to the scipy version. | ||
# Old scipy does not support maxiter and callback. | ||
scipy_version_curr = distutils.version.LooseVersion(scipy.__version__) | ||
scipy_version_12 = distutils.version.LooseVersion('0.12.0') | ||
if scipy_version_curr < scipy_version_12: | ||
retvals = optimize.fmin_l_bfgs_b(f, start_params, | ||
fprime=score, args=fargs, | ||
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs) | ||
retvals = optimize.fmin_l_bfgs_b(func, start_params, | ||
args=fargs, bounds=bounds, disp=disp, **extra_kwargs) | ||
else: | ||
retvals = optimize.fmin_l_bfgs_b(f, start_params, | ||
fprime=score, args=fargs, | ||
retvals = optimize.fmin_l_bfgs_b(func, start_params, | ||
maxiter=maxiter, callback=callback, | ||
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs) | ||
args=fargs, bounds=bounds, disp=disp, **extra_kwargs) | ||
|
||
if full_output: | ||
xopt, fopt, d = retvals | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1588,6 +1588,21 @@ def score(self, params): | |
#NOTE: might need to switch terms if params is reshaped | ||
return np.dot(firstterm.T, self.exog).flatten() | ||
|
||
def loglike_and_score(self, params): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update: maybe not such a good idea, because your _fit_mle_lbfgs covers several cases of loglike and score Since this is mainly for optimization and doesn't affect other statistical results, I would define it in terms of the negative loglikelihood in statsmodels.base.model.GenericLikelihoodModel and subclasses, I used There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is GenericLikelihoodModel even used? I've run across swaths of statsmodels code that is either commented out or has comments like 'dead code below here' related to the generic likelihood model. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. GenericLikelihoodModel is the rapid prototyping and development version. Most likelihood models grow up and then integrate into the LikelihoodModel hierarchy and don't subclass GenericLikelihoodModel. |
||
""" | ||
Returns log likelihood and score, efficiently reusing calculations. | ||
|
||
Note that both of these returned quantities will need to be negated | ||
before being minimized by the maximum likelihood fitting machinery. | ||
|
||
""" | ||
params = params.reshape(self.K, -1, order='F') | ||
cdf_dot_exog_params = self.cdf(np.dot(self.exog, params)) | ||
loglike_value = np.sum(self.wendog * np.log(cdf_dot_exog_params)) | ||
firstterm = self.wendog[:, 1:] - cdf_dot_exog_params[:, 1:] | ||
score_array = np.dot(firstterm.T, self.exog).flatten() | ||
return loglike_value, score_array | ||
|
||
def jac(self, params): | ||
""" | ||
Jacobian matrix for multinomial logit model log-likelihood | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here we removed the score if approx_grad was requested.