add a fitting interface for simultaneous log likelihood and score, for lbfgs, tested with MNLogit #1161

Merged (5 commits, Nov 2, 2013)
Changes from 2 commits
61 changes: 49 additions & 12 deletions statsmodels/base/model.py
@@ -501,6 +501,21 @@ def _fit_mle_bfgs(f, score, start_params, fargs, kwargs, disp=True,
def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
maxiter=None, callback=None, retall=False,
full_output=True, hess=None):
"""
Parameters
----------
f : function
Returns negative log likelihood given parameters.
score : function
Returns gradient of negative log likelihood with respect to params.

Notes
-----
Within the mle part of statsmodels, the log likelihood function and
its gradient with respect to the parameters do not have notationally
consistent sign.

"""

Member:

Here we removed the score if approx_grad

# The maxiter may be set at multiple points throughout statsmodels.
# In the following lines of code, we track how its value changes
@@ -509,31 +524,53 @@ def _fit_mle_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
if maxiter is None:
maxiter = 100

# Use unconstrained optimization by default.
bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params))

# Pass the following keyword argument names through to fmin_l_bfgs_b
# if they are present in kwargs, otherwise use the fmin_l_bfgs_b
# default values.
names = ('m', 'pgtol', 'factr', 'maxfun', 'approx_grad')
names = ('m', 'pgtol', 'factr', 'maxfun', 'epsilon', 'approx_grad')
extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs)

if extra_kwargs.get('approx_grad', False):
score = None

epsilon = kwargs.setdefault('epsilon', 1e-8)
bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params))
# Extract values for the options related to the gradient.
approx_grad = extra_kwargs.get('approx_grad', False)
loglike_and_score = kwargs.get('loglike_and_score', None)
epsilon = kwargs.get('epsilon', None)

# Choose among three options for dealing with the gradient (the gradient
# of a log likelihood function with respect to its parameters
# is more specifically called the score in statistics terminology).
# The first option is to use the finite-differences
# approximation that is built into the fmin_l_bfgs_b optimizer.
# The second option is to use the provided score function.
# The third option is to use the score component of a provided
# function that simultaneously evaluates the log likelihood and score.
if epsilon and not approx_grad:
raise ValueError('a finite-differences epsilon was provided '
'even though we are not using approx_grad')
if approx_grad and (score or loglike_and_score):
Member:

Now we raise, which should actually happen in ARIMA, since it gets a score through LikelihoodModel.fit and uses approx_grad=True.

AFAICS, though this is not very clear yet.

raise ValueError('gradient approximation was requested '
'even though an analytic score function was given')
if loglike_and_score:
func = lambda p, *a : tuple(-x for x in loglike_and_score(p, *a))
elif score:
func = f
extra_kwargs['fprime'] = score
elif approx_grad:
func = f

# Customize the fmin_l_bfgs_b call according to the scipy version.
# Old scipy does not support maxiter and callback.
scipy_version_curr = distutils.version.LooseVersion(scipy.__version__)
scipy_version_12 = distutils.version.LooseVersion('0.12.0')
if scipy_version_curr < scipy_version_12:
retvals = optimize.fmin_l_bfgs_b(f, start_params,
fprime=score, args=fargs,
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs)
retvals = optimize.fmin_l_bfgs_b(func, start_params,
args=fargs, bounds=bounds, disp=disp, **extra_kwargs)
else:
retvals = optimize.fmin_l_bfgs_b(f, start_params,
fprime=score, args=fargs,
maxiter=maxiter, callback=callback,
bounds=bounds, epsilon=epsilon, disp=disp, **extra_kwargs)
retvals = optimize.fmin_l_bfgs_b(func, start_params,
maxiter=maxiter, callback=callback,
args=fargs, bounds=bounds, disp=disp, **extra_kwargs)

if full_output:
xopt, fopt, d = retvals
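As a usage illustration of the three gradient options above, here is a minimal sketch (not part of the PR) that calls the helper directly with a toy quadratic negative log likelihood. The toy functions and the direct import of the private helper are assumptions for illustration only; bounds and maxiter fall back to the defaults set in the code above.

import numpy as np
from statsmodels.base.model import _fit_mle_lbfgs  # private helper shown above

def f(params):
    # toy negative log likelihood, minimized at params == 3
    return np.sum((params - 3.0) ** 2)

def score(params):
    # gradient of the toy negative log likelihood
    return 2.0 * (params - 3.0)

def loglike_and_score(params):
    # positive log likelihood and score together; _fit_mle_lbfgs negates both
    return -np.sum((params - 3.0) ** 2), -2.0 * (params - 3.0)

start_params = np.zeros(2)

# Option 1: finite-differences gradient inside fmin_l_bfgs_b
# (score must be None here, otherwise the new ValueError is raised).
res_fd = _fit_mle_lbfgs(f, None, start_params, (),
                        {'approx_grad': True, 'epsilon': 1e-8}, disp=0)

# Option 2: analytic score, passed through to fmin_l_bfgs_b as fprime.
res_score = _fit_mle_lbfgs(f, score, start_params, (), {}, disp=0)

# Option 3: one function returning the log likelihood and score simultaneously.
res_both = _fit_mle_lbfgs(f, None, start_params, (),
                          {'loglike_and_score': loglike_and_score}, disp=0)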
15 changes: 15 additions & 0 deletions statsmodels/discrete/discrete_model.py
@@ -1588,6 +1588,21 @@ def score(self, params):
#NOTE: might need to switch terms if params is reshaped
return np.dot(firstterm.T, self.exog).flatten()

def loglike_and_score(self, params):
Member:

Update: maybe not such a good idea, because your _fit_mle_lbfgs covers several cases of loglike and score.

Since this is mainly for optimization and doesn't affect other statistical results, I would define it in terms of the negative log likelihood; an nloglike_and_score avoids the negation.

In statsmodels.base.model.GenericLikelihoodModel and subclasses, I used nloglike.

Author:

Is GenericLikelihoodModel even used? I've run across swaths of statsmodels code that is either commented out or has comments like 'dead code below here' related to the generic likelihood model.

Member:

GenericLikelihoodModel is the rapid prototyping and development version. Most likelihood models grow up and then integrate into the LikelihoodModel hierarchy and don't subclass GenericLikelihoodModel.
GenericLikelihoodModel has more preconfigured or more general defaults than LikelihoodModel.

"""
Returns log likelihood and score, efficiently reusing calculations.

Note that both of these returned quantities will need to be negated
before being minimized by the maximum likelihood fitting machinery.

"""
params = params.reshape(self.K, -1, order='F')
cdf_dot_exog_params = self.cdf(np.dot(self.exog, params))
loglike_value = np.sum(self.wendog * np.log(cdf_dot_exog_params))
firstterm = self.wendog[:, 1:] - cdf_dot_exog_params[:, 1:]
score_array = np.dot(firstterm.T, self.exog).flatten()
return loglike_value, score_array

def jac(self, params):
"""
Jacobian matrix for multinomial logit model log-likelihood
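The reviewer's suggestion above, defining the method in terms of the negative log likelihood so that the fitting code needs no sign flip, could look roughly like the following sketch; the name nloglike_and_score and its body are assumptions, not part of the PR.

def nloglike_and_score(self, params):
    # hypothetical variant per the review comment: return already-negated
    # values so that _fit_mle_lbfgs could drop its negating lambda
    loglike_value, score_array = self.loglike_and_score(params)
    return -loglike_value, -score_array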
47 changes: 35 additions & 12 deletions statsmodels/discrete/tests/test_discrete.py
@@ -1062,18 +1062,7 @@ def test_bse(self):
test_jac = no_info


class TestMNLogitNewtonBaseZero(CheckModelResults):
@classmethod
def setupClass(cls):
from results.results_discrete import Anes
data = sm.datasets.anes96.load()
cls.data = data
exog = data.exog
exog = sm.add_constant(exog, prepend=False)
cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
res2 = Anes()
res2.mnlogit_basezero()
cls.res2 = res2
class CheckMNLogitBaseZero(CheckModelResults):

def test_margeff_overall(self):
me = self.res1.get_margeff()
@@ -1178,6 +1167,40 @@ def test_pred_table(self):
def test_resid(self):
assert_array_equal(self.res1.resid_misclassified, self.res2.resid)


class TestMNLogitNewtonBaseZero(CheckMNLogitBaseZero):
@classmethod
def setupClass(cls):
from results.results_discrete import Anes
data = sm.datasets.anes96.load()
cls.data = data
exog = data.exog
exog = sm.add_constant(exog, prepend=False)
cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
res2 = Anes()
res2.mnlogit_basezero()
cls.res2 = res2

class TestMNLogitLBFGSBaseZero(CheckMNLogitBaseZero):
@classmethod
def setupClass(cls):
from results.results_discrete import Anes
data = sm.datasets.anes96.load()
cls.data = data
exog = data.exog
exog = sm.add_constant(exog, prepend=False)
mymodel = MNLogit(data.endog, exog)
cls.res1 = mymodel.fit(method="lbfgs", disp=0, maxiter=50000,
#m=12, pgtol=1e-7, factr=1e3, # 5 failures
#m=20, pgtol=1e-8, factr=1e2, # 3 failures
#m=30, pgtol=1e-9, factr=1e1, # 1 failure
m=40, pgtol=1e-10, factr=5e0,
loglike_and_score=mymodel.loglike_and_score)
res2 = Anes()
res2.mnlogit_basezero()
cls.res2 = res2
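
# Note, not part of the PR: rough meaning of the fmin_l_bfgs_b options tuned
# above, per scipy.optimize.fmin_l_bfgs_b:
#   m      number of limited-memory correction pairs kept by L-BFGS-B
#   pgtol  stop once the largest |projected gradient component| is <= pgtol
#   factr  stop once the relative decrease of the objective drops below
#          factr * machine epsilon (factr=5e0 asks for roughly 1e-15)
# The commented-out settings record looser tolerances that still left some
# comparisons against the Newton-based results failing.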


def test_perfect_prediction():
cur_dir = os.path.dirname(os.path.abspath(__file__))
iris_dir = os.path.join(cur_dir, '..', '..', 'genmod', 'tests', 'results')