@@ -22,14 +22,14 @@

def _olsen_reparam(params):
"""
Go from true parameters to gamma and theta of Olsen
Go from true parameters to gamma and theta of Olsen.
gamma = beta/sigma
theta = 1/sigma
"""
beta, sigma = params[:-1], params[-1]
theta = 1./sigma
gamma = beta/sigma
gamma = beta*theta
return np.r_[gamma, theta]

def _reparam_olsen(params):
@@ -41,7 +41,7 @@ def _reparam_olsen(params):
"""
gamma, theta = params[:-1], params[-1]
sigma = 1./theta
beta = gamma/theta
beta = gamma*sigma
return np.r_[beta, sigma]

def dlogcdf(X):
@@ -57,14 +57,13 @@ def _null_params(model):
endog = model.endog
left = model.left
right = model.right
res = Tobit(endog, np.ones_like(endog), left=left,
right=right).fit(method='bfgs', start_params = [1.,1.],
res = Tobit2(endog, np.ones_like(endog), left=left,
right=right).fit(method='bfgs', start_params = [0.,endog.std()],
disp=0)
k_zeros = model.exog.shape[1] - 1
params = np.r_[[0] * k_zeros, res.params]
print "start params: ", params
params = _olsen_reparam(params)
params[-1] = np.log(params[-1])
#params = _olsen_reparam(params)
return params

def _olsmle_params(model):
@@ -79,13 +78,11 @@ def _olsmle_params(model):
"""
ols_res = sm.OLS(model.endog, model.exog).fit()
params = ols_res.params
#sigma = np.log(ols_res.scale ** .5)
sigma = ols_res.scale ** .5
start_params = _olsen_reparam()
theta = 1/ols_res.scale ** .5
n_goodobs = len(self._center_endog)
params = params/n_goonobs
return _olsen_reparam(np.r_[params, sigma])
#return _olsen_reparam(np.r_[params, sigma])
return np.r_[params, np.log(sigma)]

def _ols_params(model):
"""
@@ -102,7 +99,15 @@ def _ols_params(model):
_start_param_funcs = {'null' : _null_params, 'olsmle' : _olsmle_params,
'ols' : _ols_params}

class Tobit(base.LikelihoodModel):
def _exponentiate_sigma(params):
params = params.copy() # do this or it gets modified several times
params[-1] = np.exp(params[-1])
return params

##### End Helper Functions #####


class Tobit1(base.LikelihoodModel):
def __init__(self, endog, exog, left=True, right=True):
super(Tobit, self).__init__(endog, exog)
# set up censoring
@@ -145,63 +150,37 @@ def _set_loglike(self):
"""
left, right = self.left, self.right
if np.isfinite(left) and np.isinf(right): # left-censored
self.loglike = self._loglike_olsen_left
self.score = self._score_left
#self.hessian = self._hessian_left
self.loglike_ = self._loglike_olsen_left
self.score_ = self._score_left
self.hessian_ = self._hessian_left_olsen
elif np.isinf(left) and np.isfinite(right): # right-censored
self.loglike = self._loglike_right
self.loglike_ = self._loglike_right
#self.score = self._score_right
#self.hessian = self._hessian_right
else: # left and right censored
self.loglike = self._loglike_both
self.loglike_ = self._loglike_both
#self.score = self._score_both
#self.hessian = self._hessian_both

#def score(self, params):
#NOTE: it might be easier to just use the numerical differentiation
#they're almost exactly the same here
# reparam for olsen
#sigma = 1/params[-1]
#params = params[:-1]/params[-1]
#sigma = params[-1]
#if self._transparams:
# sigma = np.exp(sigma)
#params = params[:-1]

#left_exog = self._left_exog
#if left_exog.size > 0:
# scaled_left_exog = (self.left - np.dot(left_exog, params)) / sigma
# part1 = norm.pdf(scaled_left_exog)/norm.cdf(scaled_left_exog)
# dLdB_left = -np.sum(left_exog/sigma * part1[:,None], axis=0)
# dLdSigma_left = -np.sum(part1 * scaled_left_exog, axis=0)
#else:
# dLdB_left = dLdSigma_left = 0

#right_exog = self._right_exog
#if right_exog.size > 0:
# scaled_right_exog = (np.dot(right_exog,
# params) - self.right) / sigma
# part1 = norm.pdf(scaled_right_exog)/norm.cdf(scaled_right_exog)
# dLdB_right = np.sum(right_exog/sigma * part1[:,None], axis=0)
# dLdSigma_right = -np.sum(scaled_right_exog*part1, axis=0)
#else:
# dLdB_right = dLdSigma_right = 0

#endog = self._center_endog
#exog = self._center_exog
#scaled_center_exog = (endog - np.dot(exog, params))/sigma
#dLdB_center = np.sum(exog/sigma * scaled_center_exog[:,None], axis=0)
#dLdSigma_center = np.sum(scaled_center_exog**2 - 1, axis=0)
#dLdparams = dLdB_left + dLdB_right + dLdB_center
#dLdsigma = dLdSigma_left + dLdSigma_right + dLdSigma_center
#return np.r_[dLdparams, dLdsigma]
#loglike = self.loglike
#return approx_fprime(params, loglike)

@transform2(_olsen_reparam)
#implemented here, so we don't have to repeat reparams stuff
#@transform2(_olsen_reparam)
def loglike(self, params):
params = params.copy()
params[-1] = np.exp(params[-1])
params = _olsen_reparam(params)
return self.loglike_(params)

def score(self, params):
params = params.copy()
params[-1] = np.exp(params[-1])
params = _olsen_reparam(params)
return self.score_(params)


#@transform2(_olsen_reparam)
def _score_left(self, params):
return approx_fprime(params, self.loglike)
gamma, theta = params[:-1], np.exp(params[-1])
#return approx_fprime(params, self.loglike)
gamma, theta = params[:-1], params[-1]

# score for gamma
left, left_exog = self.left, self._left_exog
@@ -220,15 +199,60 @@ def _score_left(self, params):
dLdTheta = theta_center + theta_left
return np.r_[dLdGamma, dLdTheta]

@transform2(_olsen_reparam)
#@transform2(_olsen_reparam)
def hessian(self, params):
# this is the hessian for left-censored at zero
#gamma, theta = params[:-1], params[-1]
#predicted = -np.dot(self.exog, gamma)
#fF = norm.pdf(predicted)/norm.cdf(predicted)
#D = fF * (-predicted - fF)
loglike = self.loglike
return approx_hess(params, loglike, epsilon=1e-8)[0]
return approx_hess(params, loglike, epsilon=1e-4)[0]

def _hessian_left_olsen(self, params):
gamma, theta = params[:-1], params[-1]

# get set up
center_endog = self._center_endog
center_exog = self._center_exog
left, left_exog = self.left, self._left_exog
left_resid = left*theta - np.dot(left_exog, gamma)
dlncdf = dlogcdf(left_resid)[:,None]

# dLdGamma2 for uncensored
dLdGamma2_center = -np.dot(center_exog.T, center_exog)
# dLdGamma2 for censored
dLdGamma2_left = np.dot((left_exog * dlncdf * (left_resid[:,None] + dlncdf)).T,
left_exog)
# dLdGamma2
dLdGamma2 = dLdGamma2_center - dLdGamma2_left

# dLdGammadTheta for uncensored
dLdGammadTheta_center = np.dot(center_exog.T, center_endog)
# dLdGammadTheta for censored
dLdGammadTheta_left = np.dot((dlncdf * left * (left_resid[:,None] + dlncdf)).T,
left_exog)
# dLdGammadTheta
dLdGammadTheta = dLdGammadTheta_center + dLdGammadTheta_left


# dLdTheta2 for uncensored
dLdTheta2_center = -(np.dot(center_endog, center_endog) - \
len(center_endog)/theta**2)
# dLdTheta2 for censored
dLdTheta2_left = np.sum(dlncdf * left ** 2 * ( 1 - dlncdf))
dLdTheta2 = dLdTheta2_center + dLdTheta2_left

# put it all together
k = center_exog.shape[1] + 1
hess = np.zeros((k,k))
hess[:-1,:-1] = dLdGamma2
hess[-1,:-1] = dLdGammadTheta
hess[:-1,-1] = dLdGammadTheta
hess[-1,-1] = dLdTheta2
return hess



def _loglike_left(self, params, sigma):
left_exog = (self.left - np.dot(self._left_exog, params)) / sigma
@@ -246,9 +270,8 @@ def _loglike_right(self, params, sigma):
# left_like = -1e4
return right_like

@transform2(_olsen_reparam)
def _loglike_olsen_left(self, params):
gamma, theta = params[:-1], np.exp(params[-1])
gamma, theta = params[:-1], params[-1]

left, left_exog = self.left, self._left_exog
llf_left = np.sum(norm.logcdf(left*theta - np.dot(left_exog, gamma)))
@@ -259,7 +282,6 @@ def _loglike_olsen_left(self, params):
(center_endog - np.dot(center_exog, gamma))**2))
return llf_center + llf_left

@transform2(_olsen_reparam)
def _loglike_olsen_right(self, params):
gamma, theta = params[:-1], params[-1]

@@ -272,26 +294,26 @@ def _loglike_olsen_right(self, params):
llf_right = np.sum(norm.logcdf(np.dot(right_exog, gamma)-right*theta))
return llf_right + llf_center

def loglike_(self, params): #NOTE: these needs sigma as last parameter
sigma = params[-1]
if self._transparams:
sigma = np.exp(sigma)
params = params[:-1]
#def loglike_(self, params): #NOTE: these needs sigma as last parameter
# sigma = params[-1]
# if self._transparams:
# sigma = np.exp(sigma)
# params = params[:-1]

left_like = 0
if self._left_exog.size:
left_like = self._loglike_left(params, sigma)
# left_like = 0
# if self._left_exog.size:
# left_like = self._loglike_left(params, sigma)

right_like = 0
if self._right_exog.size:
right_like = self._loglike_right(params, sigma)
# right_like = 0
# if self._right_exog.size:
# right_like = self._loglike_right(params, sigma)

center_endog = self._center_endog
center_exog = self._center_exog
center_like = np.sum(norm.logpdf((center_endog - np.dot(center_exog,
params)) / sigma) - np.log(sigma))
llf = left_like + right_like + center_like
return llf
# center_endog = self._center_endog
# center_exog = self._center_exog
# center_like = np.sum(norm.logpdf((center_endog - np.dot(center_exog,
# params)) / sigma) - np.log(sigma))
# llf = left_like + right_like + center_like
# return llf

def _get_start_params(self, method='null'):
"""
@@ -306,7 +328,7 @@ def _get_start_params(self, method='null'):
"""
return _start_param_funcs[method](self)

@unset_transform # likelihood expects not to transform
#@unset_transform # likelihood expects not to transform
def fit(self, start_params = None, method='newton', **kwargs):
"""
Notes
@@ -320,14 +342,198 @@ def fit(self, start_params = None, method='newton', **kwargs):
method=method, **kwargs)
return TobitResults(self, mlefit)

class Tobit2(base.LikelihoodModel):
def __init__(self, endog, exog, left=True, right=True):
super(Tobit2, self).__init__(endog, exog)
self._transparams = False
# set up censoring
self._init_censored(left, right)
self._set_loglike()

def _init_censored(self, left, right):
if left is False and right is False:
raise ValueError("Must give a censoring level on the right or "
"left.")
endog = self.endog
exog = self.exog
if left is True:
left = self.endog.min()
elif left is False:
left = -np.inf
if right is True:
right = self.endog.max()
elif right is False:
right = np.inf
self.left, self.right = left, right

# do this all here, so that we only have to do the indexing once
left_idx = endog <= left # do we need to attach these?
right_idx = endog >= right
self._left_endog = endog[left_idx]
self.n_lcens = left_idx.sum()
self.n_rcens = right_idx.sum()
self._right_endog = endog[right_idx]
self._left_exog = exog[left_idx]
self._right_exog = exog[right_idx]
center_idx = ~np.logical_or(left_idx, right_idx)
self._center_endog = endog[center_idx]
self._center_exog = exog[center_idx]

def _set_loglike(self):
"""
Use this so we don't have a lot of if checks in each loglike call
"""
left, right = self.left, self.right
if np.isfinite(left) and np.isinf(right): # left-censored
self._loglike = lambda params,sigma : self._loglike_left(params,
sigma) + \
self._loglike_right(params,
sigma)
self._score = self._score_left
#self.hessian_ = self._hessian_left
elif np.isinf(left) and np.isfinite(right): # right-censored
self._loglike = self._loglike_right
#self._score = self._score_right
#self.hessian = self._hessian_right
else: # left and right censored
self._loglike = self._loglike_both
#self._score = self._score_both
#self.hessian = self._hessian_both

@transform2(_exponentiate_sigma) # keeps std. dev. positive
def loglike(self, params):
"""
needs sigma as last parameter
"""
params, sigma = params[:-1], params[-1]
return self._loglike(params, sigma)

@transform2(_exponentiate_sigma)
def score(self, params):
params, sigma = params[:-1], params[-1]
return self._score(params, sigma)

def hessian(self, params):
# this is the hessian for left-censored at zero
loglike = self.loglike
return approx_hess(params, loglike, epsilon=1e-4)[0]

def _get_score_left(self, params, sigma):
left_exog = self._left_exog
resid_left = (self.left - np.dot(left_exog, params)) / sigma
dlncdf = dlogcdf(resid_left)
dLdB_left = -np.sum(left_exog/sigma * dlncdf[:,None], axis=0)
dLdSigma_left = -np.sum(dlncdf * resid_left, axis=0)
score_l = np.r_[dLdB_left, dLdSigma_left]
return score_l

def _get_loglike_left(self, params, sigma):
left_exog = (self.left - np.dot(self._left_exog, params)) / sigma
left_like = np.sum(norm.logcdf(left_exog))
# can get overflow from the above if cdf is very small
# seems to mean bad starting values, but might need a check
#if np.isinf(left_like) and left_like < 0:
# left_like = -1e4
return left_like

def _score_left(self, params, sigma):
score_l = self._get_score_left(params, sigma)
score_c = self._get_score_center(params, sigma)
return score_l + score_c

def _loglike_left(self, params, sigma):
llf_l = self._get_loglike_left(params, sigma)
llf_r = self._get_loglike_right(params, sigma)
return llf_l + llf_r

def _get_score_right(self, params, sigma):
right_resid = (np.dot(right_exog, params) - self.right) / sigma
dlncdf = dlogcdf(right_resid)
dLdB_right = np.sum(right_exog/sigma * dlncdf[:,None], axis=0)
dLdSigma_right = -np.sum(right_resid*dlncdf, axis=0)
return np.r_[dLdB_right, dLdSigma_right]

def _get_loglike_right(self, params, sigma):
right_exog = (np.dot(self._right_exog, params) - self.right) / sigma
right_like = np.sum(norm.logcdf(right_exog))
#if np.isinf(right_like) and left_like < 0:
# left_like = -1e4
return right_like

def _score_right(self, params, sigma):
score_r = self._get_score_right(params, sigma)
score_c = self._get_score_center(params, sigma)
return score_r + score_c

def _loglike_right(self, params, sigma):
llf_r = self._get_loglike_right(params, sigma)
llf_c = self._get_loglike_center(params, sigma)
return llf_r + llf_c

def _get_score_center(self, params, sigma):
endog = self._center_endog
exog = self._center_exog
scaled_center_exog = (endog - np.dot(exog, params))/sigma
dLdB_center = np.sum(exog/sigma * scaled_center_exog[:,None], axis=0)
dLdSigma_center = np.sum(scaled_center_exog**2 - 1, axis=0)
return np.r_[dLdB_center, dLdSigma_center]

def _get_loglike_center(self, params, sigma):
center_endog = self._center_endog
center_exog = self._center_exog
center_like = np.sum(norm.logpdf((center_endog - np.dot(center_exog,
params)) / sigma) - np.log(sigma))
return center_like

def _score_both(self, params, sigma):
score_l = self._get_score_left(params, sigma)
score_r = self._get_score_right(params, sigma)
score_c = self._get_score_center(params, sigma)
return score_l + score_r + score_c

def _loglike_both(self, params, sigma):
llf_l = self._get_loglike_left(params, sigma)
llf_r = self._get_loglike_right(params, sigma)
llf_c = self._get_loglike_center(params, sigma)
return llf_l + llf_r + llf_c

def _get_start_params(self, method='null'):
"""
Get starting parameters from an OLS estimation
Parameters
----------
method : str {'null', 'olsmle', 'ols'}
The method to use to get the starting values. It is recommended
to use the default. See the private functions _null_params,
_olsmle_params, and _ols_params for more information.
"""
return _start_param_funcs[method](self)

@set_transform # likelihood expects not to transform
def fit(self, start_params = None, method='newton', **kwargs):
"""
Notes
-----
It is not recommended to change the default solver or starting
parameters. Please report performance issues to the developers.
"""
if start_params is None:
start_params = self._get_start_params()
else:
start_params = np.copy(start_params)
mlefit = super(Tobit2, self).fit(start_params=start_params,
method=method, **kwargs)
return TobitResults(self, mlefit)

class TobitResults(base.LikelihoodModelResults):
def __init__(self, model, mlefit):
self.model = model
params = mlefit.params
# re-parameterize from exponential
params[-1] = np.exp(params[-1])
# re-parameterize from Olsen
params = _reparam_olsen(params)
#params = _reparam_olsen(params)
beta, sigma = params[:-1], params[-1]
self.model_params = beta
mlefit.params = np.r_[beta, sigma]
@@ -458,18 +664,19 @@ def webuse(data, baseurl='http://www.stata-press.com/data/r11/'):

# ok
print 'Newton'
mod1 = Tobit(endog, exog, left=0, right=False).fit(method='newton')
mod1 = Tobit2(endog, exog, left=0, right=False).fit(method='newton',
retall=1)

# nooooooooo
print 'NM'
mod2 = Tobit(endog, exog, left=0, right=False).fit(method='nm',
maxiter=1500)
print 'NM' # this doesn't work well,
mod2 = Tobit2(endog, exog, left=0, right=False).fit(method='nm',
maxiter=10000, maxfun=10000, ftol=1e-12, xtol=1e-12)
print 'BFGS'
mod3 = Tobit(endog, exog, left=0, right=False).fit(method='bfgs')
mod3 = Tobit2(endog, exog, left=0, right=False).fit(method='bfgs')
print 'Powell'
mod4 = Tobit(endog, exog, left=0, right=False).fit(method='powell')
mod4 = Tobit2(endog, exog, left=0, right=False).fit(method='powell',
xtol=1e-8, ftol=1e-8)
print 'CG'
mod5 = Tobit(endog, exog, left=0, right=False).fit(method='cg',
mod5 = Tobit2(endog, exog, left=0, right=False).fit(method='cg',
maxiter=1000)
print 'NCG'
mod6 = Tobit(endog, exog, left=0, right=False).fit(method='ncg')
mod6 = Tobit2(endog, exog, left=0, right=False).fit(method='ncg')