Commit

Merge c9d1b20 into 831e2ad
bashtage committed Nov 7, 2013
2 parents 831e2ad + c9d1b20 commit 635172a
Showing 2 changed files with 144 additions and 51 deletions.
94 changes: 57 additions & 37 deletions statsmodels/regression/linear_model.py
@@ -1,10 +1,10 @@
"""
-This module implements some standard regression models:
+This module implements standard regression models:
-Generalized Least Squares (GLS),
-Ordinary Least Squares (OLS),
-and Weighted Least Squares (WLS),
-as well as an GLS model with autoregressive error terms GLSAR(p)
+Generalized Least Squares (GLS)
+Ordinary Least Squares (OLS)
+Weighted Least Squares (WLS)
+Generalized Least Squares with autoregressive error terms GLSAR(p)
Models are specified with an endogenous response variable and an
exogenous design matrix and are fit using their `fit` method.
@@ -46,9 +46,10 @@

def _get_sigma(sigma, nobs):
"""
-Returns sigma for GLS and the inverse of its Cholesky decomposition.
-Handles dimensions and checks integrity. If sigma is None, returns
-None, None. Otherwise returns sigma, cholsigmainv.
+Returns sigma (matrix, nobs by nobs) for GLS and the inverse of its
+Cholesky decomposition. Handles dimensions and checks integrity.
+If sigma is None, returns None, None. Otherwise returns sigma,
+cholsigmainv.
"""
if sigma is None:
return None, None
@@ -58,13 +59,13 @@ def _get_sigma(sigma, nobs):
if sigma.ndim == 1:
if sigma.shape != (nobs,):
raise ValueError("Sigma must be a scalar, 1d of length %s or a 2d "
"array of shape %s x %s" % (nobs, nobs))
"array of shape %s x %s" % (nobs, nobs, nobs))
cholsigmainv = np.diag(1/sigma**.5)
sigma = np.diag(sigma)
else:
if sigma.shape != (nobs, nobs):
raise ValueError("Sigma must be a scalar, 1d of length %s or a 2d "
"array of shape %s x %s" % (nobs, nobs))
"array of shape %s x %s" % (nobs, nobs, nobs))
cholsigmainv = np.linalg.cholesky(np.linalg.pinv(sigma)).T

return sigma, cholsigmainv
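For context, `cholsigmainv` is the transposed Cholesky factor of sigma's (pseudo-)inverse, and premultiplying the data by it is what whitens the GLS errors. A minimal standalone sketch of that property, using a hypothetical AR(1)-style covariance (illustration only, not the statsmodels internals):

import numpy as np

nobs = 5
rho = 0.5
# Hypothetical AR(1)-style error covariance: sigma[i, j] = rho**|i - j|.
sigma = rho ** np.abs(np.subtract.outer(np.arange(nobs), np.arange(nobs)))

# Mirrors the 2d branch above: transposed Cholesky factor of pinv(sigma).
cholsigmainv = np.linalg.cholesky(np.linalg.pinv(sigma)).T

# Whitening property: cholsigmainv.T @ cholsigmainv recovers inv(sigma),
# so the transformed errors have identity covariance.
assert np.allclose(cholsigmainv.T @ cholsigmainv, np.linalg.inv(sigma))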
@@ -92,6 +93,10 @@ def initialize(self):

@property
def df_model(self):
"""
The model degree of freedom, defined as the rank of the regressor
matrix minus 1 if a constant is included.
"""
if self._df_model is None:
if self.rank is None:
self.rank = rank(self.exog)
@@ -104,6 +109,11 @@ def df_model(self, value):

@property
def df_resid(self):
"""
The residual degree of freedom, defined as the number of observations
minus the rank of the regressor matrix.
"""

if self._df_resid is None:
if self.rank is None:
self.rank = rank(self.exog)
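In plain terms, the two new docstrings say: df_model is the rank of the regressor matrix minus one when a constant is present, and df_resid is the number of observations minus that rank. A quick numeric illustration with a hypothetical design matrix:

import numpy as np
from numpy.linalg import matrix_rank

rng = np.random.RandomState(0)
# 100 observations: a constant plus two random regressors.
exog = np.column_stack([np.ones(100), rng.randn(100, 2)])

rank = matrix_rank(exog)         # 3
df_model = rank - 1              # 2, the constant does not count
df_resid = exog.shape[0] - rank  # 97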
@@ -158,8 +168,9 @@ def fit(self, method="pinv", **kwargs):

elif method == "qr":
if ((not hasattr(self, 'exog_Q')) or
+(not hasattr(self, 'exog_R')) or
(not hasattr(self, 'normalized_cov_params')) or
-(not hasattr(self, 'rank'))):
+(getattr(self, 'rank', None) is None)):
Q, R = np.linalg.qr(self.wexog)
self.exog_Q, self.exog_R = Q, R
self.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
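The QR path caches Q and R and derives normalized_cov_params as (R'R)^-1, which equals (X'X)^-1 since Q is orthonormal; the coefficients then come from solving the triangular system R b = Q'y. A standalone sketch of that equivalence on synthetic data (not the statsmodels code path itself):

import numpy as np

rng = np.random.RandomState(42)
X = np.column_stack([np.ones(50), rng.randn(50, 2)])
y = X @ np.array([1.0, 2.0, -0.5]) + 0.1 * rng.randn(50)

Q, R = np.linalg.qr(X)
params = np.linalg.solve(R, Q.T @ y)            # solves R b = Q'y
normalized_cov_params = np.linalg.inv(R.T @ R)  # equals inv(X'X)

# The QR solution matches the pinv-based solution.
assert np.allclose(params, np.linalg.pinv(X) @ y)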
@@ -289,7 +300,7 @@ class GLS(RegressionModel):

def __init__(self, endog, exog, sigma=None, missing='none', hasconst=None):
#TODO: add options igls, for iterative fgls if sigma is None
-#TODO: default is sigma is none should be two-step GLS
+#TODO: default if sigma is none should be two-step GLS
sigma, cholsigmainv = _get_sigma(sigma, len(endog))
super(GLS, self).__init__(endog, exog, missing=missing,
hasconst=hasconst, sigma=sigma,
@@ -324,9 +335,9 @@ def whiten(self, X):

def loglike(self, params):
"""
-Returns the value of the gaussian loglikelihood function at params.
+Returns the value of the Gaussian log-likelihood function at params.
-Given the whitened design matrix, the loglikelihood is evaluated
+Given the whitened design matrix, the log-likelihood is evaluated
at the parameter vector `params` for the dependent variable `endog`.
Parameters
@@ -337,14 +348,14 @@ def loglike(self, params):
Returns
-------
loglike : float
-The value of the loglikelihood function for a GLS Model.
+The value of the log-likelihood function for a GLS Model.
Notes
-----
-The loglikelihood function for the normal distribution is
+The log-likelihood function for the normal distribution is
-.. math:: -\\frac{n}{2}\\log\\left(Y-\\hat{Y}\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}\\log\\left(\\left|\\Sigma\\right|\\right)
+.. math:: -\\frac{n}{2}\\log\\left(\\left(Y-\\hat{Y}\\right)^{\\prime}\\left(Y-\\hat{Y}\\right)\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}\\log\\left(\\left|\\Sigma\\right|\\right)
Y and Y-hat are whitened.
@@ -356,7 +367,8 @@ def loglike(self, params):
llf -= (1+np.log(np.pi/nobs2))*nobs2 # with likelihood constant
if np.any(self.sigma) and self.sigma.ndim == 2:
#FIXME: robust-enough check? unneeded if _det_sigma gets defined
-llf -= .5*np.log(np.linalg.det(self.sigma))
+det = np.linalg.slogdet(self.sigma)
+llf -= .5*det[1]
# with error covariance matrix
return llf
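The switch to slogdet here is a numerical fix: for even moderately large systems, det(sigma) can overflow or underflow a float while log|sigma| remains perfectly representable, and slogdet returns the sign and log of the absolute determinant directly. A small demonstration of the failure mode with a synthetic covariance:

import numpy as np

nobs = 500
sigma = 0.1 * np.eye(nobs)  # det = 0.1**500, which underflows to 0.0

naive = np.log(np.linalg.det(sigma))     # -inf, with a runtime warning
sign, logdet = np.linalg.slogdet(sigma)  # finite: 500 * log(0.1)

assert np.isneginf(naive)
assert np.allclose(logdet, nobs * np.log(0.1))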

@@ -402,7 +414,7 @@ class WLS(RegressionModel):
Notes
-----
-If the weights are a function of the data, then the postestimation
+If the weights are a function of the data, then the post estimation
statistics such as fvalue and mse_model might not be correct, as the
package does not yet support no-constant regression.
""" % {'params' : base._model_params_doc,
@@ -421,7 +433,7 @@ def __init__(self, endog, exog, weights=1., missing='none', hasconst=None):
weights=weights, hasconst=hasconst)
nobs = self.exog.shape[0]
weights = self.weights
-if len(weights) != nobs and weights.size == nobs:
+if weights.size != nobs and weights.shape[0] != nobs:
raise ValueError('Weights must be scalar or same length as design')
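WLS whitening (defined just below) multiplies endog and exog by the square root of the weights, which reduces WLS to OLS on the transformed data. A minimal sketch of that equivalence, stated from the WLS definition rather than copied from this file:

import numpy as np

rng = np.random.RandomState(1)
nobs = 40
X = np.column_stack([np.ones(nobs), rng.randn(nobs)])
y = X @ np.array([2.0, 1.5]) + rng.randn(nobs)
weights = rng.uniform(0.5, 2.0, size=nobs)

# Whiten: scale each row by sqrt(w_i), then run ordinary least squares.
w = np.sqrt(weights)
params = np.linalg.lstsq(X * w[:, None], y * w, rcond=None)[0]

# Same answer as the normal equations (X'WX)^-1 X'Wy.
W = np.diag(weights)
direct = np.linalg.solve(X.T @ W @ X, X.T @ W @ y)
assert np.allclose(params, direct)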

def whiten(self, X):
@@ -446,9 +458,9 @@ def whiten(self, X):

def loglike(self, params):
"""
-Returns the value of the gaussian loglikelihood function at params.
+Returns the value of the gaussian log-likelihood function at params.
-Given the whitened design matrix, the loglikelihood is evaluated
+Given the whitened design matrix, the log-likelihood is evaluated
at the parameter vector `params` for the dependent variable `Y`.
Parameters
@@ -458,7 +470,7 @@
Returns
-------
-The value of the loglikelihood function for a WLS Model.
+The value of the log-likelihood function for a WLS Model.
Notes
--------
@@ -519,19 +531,19 @@ def __init__(self, endog, exog=None, missing='none', hasconst=None):
hasconst=hasconst)

def loglike(self, params):
-'''
+"""
The likelihood function for the classical OLS model.
Parameters
----------
params : array-like
-The coefficients with which to estimate the loglikelihood.
+The coefficients with which to estimate the log-likelihood.
Returns
-------
The concentrated likelihood function evaluated at params.
-'''
-nobs2 = self.nobs/2.
+"""
+nobs2 = self.nobs / 2.0
return -nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\
np.dot(np.transpose(self.endog -
np.dot(self.exog, params)),
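The return expression above (truncated by the diff) is the concentrated Gaussian log-likelihood: plugging the variance MLE, RSS/n, back into the normal likelihood gives ll = -(n/2) * (log(2*pi) + log(RSS/n) + 1). A standalone check of that formula on synthetic data:

import numpy as np

rng = np.random.RandomState(7)
n = 60
X = np.column_stack([np.ones(n), rng.randn(n)])
y = X @ np.array([0.5, 1.0]) + rng.randn(n)
params = np.linalg.lstsq(X, y, rcond=None)[0]

resid = y - X @ params
rss = resid @ resid
# Concentrated log-likelihood with sigma2_hat = rss / n plugged in.
llf = -n / 2.0 * (np.log(2 * np.pi) + np.log(rss / n) + 1)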
@@ -1125,7 +1137,7 @@ def HC3_se(self):
#TODO: this needs a test
def norm_resid(self):
"""
-Residuals, normalized to have unit length and unit variance.
+Residuals, normalized to have unit variance.
Returns
-------
@@ -1135,10 +1147,16 @@ def norm_resid(self):
-----
This method is untested
"""

if not hasattr(self, 'resid'):
-raise ValueError('need normalized residuals to estimate standard '
-'deviation')
-return self.wresid * recipr(np.sqrt(self.scale))
+raise ValueError('Method requires residuals.')
+if np.all(self.wresid <= 0.0):
+# This is a very exact check, does not account for numerical error
+from warnings import warn
+warn("All residuals are 0, cannot compute normed residuals.")
+return self.wresid
+else:
+return self.wresid / np.sqrt(np.mean(self.wresid ** 2))
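The rewritten return value divides the whitened residuals by their root mean square, so the result has unit sample second moment by construction. A quick standalone check of that normalization:

import numpy as np

rng = np.random.RandomState(3)
wresid = rng.randn(100) * 2.5

# Mirrors the new return expression above.
norm_resid = wresid / np.sqrt(np.mean(wresid ** 2))
assert np.isclose(np.mean(norm_resid ** 2), 1.0)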

def compare_f_test(self, restricted):
'''use F test to test whether restricted model is correct
@@ -1507,13 +1525,14 @@ def el_test(self, b0_vals, param_nums, return_weights=0,
ret_params=0, method='nm',
stochastic_exog=1, return_params=0):
"""
-Tests single or joint hypotheses of the regression parameters.
+Tests single or joint hypotheses of the regression parameters using
+Empirical Likelihood.
Parameters
----------
b0_vals : 1darray
-The hypthesized value of the parameter to be tested
+The hypothesized value of the parameter to be tested
param_nums : 1darray
The parameter number to be tested
Expand Down Expand Up @@ -1542,7 +1561,7 @@ def el_test(self, b0_vals, param_nums, return_weights=0,
-------
res : tuple
-The p-value and -2 times the log likelihood ratio for the
+The p-value and -2 times the log-likelihood ratio for the
hypothesized values.
Examples
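Where the docstring's Examples section is elided, a hedged usage sketch (assuming the standard statsmodels entry points sm.OLS and sm.add_constant; the hypothesis and data here are made up for illustration):

import numpy as np
import statsmodels.api as sm

# Hypothetical data; test H0: slope == 1 by empirical likelihood.
rng = np.random.RandomState(0)
x = rng.randn(100)
y = 1.0 * x + rng.randn(100)

res = sm.OLS(y, sm.add_constant(x)).fit()
res_el = res.el_test(np.array([1.0]), np.array([1]))
# res_el is the tuple described under Returns: the p-value and
# -2 times the log-likelihood ratio for the hypothesized value.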
@@ -1603,18 +1622,19 @@ def conf_int_el(self, param_num, sig=.05, upper_bound=None, lower_bound=None,
method='nm', stochastic_exog=1):
"""
Computes the confidence interval for the parameter given by param_num
+using Empirical Likelihood
Parameters
----------
param_num : float
-The parameter thats confidence interval is desired
+The parameter for which the confidence interval is desired
sig : float
The significance level. Default is .05
upper_bound : float
-Tha mximum value the upper limit can be. Default is the
+The maximum value the upper limit can be. Default is the
99.9% confidence value under OLS assumptions.
lower_bound : float
@@ -1652,7 +1672,7 @@ def conf_int_el(self, param_num, sig=.05, upper_bound=None, lower_bound=None,
For alpha=.05, the value is 3.841459.
To ensure optimization terminated successfully, it is suggested to
-do test_beta([lower_limit], [param_num])
+do el_test([lower_limit], [param_num])
If the optimization does not terminate successfully, consider switching
optimization algorithms.
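And a matching hedged sketch for conf_int_el, again assuming the standard statsmodels entry points; with a constant prepended, param_num=1 targets the slope:

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
x = rng.randn(100)
y = 2.0 * x + rng.randn(100)

res = sm.OLS(y, sm.add_constant(x)).fit()
# Confidence interval for the slope at the default sig=.05.
lower, upper = res.conf_int_el(1)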
