New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes release #316

Merged
merged 5 commits on Jun 18, 2012
View
@@ -1,6 +1,29 @@
Release History
===============
0.4.1
-----
This is a backwards compatible (according to our test suite) release with
bug fixes and code cleanup.
*Bug Fixes*
* build and distribution fixes
* lowess correct distance calculation
* genmod correction CDFlink derivative
* adfuller _autolag correct calculation of optimal lag
* het_arch, het_lm : fix autolag and store options
* GLSAR: incorrect whitening for lag>1
*Other Changes*
* add lowess and other functions to api and documentation
* rename lowess module (old import path will be removed at next release)
* new robust sandwich covariance estimators, moved out of sandbox
* compatibility with pandas 0.8
0.4.0
-----
@@ -0,0 +1,15 @@
'''temporary compatibility module
TODO: remove in 0.5.0
'''
from statsmodels.stats.sandwich_covariance import *
#from statsmodels.stats.moment_helpers import se_cov
#not in __all__
def cov_hac_simple(results, nlags=None, weights_func=weights_bartlett,
                   use_correction=True):
    '''Compute the HAC covariance matrix and its standard errors.

    Thin convenience wrapper around ``cov_hac``: forwards all arguments
    unchanged and additionally derives the standard errors via ``se_cov``.
    Lives in a temporary compatibility module slated for removal in 0.5.0.

    Parameters
    ----------
    results : regression results instance
        Fitted results object passed through to ``cov_hac``.
    nlags : int or None
        Number of lags for the HAC kernel (``cov_hac`` picks a default
        when None).
    weights_func : callable
        Kernel weight function, Bartlett weights by default.
    use_correction : bool
        Whether to apply the small-sample correction in ``cov_hac``.

    Returns
    -------
    tuple
        ``(cov, se)`` — the HAC covariance matrix and the corresponding
        standard errors.
    '''
    hac_cov = cov_hac(results, nlags=nlags, weights_func=weights_func,
                      use_correction=use_correction)
    return hac_cov, se_cov(hac_cov)
@@ -301,7 +301,7 @@ def acorr_ljungbox(x, lags=None, boxpierce=False):
pvalbp = stats.chi2.sf(qboxpierce, lags)
return qljungbox, pval, qboxpierce, pvalbp
def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
def acorr_lm(x, maxlag=None, autolag='AIC', store=False, regresults=False):
'''Lagrange Multiplier tests for autocorrelation
This is a generic Lagrange Multiplier test for autocorrelation. I don't
@@ -343,11 +343,14 @@ def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
'''
if regresults:
store = True
x = np.asarray(x)
nobs = x.shape[0]
if maxlag is None:
#for adf from Greene referencing Schwert 1989
maxlag = 12. * np.power(nobs/100., 1/4.)#nobs//4 #TODO: check default, or do AIC/BIC
maxlag = int(ceil(12. * np.power(nobs/100., 1/4.)))#nobs//4 #TODO: check default, or do AIC/BIC
xdiff = np.diff(x)
@@ -363,22 +366,24 @@ def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
#search for lag length with highest information criteria
#Note: I use the same number of observations to have comparable IC
results = {}
for mlag in range(1,maxlag):
for mlag in range(1, maxlag+1):
results[mlag] = OLS(xshort, xdall[:,:mlag+1]).fit()
if autolag.lower() == 'aic':
bestic, icbestlag = max((v.aic,k) for k,v in results.iteritems())
bestic, icbestlag = min((v.aic,k) for k,v in results.iteritems())
elif autolag.lower() == 'bic':
icbest, icbestlag = max((v.bic,k) for k,v in results.iteritems())
icbest, icbestlag = min((v.bic,k) for k,v in results.iteritems())
else:
raise ValueError("autolag can only be None, 'AIC' or 'BIC'")
#rerun ols with best ic
xdall = lagmat(x[:,None], icbestlag, trim='forward')
xdall = lagmat(x[:,None], icbestlag, trim='both')
nobs = xdall.shape[0]
xdall = np.c_[np.ones((nobs,1)), xdall]
xshort = x[-nobs:]
usedlag = icbestlag
if regresults:
resstore.results = results
else:
usedlag = maxlag
@@ -393,11 +398,12 @@ def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
if store:
resstore.resols = resols
resstore.usedlag = usedlag
return fval, fpval, lm, lmpval, resstore
return lm, lmpval, fval, fpval, resstore
else:
return lm, lmpval, fval, fpval
def het_arch(resid, maxlag=None, autolag=None, store=False, ddof=0):
def het_arch(resid, maxlag=None, autolag=None, store=False, regresults=False,
ddof=0):
'''Engle's Test for Autoregressive Conditional Heteroscedasticity (ARCH)
Parameters
@@ -438,7 +444,8 @@ def het_arch(resid, maxlag=None, autolag=None, store=False, ddof=0):
'''
return acorr_lm(resid**2, maxlag=maxlag, autolag=autolag, store=False)
return acorr_lm(resid**2, maxlag=maxlag, autolag=autolag, store=store,
regresults=regresults)
def acorr_breush_godfrey(results, nlags=None, store=False):
@@ -110,7 +110,7 @@
__all__ = ['cov_cluster', 'cov_cluster_2groups', 'cov_hac', 'cov_nw_panel',
'cov_white_simple',
'cov_hc0', 'cov_hc1', 'cov_hc2', 'cov_hc3',
'se_cov']
'se_cov', 'weights_bartlett', 'weights_uniform']
@@ -231,13 +231,14 @@ def test_het_breush_pagan(self):
def test_het_white(self):
res = self.res
#TODO: regressiontest compare with Greene or Gretl or Stata
#TODO: regressiontest, compare with Greene or Gretl or Stata
hw = smsdia.het_white(res.resid, res.model.exog)
hw_values = (33.503722896538441, 2.9887960597830259e-06,
7.7945101228430946, 1.0354575277704231e-06)
assert_almost_equal(hw, hw_values)
def test_het_arch(self):
#test het_arch and indirectly het_lm against R
#> library(FinTS)
#> at = ArchTest(residuals(fm), lags=4)
#> mkhtest(at, 'archtest_4', 'chi2')
@@ -256,6 +257,24 @@ def test_het_arch(self):
compare_t_est(at4[:2], archtest_4, decimal=(12, 13))
compare_t_est(at12[:2], archtest_12, decimal=(12, 13))
def test_het_arch2(self):
#test autolag options, this also test het_lm
#unfortunately optimal lag=1 for this data
resid = self.res.resid
res1 = smsdia.het_arch(resid, maxlag=1, autolag=None, store=True)
rs1 = res1[-1]
res2 = smsdia.het_arch(resid, maxlag=5, autolag='aic', store=True)
rs2 = res2[-1]
assert_almost_equal(rs2.resols.params, rs1.resols.params, decimal=13)
assert_almost_equal(res2[:4], res1[:4], decimal=13)
#test that smallest lag, maxlag=1 works
res3 = smsdia.het_arch(resid, maxlag=1, autolag='aic')
assert_almost_equal(res3[:4], res1[:4], decimal=13)
def test_acorr_breush_godfrey(self):
res = self.res
@@ -195,7 +195,7 @@ def adfuller(x, maxlag=None, regression="c", autolag='AIC',
if maxlag is None:
#from Greene referencing Schwert 1989
maxlag = int(round(12. * np.power(nobs/100., 1/4.)))
maxlag = int(ceil(12. * np.power(nobs/100., 1/4.)))
xdiff = np.diff(x)
xdall = lagmat(xdiff[:,None], maxlag, trim='both', original='in')
@@ -693,10 +693,10 @@ def levinson_durbin(s, nlags=10, isacov=False):
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
'''four tests for granger causality of 2 timeseries
'''four tests for granger non causality of 2 timeseries
all four tests give similar results
`params_ftest` and `ssr_ftest` are equivalent based of F test which is
`params_ftest` and `ssr_ftest` are equivalent based on F test which is
identical to lmtest:grangertest in R
Parameters
@@ -724,15 +724,24 @@ def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
TODO: convert to class and attach results properly
The Null hypothesis for grangercausalitytests is that the time series in
the second column, x2, Granger causes the time series in the first column,
x1. This means that past values of x2 have a statistically significant
effect on the current value of x1, taking also past values of x1 into
account, as regressors. We reject the null hypothesis of x2 Granger
causing x1 if the pvalues are below a desired size of the test.
the second column, x2, does NOT Granger cause the time series in the first
column, x1. Granger causality means that past values of x2 have a
statistically significant effect on the current value of x1, taking past
values of x1 into account as regressors. We reject the null hypothesis
that x2 does not Granger cause x1 if the pvalues are below a desired size
of the test.
'params_ftest', 'ssr_ftest' are based on F test
The null hypothesis for all four test is that the coefficients
corresponding to past values of the second time series are zero.
'ssr_chi2test', 'lrtest' are based on chi-square test
'params_ftest', 'ssr_ftest' are based on F distribution
'ssr_chi2test', 'lrtest' are based on chi-square distribution
References
----------
http://en.wikipedia.org/wiki/Granger_causality
Greene: Econometric Analysis
'''
from scipy import stats # lazy import
ProTip! Use n and p to navigate between commits in a pull request.