ENH: Added condno and eigenvalue caches to OLS and Quantile regression summaries.
guyrt committed Sep 23, 2013
1 parent 096be81 commit 4e0c77c
Showing 4 changed files with 62 additions and 44 deletions.
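
The net effect of the change: a fitted results object now exposes the eigenvalues of wexog'wexog and the derived condition number as cached attributes, so summary() and summary2() share one computation instead of each recomputing it. A minimal usage sketch (not part of the commit; synthetic data, standard OLS fitting API assumed):

    import numpy as np
    import statsmodels.api as sm

    np.random.seed(0)
    X = sm.add_constant(np.random.rand(100, 2))            # illustrative design matrix
    y = np.dot(X, [1.0, 2.0, -0.5]) + np.random.randn(100)

    res = sm.OLS(y, X).fit()

    # Computed once and cached via @cache_readonly, then reused by the summaries.
    print(res.eigenvals)           # eigenvalues of X'X, largest first
    print(res.condition_number)    # sqrt(largest / smallest eigenvalue)
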
43 changes: 28 additions & 15 deletions statsmodels/regression/linear_model.py
@@ -709,6 +709,7 @@ def yule_walker(X, order=1, method="unbiased", df=None, inv=False, demean=True):
else:
return rho, np.sqrt(sigmasq)


class RegressionResults(base.LikelihoodModelResults):
"""
This class summarizes the fit of a linear regression model.
@@ -1000,6 +1001,22 @@ def bic(self):
return (-2 * self.llf + np.log(self.nobs) * (self.df_model +
self.k_constant))

@cache_readonly
def eigenvals(self):
"""
Return eigenvalues sorted in decreasing order.
"""
eigvals = np.linalg.linalg.eigvalsh(np.dot(self.model.wexog.T, self.model.wexog))
return np.sort(eigvals)[::-1]

@cache_readonly
def condition_number(self):
"""
Return condition number of exogenous matrix, calculated as ratio of largest to smallest eigenvalue.
"""
eigvals = self.eigenvals
return np.sqrt(eigvals[0]/eigvals[-1])

#TODO: make these properties reset bse
def _HCCM(self, scale):
H = np.dot(self.model.pinv_wexog,
@@ -1194,16 +1211,12 @@ def summary(self, yname=None, xname=None, title=None, alpha=.05):
jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
omni, omnipv = omni_normtest(self.wresid)

#TODO: reuse condno from somewhere else ?
#condno = np.linalg.cond(np.dot(self.wexog.T, self.wexog))
wexog = self.model.wexog
eigvals = np.linalg.linalg.eigvalsh(np.dot(wexog.T, wexog))
eigvals = np.sort(eigvals) #in increasing order
condno = np.sqrt(eigvals[-1]/eigvals[0])
eigvals = self.eigenvals
condno = self.condition_number

self.diagn = dict(jb=jb, jbpv=jbpv, skew=skew, kurtosis=kurtosis,
omni=omni, omnipv=omnipv, condno=condno,
mineigval=eigvals[0])
mineigval=eigvals[-1])

#TODO not used yet
#diagn_left_header = ['Models stats']
@@ -1265,12 +1278,12 @@ def summary(self, yname=None, xname=None, title=None, alpha=.05):
if self.model.exog.shape[0] < self.model.exog.shape[1]:
wstr = "The input rank is higher than the number of observations."
etext.append(wstr)
if eigvals[0] < 1e-10:
if eigvals[-1] < 1e-10:
wstr = "The smallest eigenvalue is %6.3g. This might indicate "
wstr += "that there are\n"
wstr += "strong multicollinearity problems or that the design "
wstr += "matrix is singular."
wstr = wstr % eigvals[0]
wstr = wstr % eigvals[-1]
etext.append(wstr)
elif condno > 1000: #TODO: what is recommended
wstr = "The condition number is large, %6.3g. This might "
@@ -1334,14 +1347,14 @@ def summary2(self, yname=None, xname=None, title=None, alpha=.05,
from statsmodels.stats.stattools import (jarque_bera,
omni_normtest,
durbin_watson)
from numpy.linalg import (cond, eigvalsh)
from statsmodels.compatnp.collections import OrderedDict
jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
omni, omnipv = omni_normtest(self.wresid)
dw = durbin_watson(self.wresid)
condno = cond(self.model.wexog)
eigvals = eigvalsh(np.dot(self.model.wexog.T, self.model.wexog))
eigvals = np.sort(eigvals) #in increasing order

eigvals = self.eigenvals
condno = self.condition_number

diagnostic = OrderedDict([
('Omnibus:', "%.3f" % omni),
('Prob(Omnibus):', "%.3f" % omnipv),
@@ -1361,10 +1374,10 @@ def summary2(self, yname=None, xname=None, title=None, alpha=.05,
smry.add_dict(diagnostic)

# Warnings
if eigvals[0] < 1e-10:
if eigvals[-1] < 1e-10:
warn = "The smallest eigenvalue is %6.3g. This might indicate that\
there are strong multicollinearity problems or that the design\
matrix is singular." % eigvals[0]
matrix is singular." % eigvals[-1]
smry.add_text(warn)
if condno > 1000:
warn = "* The condition number is large (%.g). This might indicate \
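
One detail worth noting: summary2() previously used numpy.linalg.cond on wexog directly, while summary() used the eigenvalue ratio of wexog'wexog. For a full-column-rank design these agree in exact arithmetic (the 2-norm condition number of X equals the square root of the largest-to-smallest eigenvalue ratio of X'X), so routing both methods through the cached property should not change the reported value beyond floating-point noise. A quick self-contained check with synthetic data (illustrative only):

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.rand(50, 3)                               # illustrative full-rank matrix

    eigvals = np.linalg.eigvalsh(np.dot(X.T, X))      # eigenvalues of X'X
    eig_based = np.sqrt(eigvals.max() / eigvals.min())

    print(np.allclose(eig_based, np.linalg.cond(X)))  # True: same 2-norm condition number
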
16 changes: 6 additions & 10 deletions statsmodels/regression/quantile_regression.py
@@ -365,12 +365,8 @@ def summary(self, yname=None, xname=None, title=None, alpha=.05):
jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
omni, omnipv = omni_normtest(self.wresid)

#TODO: reuse condno from somewhere else ?
#condno = np.linalg.cond(np.dot(self.wexog.T, self.wexog))
wexog = self.model.wexog
eigvals = np.linalg.linalg.eigvalsh(np.dot(wexog.T, wexog))
eigvals = np.sort(eigvals) #in increasing order
condno = np.sqrt(eigvals[-1]/eigvals[0])
eigvals = self.eigenvals
condno = self.condition_number

self.diagn = dict(jb=jb, jbpv=jbpv, skew=skew, kurtosis=kurtosis,
omni=omni, omnipv=omnipv, condno=condno,
@@ -420,13 +416,13 @@ def summary(self, yname=None, xname=None, title=None, alpha=.05):
#title="")

#add warnings/notes, added to text format only
etext =[]
if eigvals[0] < 1e-10:
etext = []
if eigvals[-1] < 1e-10:
wstr = "The smallest eigenvalue is %6.3g. This might indicate "
wstr += "that there are\n"
wstr = "strong multicollinearity problems or that the design "
wstr += "strong multicollinearity problems or that the design "
wstr += "matrix is singular."
wstr = wstr % eigvals[0]
wstr = wstr % eigvals[-1]
etext.append(wstr)
elif condno > 1000: #TODO: what is recommended
wstr = "The condition number is large, %6.3g. This might "
11 changes: 5 additions & 6 deletions statsmodels/tsa/stattools.py
@@ -3,13 +3,12 @@
"""

import numpy as np
from scipy import stats, signal
from scipy import stats
from statsmodels.regression.linear_model import OLS, yule_walker
from statsmodels.tools.tools import add_constant
from tsatools import lagmat, lagmat2ds, add_trend
#from statsmodels.sandbox.tsa import var
from adfvalues import mackinnonp, mackinnoncrit
#from statsmodels.sandbox.rls import RLS


#NOTE: now in two places to avoid circular import
#TODO: I like the bunch pattern for this too.
@@ -69,7 +68,6 @@ def _autolag(mod, endog, exog, startlag, maxlag, method, modargs=(),
elif method == "bic":
icbest, bestlag = min((v.bic,k) for k,v in results.iteritems())
elif method == "t-stat":
lags = sorted(results.keys())[::-1]
#stop = stats.norm.ppf(.95)
stop = 1.6448536269514722
for lag in range(startlag + maxlag, startlag - 1, -1):
@@ -464,12 +462,12 @@ def pacf_yw(x, nlags=40, method='unbiased'):
This solves yule_walker for each desired lag and contains
currently duplicate calculations.
'''
xm = x - x.mean()
pacf = [1.]
for k in range(1, nlags+1):
pacf.append(yule_walker(x, k, method=method)[0][-1])
return np.array(pacf)


#NOTE: this is incorrect.
def pacf_ols(x, nlags=40):
'''Calculate partial autocorrelations
@@ -787,7 +785,8 @@ def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
x = np.asarray(x)

if x.shape[0] <= 3 * maxlag + int(addconst):
raise ValueError("")
raise ValueError("Insufficient observations. Maximum allowable "
"lag is {0}".format(int((x.shape[0] - int(addconst)) / 3) - 1))

resli = {}

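
The grangercausalitytests change only improves the error message; the guard itself is unchanged and requires strictly more than 3 * maxlag + int(addconst) observations. A tiny check against the numbers used in the new test case below (10 observations, maxlag of 2 versus 3); illustrative only:

    nobs, addconst = 10, True          # matches the new test: X = np.random.rand(10, 2)
    for maxlag in (2, 3):
        raises = nobs <= 3 * maxlag + int(addconst)
        print(maxlag, "raises" if raises else "ok")    # 2 ok, 3 raises
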
36 changes: 23 additions & 13 deletions statsmodels/tsa/tests/test_stattools.py
@@ -214,24 +214,34 @@ def __init__(self):
self.teststat = -1.8208817


def test_grangercausality():
# some example data
mdata = macrodata.load().data
mdata = mdata[['realgdp','realcons']]
data = mdata.view((float,2))
data = np.diff(np.log(data), axis=0)

#R: lmtest:grangertest
r_result = [0.243097, 0.7844328, 195, 2] #f_test
gr = grangercausalitytests(data[:,1::-1], 2, verbose=False)
assert_almost_equal(r_result, gr[2][0]['ssr_ftest'], decimal=7)
assert_almost_equal(gr[2][0]['params_ftest'], gr[2][0]['ssr_ftest'],
decimal=7)
class TestGrangerCausality(object):

def test_grangercausality(self):
# some example data
mdata = macrodata.load().data
mdata = mdata[['realgdp', 'realcons']]
data = mdata.view((float, 2))
data = np.diff(np.log(data), axis=0)

#R: lmtest:grangertest
r_result = [0.243097, 0.7844328, 195, 2] # f_test
gr = grangercausalitytests(data[:, 1::-1], 2, verbose=False)
assert_almost_equal(r_result, gr[2][0]['ssr_ftest'], decimal=7)
assert_almost_equal(gr[2][0]['params_ftest'], gr[2][0]['ssr_ftest'], decimal=7)

def test_granger_fails_on_nobs_check(self):
# Test that if maxlag is too large, Granger Test raises a clear error.
X = np.random.rand(10, 2)
grangercausalitytests(X, 2) # This should pass.
assert_raises(ValueError, grangercausalitytests, X, 3)



def test_pandasacovf():
s = Series(range(1, 11))
assert_almost_equal(acovf(s), acovf(s.values))


def test_acovf2d():
dta = sunspots.load_pandas().data
dta.index = Index(dates_from_range('1700', '2008'))
