-
Notifications
You must be signed in to change notification settings - Fork 2.9k
/
example_ols_table.py
76 lines (55 loc) · 2.08 KB
/
example_ols_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""Example: statsmodels.OLS
"""
from statsmodels.datasets.longley import load
import statsmodels.api as sm
from statsmodels.iolib.table import (SimpleTable, default_txt_fmt,
default_latex_fmt, default_html_fmt)
import numpy as np
data = load()
data_orig = (data.endog.copy(), data.exog.copy())
#.. Note: In this example using zscored/standardized variables has no effect on
#.. regression estimates. Are there no numerical problems?
rescale = 0
#0: no rescaling, 1:demean, 2:standardize, 3:standardize and transform back
rescale_ratio = data.endog.std()/data.exog.std(0)
if rescale > 0:
# rescaling
data.endog -= data.endog.mean()
data.exog -= data.exog.mean(0)
if rescale > 1:
data.endog *= 1./data.endog.std()
#data.exog *= 1000./data.exog.var(0)
data.exog /= data.exog.std(0)
#rescale_ratio = data.exog.var(0)/data.endog.var()
#skip because mean has been removed, but dimension is hardcoded in table
data.exog = sm.tools.add_constant(data.exog)
ols_model = sm.OLS(data.endog, data.exog)
ols_results = ols_model.fit()
# the Longley dataset is well known to have high multicollinearity
# one way to find the condition number is as follows
#Find OLS parameters for model with one explanatory variable dropped
resparams = np.nan * np.ones((7,7))
res = sm.OLS(data.endog, data.exog).fit()
resparams[:,0] = res.params
indall = range(7)
for i in range(6):
ind = indall[:]
del ind[i]
res = sm.OLS(data.endog, data.exog[:,ind]).fit()
resparams[ind,i+1] = res.params
if rescale == 1:
pass
if rescale == 3:
resparams[:-1,:] *= rescale_ratio[:,None]
txt_fmt1 = default_txt_fmt
numformat = '%10.4f'
txt_fmt1 = dict(data_fmts = [numformat])
rowstubs = data.names[1:] + ['const']
headers = ['all'] + ['drop %s' % name for name in data.names[1:]]
tabl = SimpleTable(resparams, headers, rowstubs, txt_fmt=txt_fmt1)
nanstring = numformat%np.nan
nn = len(nanstring)
nanrep = ' '*(nn-1)
nanrep = nanrep[:nn//2] + '-' + nanrep[nn//2:]
print 'Longley data - sensitivity to dropping an explanatory variable'
print str(tabl).replace(nanstring, nanrep)