forked from statsmodels/statsmodels
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_ols_table.py
69 lines (52 loc) · 1.95 KB
/
example_ols_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Example: statsmodels.OLS
"""
from statsmodels.datasets.longley import load
import statsmodels.api as sm
from statsmodels.iolib.table import SimpleTable, default_txt_fmt
import numpy as np
data = load()
data_orig = (data.endog.copy(), data.exog.copy())
#.. Note: In this example using zscored/standardized variables has no effect on
#.. regression estimates. Are there no numerical problems?
rescale = 0
#0: no rescaling, 1:demean, 2:standardize, 3:standardize and transform back
rescale_ratio = data.endog.std() / data.exog.std(0)
if rescale > 0:
# rescaling
data.endog -= data.endog.mean()
data.exog -= data.exog.mean(0)
if rescale > 1:
data.endog /= data.endog.std()
data.exog /= data.exog.std(0)
#skip because mean has been removed, but dimension is hardcoded in table
data.exog = sm.tools.add_constant(data.exog, prepend=False)
ols_model = sm.OLS(data.endog, data.exog)
ols_results = ols_model.fit()
# the Longley dataset is well known to have high multicollinearity
# one way to find the condition number is as follows
#Find OLS parameters for model with one explanatory variable dropped
resparams = np.nan * np.ones((7, 7))
res = sm.OLS(data.endog, data.exog).fit()
resparams[:, 0] = res.params
indall = range(7)
for i in range(6):
ind = indall[:]
del ind[i]
res = sm.OLS(data.endog, data.exog[:, ind]).fit()
resparams[ind, i + 1] = res.params
if rescale == 1:
pass
if rescale == 3:
resparams[:-1, :] *= rescale_ratio[:, None]
txt_fmt1 = default_txt_fmt
numformat = '%10.4f'
txt_fmt1 = dict(data_fmts=[numformat])
rowstubs = data.names[1:] + ['const']
headers = ['all'] + ['drop %s' % name for name in data.names[1:]]
tabl = SimpleTable(resparams, headers, rowstubs, txt_fmt=txt_fmt1)
nanstring = numformat % np.nan
nn = len(nanstring)
nanrep = ' ' * (nn - 1)
nanrep = nanrep[:nn // 2] + '-' + nanrep[nn // 2:]
print 'Longley data - sensitivity to dropping an explanatory variable'
print str(tabl).replace(nanstring, nanrep)