forked from statsmodels/statsmodels
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_wls.py
90 lines (77 loc) · 2.74 KB
/
example_wls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""Weighted Least Squares
"""
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)
np.random.seed(1024)
# WLS Estimation
# --------------
# Artificial data: Heteroscedasticity 2 groups
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Model assumptions:
#
# * Misspecificaion: true model is quadratic, estimate only linear
# * Independent noise/error term
# * Two groups for error variance, low and high variance groups
nsample = 50
x = np.linspace(0, 20, nsample)
X = np.c_[x, (x - 5)**2, np.ones(nsample)]
beta = [0.5, -0.01, 5.]
sig = 0.5
w = np.ones(nsample)
w[nsample * 6 / 10:] = 3
y_true = np.dot(X, beta)
e = np.random.normal(size=nsample)
y = y_true + sig * w * e
X = X[:, [0, 2]]
#WLS knowing the true variance ratio of heteroscedasticity
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
mod_wls = sm.WLS(y, X, weights=1. / w)
res_wls = mod_wls.fit()
print res_wls.summary()
#OLS vs. WLS
#-----------
# Estimate an OLS model for comparison
res_ols = sm.OLS(y, X).fit()
# Compare the estimated parameters in WLS and OLS
print res_ols.params
print res_wls.params
# Compare the WLS standard errors to heteroscedasticity corrected OLS standard
# errors:
se = np.vstack([[res_wls.bse], [res_ols.bse], [res_ols.HC0_se],
[res_ols.HC1_se], [res_ols.HC2_se], [res_ols.HC3_se]])
se = np.round(se, 4)
colnames = 'x1', 'const'
rownames = 'WLS', 'OLS', 'OLS_HC0', 'OLS_HC1', 'OLS_HC3', 'OLS_HC3'
tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)
print tabl
# Calculate OLS prediction interval
covb = res_ols.cov_params()
prediction_var = res_ols.mse_resid + (X * np.dot(covb, X.T).T).sum(1)
prediction_std = np.sqrt(prediction_var)
tppf = stats.t.ppf(0.975, res_ols.df_resid)
# Draw a plot to compare predicted values in WLS and OLS:
prstd, iv_l, iv_u = wls_prediction_std(res_wls)
plt.figure();
plt.plot(x, y, 'o', x, y_true, 'b-');
plt.plot(x, res_ols.fittedvalues, 'r--');
plt.plot(x, res_ols.fittedvalues + tppf * prediction_std, 'r--');
plt.plot(x, res_ols.fittedvalues - tppf * prediction_std, 'r--');
plt.plot(x, res_wls.fittedvalues, 'g--.');
plt.plot(x, iv_u, 'g--');
plt.plot(x, iv_l, 'g--');
#@savefig wls_ols_0.png
plt.title('blue: true, red: OLS, green: WLS');
# Feasible Weighted Least Squares (2-stage FWLS)
# ----------------------------------------------
resid1 = res_ols.resid[w == 1.]
var1 = resid1.var(ddof=int(res_ols.df_model) + 1)
resid2 = res_ols.resid[w != 1.]
var2 = resid2.var(ddof=int(res_ols.df_model) + 1)
w_est = w.copy()
w_est[w != 1.] = np.sqrt(var2) / np.sqrt(var1)
res_fwls = sm.WLS(y, X, 1. / w_est).fit()
print res_fwls.summary()