-
Notifications
You must be signed in to change notification settings - Fork 2.9k
/
example_gls.py
73 lines (54 loc) · 2.33 KB
/
example_gls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Generalized Least Squares
"""
import statsmodels.api as sm
import numpy as np
data = sm.datasets.longley.load()
data.exog = sm.add_constant(data.exog)
# The Longley dataset is a time series dataset
# Let's assume that the data is heteroskedastic and that we know
# the nature of the heteroskedasticity. We can then define
# `sigma` and use it to give us a GLS model
# First we will obtain the residuals from an OLS fit
ols_resid = sm.OLS(data.endog, data.exog).fit().resid
# Assume that the error terms follow an AR(1) process with a trend
# resid[i] = beta_0 + rho*resid[i-1] + e[i]
# where e ~ N(0,some_sigma**2)
# and that rho is simply the correlation of the residuals
# a consistent estimator for rho is to regress the residuals
# on the lagged residuals
resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
print resid_fit.tvalues[0]
print resid_fit.pvalues[0]
# While we don't have strong evidence that the errors follow an AR(1)
# process we continue
rho = resid_fit.params[0]
# As we know, an AR(1) process means that near-neighbors have a stronger
# relation so we can give this structure by using a toeplitz matrix
from scipy.linalg import toeplitz
toeplitz(range(5))
#.. array([[0, 1, 2, 3, 4],
#.. [1, 0, 1, 2, 3],
#.. [2, 1, 0, 1, 2],
#.. [3, 2, 1, 0, 1],
#.. [4, 3, 2, 1, 0]])
order = toeplitz(range(len(ols_resid)))
# so that our error covariance structure is actually rho**order
# which defines an autocorrelation structure
sigma = rho**order
gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
gls_results = gls_model.fit()
# of course, the exact rho in this instance is not known so it
# it might make more sense to use feasible gls, which currently only
# has experimental support
# We can use the GLSAR model with one lag, to get to a similar result
glsar_model = sm.GLSAR(data.endog, data.exog, 1)
glsar_results = glsar_model.iterative_fit(1)
# comparing gls and glsar results, we see that there are some small
# differences in the parameter estimates and the resulting standard
# errors of the parameter estimate. This might be do to the numerical
# differences in the algorithm, e.g. the treatment of initial conditions,
# because of the small number of observations in the longley dataset.
print gls_results.params
print glsar_results.params
print gls_results.bse
print glsar_results.bse