Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

74 lines (54 sloc) 2.383 kB
"""Generalized Least Squares
"""
import statsmodels.api as sm
import numpy as np
data = sm.datasets.longley.load()
data.exog = sm.add_constant(data.exog)
# The Longley dataset is a time series dataset
# Let's assume that the data is heteroskedastic and that we know
# the nature of the heteroskedasticity. We can then define
# `sigma` and use it to give us a GLS model
# First we will obtain the residuals from an OLS fit
ols_resid = sm.OLS(data.endog, data.exog).fit().resid
# Assume that the error terms follow an AR(1) process with a trend
# resid[i] = beta_0 + rho*resid[i-1] + e[i]
# where e ~ N(0,some_sigma**2)
# and that rho is simply the correlation of the residuals
# a consistent estimator for rho is to regress the residuals
# on the lagged residuals
resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
print resid_fit.tvalues[0]
print resid_fit.pvalues[0]
# While we don't have strong evidence that the errors follow an AR(1)
# process we continue
rho = resid_fit.params[0]
# As we know, an AR(1) process means that near-neighbors have a stronger
# relation so we can give this structure by using a toeplitz matrix
from scipy.linalg import toeplitz
toeplitz(range(5))
#.. array([[0, 1, 2, 3, 4],
#.. [1, 0, 1, 2, 3],
#.. [2, 1, 0, 1, 2],
#.. [3, 2, 1, 0, 1],
#.. [4, 3, 2, 1, 0]])
order = toeplitz(range(len(ols_resid)))
# so that our error covariance structure is actually rho**order
# which defines an autocorrelation structure
sigma = rho**order
gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
gls_results = gls_model.fit()
# of course, the exact rho in this instance is not known so it
# it might make more sense to use feasible gls, which currently only
# has experimental support
# We can use the GLSAR model with one lag, to get to a similar result
glsar_model = sm.GLSAR(data.endog, data.exog, 1)
glsar_results = glsar_model.iterative_fit(1)
# comparing gls and glsar results, we see that there are some small
# differences in the parameter estimates and the resulting standard
# errors of the parameter estimate. This might be do to the numerical
# differences in the algorithm, e.g. the treatment of initial conditions,
# because of the small number of observations in the longley dataset.
print gls_results.params
print glsar_results.params
print gls_results.bse
print glsar_results.bse
Jump to Line
Something went wrong with that request. Please try again.