Skip to content


Subversion checkout URL

You can clone with
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

74 lines (54 sloc) 2.383 kB
"""Generalized Least Squares
import statsmodels.api as sm
import numpy as np
data = sm.datasets.longley.load()
data.exog = sm.add_constant(data.exog)
# The Longley dataset is a time series dataset
# Let's assume that the data is heteroskedastic and that we know
# the nature of the heteroskedasticity. We can then define
# `sigma` and use it to give us a GLS model
# First we will obtain the residuals from an OLS fit
ols_resid = sm.OLS(data.endog, data.exog).fit().resid
# Assume that the error terms follow an AR(1) process with a trend
# resid[i] = beta_0 + rho*resid[i-1] + e[i]
# where e ~ N(0,some_sigma**2)
# and that rho is simply the correlation of the residuals
# a consistent estimator for rho is to regress the residuals
# on the lagged residuals
resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
print resid_fit.tvalues[0]
print resid_fit.pvalues[0]
# While we don't have strong evidence that the errors follow an AR(1)
# process we continue
rho = resid_fit.params[0]
# As we know, an AR(1) process means that near-neighbors have a stronger
# relation so we can give this structure by using a toeplitz matrix
from scipy.linalg import toeplitz
#.. array([[0, 1, 2, 3, 4],
#.. [1, 0, 1, 2, 3],
#.. [2, 1, 0, 1, 2],
#.. [3, 2, 1, 0, 1],
#.. [4, 3, 2, 1, 0]])
order = toeplitz(range(len(ols_resid)))
# so that our error covariance structure is actually rho**order
# which defines an autocorrelation structure
sigma = rho**order
gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
gls_results =
# of course, the exact rho in this instance is not known so it
# it might make more sense to use feasible gls, which currently only
# has experimental support
# We can use the GLSAR model with one lag, to get to a similar result
glsar_model = sm.GLSAR(data.endog, data.exog, 1)
glsar_results = glsar_model.iterative_fit(1)
# comparing gls and glsar results, we see that there are some small
# differences in the parameter estimates and the resulting standard
# errors of the parameter estimate. This might be do to the numerical
# differences in the algorithm, e.g. the treatment of initial conditions,
# because of the small number of observations in the longley dataset.
print gls_results.params
print glsar_results.params
print gls_results.bse
print glsar_results.bse
Jump to Line
Something went wrong with that request. Please try again.