# Spatial Lag - Fixed Effects Panel Model

This notebook introduces the Spatial Lag model for Fixed Effects Panel data. It is based on the estimation procedure outlined in:
- Anselin, Le Gallo and Jayet (2008). Spatial Panel Econometrics.
- Elhorst (2014). Spatial Econometrics, From Cross-Sectional Data to Spatial Panels.

In [2]:
import libpysal
import spreg
import numpy as np
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse.linalg import splu as SuperLU
from spreg.utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn
from spreg.sputils import spdot, spfill_diagonal, spinv
import spreg.diagnostics as DIAG
import spreg.user_output as USER
import spreg.summary_output as SUMMARY
try:
    from scipy.optimize import minimize_scalar
    minimize_scalar_available = True
except ImportError:
    minimize_scalar_available = False
    
from spreg.panel_utils import check_panel, demean_panel

### Read data

In [3]:
# Open data on NCOVR US County Homicides (3085 areas).
nat = libpysal.examples.load_example("NCOVR")
# Define dependent variable (one column per period, wide format)
name_y = ["HR70", "HR80", "HR90"]
# Define independent variables (columns grouped by regressor, then by period)
name_x = ["RD70", "RD80", "RD90", "PS70", "PS80", "PS90"]
# Read the attribute columns, then release the DBF handle: nothing below
# needs the open file once y and x are in memory.
db = libpysal.io.open(nat.get_path("NAT.dbf"), "r")
try:
    y = np.array([db.by_col(name) for name in name_y]).T
    x = np.array([db.by_col(name) for name in name_x]).T
finally:
    db.close()
# Create spatial weight matrix from the same example's shapefile
nat_shp = nat.get_path("NAT.shp")
w = libpysal.weights.Queen.from_shapefile(nat_shp)
w.transform = 'r'  # row-standardize the weights

# Numerical tolerance, reused below as the optimizer's `xatol` and as the
# power-expansion `threshold` in inverse_prod.
epsilon = 0.0000001

### Transform variables

In [4]:
# Check the data structure and convert from wide to long format if needed.
# The returned bigy / bigx are the arrays used for all later computations;
# name_y / name_x are replaced by whatever names check_panel hands back
# (presumably one name per variable after stacking -- confirm against
# spreg.panel_utils.check_panel).
bigy, bigx, name_y, name_x = check_panel(y, x, w, name_y, name_x)

When `x` is passed in wide format, it is assumed that `x[:, 0:T]` holds the T periods of the first regressor (k1), `x[:, T:2T]` those of the second (k2), etc.


Demeaning the variables using 
$$
y^\ast = Q_0 y
$$ 

where $Q_0 = J_T \otimes I_N$ and $J_T = I_T - \iota \cdot \iota' / T$

In [5]:
n = w.n
t = bigy.shape[0] // n
k = bigx.shape[1]
# The stacked panel must hold exactly t complete cross-sections of n units;
# otherwise the integer division above silently truncates.
assert bigy.shape[0] == n * t, "rows of bigy are not a multiple of w.n"
# Demeaned variables
y = demean_panel(bigy, n, t)
x = demean_panel(bigx, n, t)
# Dense N x N weights (used further down to build the information matrix)
W = w.full()[0]
# Sparse weights and their NT x NT block-diagonal expansion I_T (x) W.
# CSR keeps the operator usable both with `@` and with spreg's sparse helpers.
Wsp = w.sparse
Wsp_nt = sp.kron(sp.identity(t), Wsp, format="csr")
# Lag dependent variable. The sparse operator gives the same product as the
# dense np.kron(np.identity(t), W) without allocating an NT x NT dense array.
ylag = Wsp_nt @ y

### Estimation

First, I'll compute the residuals of these two regressions:
$$
y = X\beta_0 + e_0
$$
and
$$
Wy = X\beta_1 + e_1
$$

In [6]:
# Two auxiliary OLS fits that share the same (X'X)^{-1}
xtx = spdot(x.T, x)
xtxi = la.inv(xtx)

# y on X  ->  b0, e0
xty = spdot(x.T, y)
b0 = spdot(xtxi, xty)
e0 = y - spdot(x, b0)

# Wy on X  ->  b1, e1
xtyl = spdot(x.T, ylag)
b1 = spdot(xtxi, xtyl)
e1 = ylag - spdot(x, b1)

Then, estimate $\rho$ by maximizing the concentrated log-likelihood. Dropping constants, this is equivalent to minimizing:
$$
L = \frac{NT}{2} \ln (e'_r e_r) - T \ln | I_N - \rho W |
$$

where $e_r = e_0 - \rho e_1$. 

In [7]:
def lag_c_loglik_sp(rho, n, t, e0, e1, I, Wsp):
    """Objective minimized to estimate rho in the panel lag model (sparse algebra).

    Evaluates ``(n*t/2) * ln(er'er) - t * ln|det(I - rho*Wsp)|`` with
    ``er = e0 - rho * e1``. The residual sum of squares is NOT divided by
    ``n*t`` and no ``2*pi`` terms are included, so the value differs from the
    full negative concentrated log-likelihood by constants that do not depend
    on ``rho``.

    Parameters
    ----------
    rho : float or size-1 numpy array
        Spatial autoregressive parameter. Size-1 arrays of any shape are
        reduced to a scalar before use.
    n, t : int
        Number of cross-sectional units and of time periods.
    e0, e1 : arrays
        Residual vectors combined as ``e0 - rho * e1``.
    I : sparse matrix
        Identity with the same shape as ``Wsp``.
    Wsp : sparse matrix
        Spatial weights for a single cross-section.

    Returns
    -------
    Whatever ``spdot(er.T, er)`` yields after the log/scale/shift; callers in
    this notebook index the result as a (1, 1) array.
    """
    # Optimizers may hand rho back wrapped in an array; any size-1 array is
    # unwrapped so that `rho * Wsp` below is a scalar-times-sparse product.
    if isinstance(rho, np.ndarray) and rho.size == 1:
        rho = rho.item()
    er = e0 - rho * e1
    sig2 = spdot(er.T, er)  # residual sum of squares (not yet a variance)
    nlsig2 = (n*t / 2.0) * np.log(sig2)
    a = I - rho * Wsp
    # ln|det(a)| from the diagonal of the U factor of a sparse LU decomposition
    LU = SuperLU(a.tocsc())
    jacob = t * np.sum(np.log(np.abs(LU.U.diagonal())))
    clike = nlsig2 - jacob
    return clike

In [8]:
I = sp.identity(n)
# Bounded scalar search for rho on (-1, 1). No `bracket` is passed: the
# 'bounded' method only uses `bounds`, so a positional 0.0 would merely look
# like a starting value without being one.
res = minimize_scalar(lag_c_loglik_sp, bounds=(-1.0, 1.0),
                      args=(n, t, e0, e1, I, Wsp), method='bounded',
                      options={"xatol": epsilon})

# res.x can come back as a float or as a size-1 array (the objective returns
# a (1, 1) array); reduce it to a plain scalar either way.
rho = np.asarray(res.x).item()
rho

0.19030427258738358

Calculate betas as:
$$
\beta = \beta_0 - \rho \beta_1
$$

In [9]:
# Slope coefficients implied by the estimated rho: combine the two
# auxiliary-regression coefficient vectors.
b = b0 - rho * b1
# Stack into a single coefficient vector for display.
betas = np.vstack((b, rho))   # rho added as last coefficient
betas

array([[ 0.80058859],
       [-2.60035232],
       [ 0.19030427]])

Calculate $\sigma^2$ as:
$$
\sigma^2 = \frac{1}{NT} (e_0 - \rho \cdot e_1)' (e_0 - \rho \cdot e_1)
$$

In [10]:
# compute full log-likelihood, including constants
# res.fun = (NT/2) ln(e'e) - T ln|I - rho W| is built from the raw sum of
# squares, whereas the log-likelihood needs ln(sigma2) = ln(e'e / NT). The
# (NT/2) ln(NT) term is therefore added back here together with the usual
# -(NT/2) ln(2 pi) - NT/2.
ln2pi = np.log(2.0 * np.pi)
llik = (-res.fun + (n*t) / 2.0 * np.log(n*t)
        - (n*t) / 2.0 * ln2pi - (n*t) / 2.0)
# res.fun may be a float or a size-1 array; reduce to a plain scalar
logll = np.asarray(llik).item()

# Calculate sigma2 (kept as the array returned by spdot; later cells divide by it)
u = e0 - rho * e1
sig2 = spdot(u.T, u) / (n*t)

### Variance matrix

$$
Var[\beta, \rho, \sigma^2]^{-1} = 
\begin{pmatrix}
\frac{X'X}{\sigma^2}               &                                               &  \\ 
\frac{\left( X' (I_T \otimes \tilde{W}) X \beta \right)'}{\sigma^2} & T \cdot tr(\tilde{W}^2 + \tilde{W}'\tilde{W}) + \frac{\beta' X' (I_T \otimes \tilde{W}'\tilde{W}) X \beta}{\sigma^2} &  \\ 
0                                  & \frac{T}{\sigma^2} tr(\tilde{W}) & \frac{NT}{2 \sigma^4} \\
\end{pmatrix}
$$

where $\tilde{W} = W (I_N - \rho W)^{-1}$

In [11]:
# Fitted values implied by the residuals u (demeaned scale)
predy = y - u
# Linear predictor X b, also reused when building the information matrix
xb = spdot(x, b)
# Prediction through the spatial multiplier: presumably (I - rho W)^{-1} X b
# approximated by a power expansion truncated at `epsilon` -- confirm against
# spreg.utils.inverse_prod.
predy_e = inverse_prod(
    Wsp_nt, xb, rho, inv_method="power_exp", threshold=epsilon)
# Residuals with respect to that prediction
e_pred = y - predy_e

In [12]:
# information matrix
# wai = W (I - rho W)^{-1}
a = -rho * W
spfill_diagonal(a, 1.0)
ai = spinv(a)
wai = spdot(W, ai)
tr1 = wai.diagonal().sum() #same for sparse and dense

wai2 = spdot(wai, wai)
tr2 = wai2.diagonal().sum()

waiTwai = spdot(wai.T, wai)
tr3 = waiTwai.diagonal().sum()

# (I_T kron M) xb is block-diagonal: it applies M to each of the t
# consecutive length-n slices of xb. Doing that slice by slice gives the same
# vector as the dense np.kron(np.identity(t), M) product without allocating
# an NT x NT dense matrix.
xb_blocks = np.split(xb, t)

wpredy = np.vstack([spdot(wai, blk) for blk in xb_blocks])
xTwpy = spdot(x.T, wpredy)

wTwpredy = np.vstack([spdot(waiTwai, blk) for blk in xb_blocks])
wpyTwpy = spdot(xb.T, wTwpredy)

# order of variables is beta, rho, sigma2
# v1, v2, v3 are the column blocks of the (k+2) x (k+2) information matrix
v1 = np.vstack(
    (xtx / sig2, xTwpy.T / sig2, np.zeros((1, k))))
v2 = np.vstack(
    (xTwpy / sig2, t*(tr2 + tr3) + wpyTwpy / sig2, t * tr1 / sig2))
v3 = np.vstack(
    (np.zeros((k, 1)), t * tr1 / sig2, n * t / (2.0 * sig2 ** 2)))

v = np.hstack((v1, v2, v3))

vm1 = la.inv(v)  # vm1 includes variance for sigma2
vm = vm1[:-1, :-1]  # vm is for coefficients only
vm

array([[ 2.60652741e-02,  2.17096431e-02, -7.43325993e-05],
       [ 2.17096431e-02,  2.43590248e-01,  4.26859202e-04],
       [-7.43325993e-05,  4.26859202e-04,  2.55970568e-04]])

In [14]:
# Show small floats in fixed-point instead of scientific notation below
np.set_printoptions(suppress=True)

In [15]:
# Full information matrix (order: beta, rho, sigma2), rounded for display
np.round(v, 4)

array([[  41.5266,   -3.733 ,   18.2844,    0.    ],
       [  -3.733 ,    4.4529,   -8.5097,    0.    ],
       [  18.2844,   -8.5097, 3949.156 ,   21.8227],
       [   0.    ,    0.    ,   21.8227,   20.7451]])

In [16]:
# Column block of the information matrix belonging to the betas
np.round(v1, 4)

array([[41.5266, -3.733 ],
       [-3.733 ,  4.4529],
       [18.2844, -8.5097],
       [ 0.    ,  0.    ]])

In [17]:
# Column of the information matrix belonging to rho
np.round(v2, 4)

array([[  18.2844],
       [  -8.5097],
       [3949.156 ],
       [  21.8227]])

# R section

In [2]:
### load library
# splm is attached for the spatial panel estimator spml() used below
library("splm")

### set options
# console prompt/width formatting; warn = -1 silences all warnings
options(prompt = "R> ",  continue = "+ ", width = 70, useFancyQuotes = FALSE, warn=-1)

In [5]:
## panel data: read, then keep the columns in the order
## (unit id, time, response, regressors)
nat <- read.csv("data/NAT.csv", header = TRUE)
col_order <- c("FIPSNO", "YEAR", "HR", "RD", "PS")
nat <- nat[, col_order]

## model formula
fm <- HR ~ RD + PS

## spatial weights: read as a matrix, row-standardize, convert to listw
wnat <- as.matrix(read.csv("data/NAT_w.csv"))
wnat <- wnat / rowSums(wnat)
lwnat <- mat2listw(wnat)

In [6]:
## Fixed-effects ("within") panel model with a spatial lag and no spatial error
fixed_lag <- spml(
  HR ~ RD + PS,
  data = nat,
  listw = lwnat,
  effect = "individual",
  model = "within",
  spatial.error = "none",
  lag = TRUE
)

Registered S3 methods overwritten by 'spatialreg':
  method                   from 
  residuals.stsls          spdep
  deviance.stsls           spdep
  coef.stsls               spdep
  print.stsls              spdep
  summary.stsls            spdep
  print.summary.stsls      spdep
  residuals.gmsar          spdep
  deviance.gmsar           spdep
  coef.gmsar               spdep
  fitted.gmsar             spdep
  print.gmsar              spdep
  summary.gmsar            spdep
  print.summary.gmsar      spdep
  print.lagmess            spdep
  summary.lagmess          spdep
  print.summary.lagmess    spdep
  residuals.lagmess        spdep
  deviance.lagmess         spdep
  coef.lagmess             spdep
  fitted.lagmess           spdep
  logLik.lagmess           spdep
  fitted.SFResult          spdep
  print.SFResult           spdep
  fitted.ME_res            spdep
  print.ME_res             spdep
  print.lagImpact          spdep
  plot.lagImpact           spdep
  summary.lagImpact      

In [7]:
## Coefficient table for the fitted model, for comparison with the Python results
summary(fixed_lag)

Spatial panel fixed effects lag model
 

Call:
spml(formula = HR ~ RD + PS, data = nat, listw = lwnat, model = "within", 
    effect = "individual", lag = TRUE, spatial.error = "none")

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-26.487055  -1.566302  -0.090734   1.283922  48.447210 

Spatial autoregressive coefficient:
       Estimate Std. Error t-value  Pr(>|t|)    
lambda 0.190304   0.015999  11.895 < 2.2e-16 ***

Coefficients:
   Estimate Std. Error t-value  Pr(>|t|)    
RD  0.80059    0.16145  4.9588 7.092e-07 ***
PS -2.60035    0.49355 -5.2687 1.374e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


In [8]:
## Log-likelihood stored on the fitted object
fixed_lag$logLik

In [9]:
## Coefficient covariance matrix stored on the fitted object
fixed_lag$vcov

0,1,2
0.0002559706,0.0,0.0
0.0,0.02606527,0.02170964
0.0,0.02170964,0.24359025
