# Spatial Error - Fixed Effects Panel Model

This notebook introduces the Spatial Error model for Fixed Effects Panel data. It is based on the estimation procedure outlined in:
- Anselin, Le Gallo and Jayet (2008). Spatial Panel Econometrics.
- Elhorst (2014). Spatial Econometrics, From Cross-Sectional Data to Spatial Panels.

In [1]:
import libpysal
import spreg
import numpy as np
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse.linalg import splu as SuperLU
from spreg.utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn
from spreg.sputils import spdot, spfill_diagonal, spinv
import spreg.diagnostics as DIAG
import spreg.user_output as USER
import spreg.summary_output as SUMMARY
try:
    from scipy.optimize import minimize_scalar
    minimize_scalar_available = True
except ImportError:
    minimize_scalar_available = False
    
from spreg.panel_utils import check_panel, demean_panel

np.set_printoptions(suppress=True, precision=8)

### Read data

In [2]:
# NCOVR sample data: US county homicides (3085 areas).
nat = libpysal.examples.load_example("NCOVR")
db = libpysal.io.open(nat.get_path("NAT.dbf"), "r")

# Queen-contiguity spatial weights built from the shapefile;
# the "r" transform row-standardizes them.
nat_shp = libpysal.examples.get_path("NAT.shp")
w = libpysal.weights.Queen.from_shapefile(nat_shp)
w.transform = "r"

# Dependent variable in wide format: one column per entry of name_y.
name_y = ["HR70", "HR80", "HR90"]
y = np.array([db.by_col(col) for col in name_y]).T

# Independent variables in wide format, grouped by variable
# (all RD columns first, then all PS columns).
name_x = ["RD70", "RD80", "RD90", "PS70", "PS80", "PS90"]
x = np.array([db.by_col(col) for col in name_x]).T

# Tolerance handed to the scalar optimizer further down (its `xatol` option).
epsilon = 1e-7

### Transform variables

In [3]:
# Check the data structure and convert it from wide to long format if needed.
# The (possibly adjusted) variable names are returned too and rebound here.
bigy, bigx, name_y, name_x = check_panel(y, x, w, name_y, name_x)

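The wide-format input is assumed to be ordered by variable: `y[:, 0:T]` holds the T periods of the dependent variable and, similarly, `x[:, 0:T]` refers to the T periods of k1, `x[:, T:2T]` to those of k2, etc.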


Demeaning the variables using 
$$
y^\ast = Q_0 y
$$ 

where $Q_0 = J_T \otimes I_N$, $J_T = I_T - \iota \iota' / T$ and $\iota$ is a $T \times 1$ vector of ones.

In [4]:
# Panel dimensions: n cross-sectional units, t periods, k regressors.
n = w.n
t = bigy.shape[0] // n
k = bigx.shape[1]
# Demeaned variables.
# Note: this rebinds `y` and `x`, replacing the wide-format arrays read earlier.
y = demean_panel(bigy, n, t)
x = demean_panel(bigx, n, t)
# Dense weights: W is n x n, W_nt = I_T (x) W is block-diagonal (nt x nt).
# Both are kept because later cells still reference `W` and `W_nt`.
W = w.full()[0]
W_nt = np.kron(np.identity(t), W)
# Sparse counterparts of the same two matrices.
Wsp = w.sparse
Wsp_nt = sp.kron(sp.identity(t), Wsp)
# Lag variables, computed with the sparse operator: the product only touches
# the non-zero weights instead of multiplying through the dense nt x nt matrix.
ylag = Wsp_nt @ y
xlag = Wsp_nt @ x

### Estimation

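For a given value of $\lambda$, the (demeaned) variables are spatially filtered:
$$
y_s = y - \lambda (I_T \otimes W) y, \qquad X_s = X - \lambda (I_T \otimes W) X
$$
and the residual sum of squares of the OLS regression of $y_s$ on $X_s$ is
$$
e_\lambda' e_\lambda = y_s' y_s - y_s' X_s (X_s' X_s)^{-1} X_s' y_s
$$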

In [5]:
def err_c_loglik_sp(lam, n, t, y, ylag, x, xlag, I, Wsp):
    """Negative concentrated log-likelihood of the panel spatial error model.

    Constants are dropped; the Jacobian term is obtained from a sparse LU
    factorization of ``I - lam * Wsp``.

    Parameters
    ----------
    lam : float or numpy.ndarray
        Spatial error coefficient; a ``(1, 1)`` array is unwrapped to a scalar.
    n, t : int
        Number of cross-sectional units and number of periods.
    y, ylag : numpy.ndarray
        Dependent variable and its spatial lag (column vectors).
    x, xlag : numpy.ndarray
        Regressors and their spatial lags.
    I : sparse matrix
        Identity matrix conformable with ``Wsp``.
    Wsp : sparse matrix
        Spatial weights.

    Returns
    -------
    float
        ``(n*t/2) * ln(e'e) - t * ln|I - lam*Wsp|``, i.e. the quantity to
        minimize, where ``e'e`` is the residual sum of squares of the
        regression of ``y - lam*ylag`` on ``x - lam*xlag``.
    """
    # The optimizer may hand over a 1x1 array instead of a plain scalar.
    if isinstance(lam, np.ndarray) and lam.shape == (1, 1):
        lam = lam[0][0]

    # Spatially filtered variables.
    y_filt = y - lam * ylag
    x_filt = x - lam * xlag

    # Residual sum of squares: y'y - y'x (x'x)^-1 x'y on the filtered data.
    cross_xy = x_filt.T.dot(y_filt)
    gram_inv = np.linalg.inv(x_filt.T.dot(x_filt))
    explained = cross_xy.T.dot(gram_inv.dot(cross_xy))
    rss = (y_filt.T.dot(y_filt) - explained)[0][0]

    # ln|I - lam*W| as the sum of log absolute diagonal entries of U.
    factor = SuperLU((I - lam * Wsp).tocsc())
    log_det = np.log(np.abs(factor.U.diagonal())).sum()

    # Sign is flipped relative to the log-likelihood so it can be minimized.
    return (n*t / 2.0) * np.log(rss) - t * log_det

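Then, estimate $\lambda$ by minimizing the negative of the concentrated log-likelihood function (constants dropped):
$$
L(\lambda) = \frac{NT}{2} \ln (e_\lambda' e_\lambda) - T \ln | I_N - \lambda W |
$$

where $e_\lambda$ are the residuals of the OLS regression of $y - \lambda (I_T \otimes W) y$ on $X - \lambda (I_T \otimes W) X$.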

In [6]:
# Identity matching the n x n weights; used for the Jacobian term |I - lam*W|.
I = sp.identity(n)

# Bounded scalar search for lam over (-1, 1).
# minimize_scalar takes no starting value: with method="bounded" the search
# interval comes from `bounds` alone, so only keyword arguments are passed.
res = minimize_scalar(
    err_c_loglik_sp,
    bounds=(-1.0, 1.0),
    args=(n, t, y, ylag, x, xlag, I, Wsp),
    method="bounded",
    options={"xatol": epsilon},
)

# Estimated spatial error coefficient.
lam = res.x
lam

0.19434596620938455

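Calculate betas as:
$$
\beta = (X_s' X_s)^{-1} X_s' y_s
$$

with $y_s = y - \lambda (I_T \otimes W) y$ and $X_s = X - \lambda (I_T \otimes W) X$.

Calculate $\sigma^2$ as:
$$
\sigma^2 = \frac{e'e}{NT}, \qquad e = u - \lambda (I_T \otimes W) u, \qquad u = y - X\beta
$$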

In [7]:
# compute full log-likelihood
# res.fun is the minimized objective  NT/2 * ln(e'e) - T * ln|I_N - lam*W|,
# which uses the raw sum of squares e'e rather than sigma^2 = e'e / NT.
# Plugging sigma^2 = e'e / NT into
#   lnL = -NT/2 * ln(2*pi*sigma^2) + T * ln|I_N - lam*W| - e'e / (2*sigma^2)
# gives  -res.fun + NT/2 * ln(NT) - NT/2 * ln(2*pi) - NT/2,
# so the NT/2 * ln(NT) term has to be added back together with the constants.
ln2pi = np.log(2.0 * np.pi)
llik = -res.fun + (n*t) / 2.0 * np.log(n*t) - (n*t) / 2.0 * ln2pi - (n*t) / 2.0
logll = llik

# b, residuals and predicted values
# OLS on the spatially filtered variables at the estimated lam.
ys = y - lam * ylag
xs = x - lam * xlag
xsxs = np.dot(xs.T, xs)
xsxsi = np.linalg.inv(xsxs)  # reused below for the coefficient variance
xsys = np.dot(xs.T, ys)
b = np.dot(xsxsi, xsys)

# Coefficient vector with lam appended as the last row.
betas = np.vstack((b, lam))

# Residuals and fitted values on the unfiltered (demeaned) variables.
u = y - np.dot(x, b)
predy = y - u

### Variance matrix

$$
Var[\beta, \lambda, \sigma^2] = 
\begin{pmatrix}
\frac{X_s' X_s}{\sigma^2} & 0 & 0 \\ 
0 & T \cdot tr(\tilde{W}^2 + \tilde{W}'\tilde{W}) & \frac{T}{\sigma^2} tr(\tilde{W}) \\ 
0 & \frac{T}{\sigma^2} tr(\tilde{W}) & \frac{NT}{2 \sigma^4} \\
\end{pmatrix}^{-1}
$$

where $\tilde{W} = W (I_N - \lambda W)^{-1}$ and $X_s = X - \lambda (I_T \otimes W) X$.

In [8]:
# Spatially filtered residuals: e = u - lam * (I_T (x) W) u
u_lag = spdot(W_nt, u)
e_filtered = u - lam * u_lag

# Residual variance: sum of squared filtered residuals over the n*t observations.
sig2 = np.dot(e_filtered.T, e_filtered) / (n * t)

# Variance-covariance matrix of the betas: sig2 times (xs'xs)^-1.
varb = sig2 * xsxsi

In [9]:
# variance-covariance matrix lambda, sigma
# Builds the 2 x 2 block for (lam, sig2), inverts it, and then assembles the
# (k+1) x (k+1) matrix `vm` for (betas, lam).

# a = I - lam * W: start from -lam * W, then set the diagonal to 1.
# spfill_diagonal is relied on to modify `a` in place (its return value is unused).
a = -lam * W
spfill_diagonal(a, 1.0)
# wai = W (I - lam * W)^-1 -- presumably spinv returns the matrix inverse; confirm in spreg.sputils
ai = spinv(a)
wai = spdot(W, ai)
# tr1 = trace of wai
tr1 = wai.diagonal().sum()

# tr2 = trace of wai squared
wai2 = spdot(wai, wai)
tr2 = wai2.diagonal().sum()

# tr3 = trace of wai' wai
waiTwai = spdot(wai.T, wai)
tr3 = waiTwai.diagonal().sum()

# Columns of the 2 x 2 matrix for (lam, sig2); every term is scaled by the t periods.
v1 = np.vstack((t * (tr2 + tr3),
                t * tr1 / sig2))
v2 = np.vstack((t * tr1 / sig2,
                t * n / (2.0 * sig2 ** 2)))

v = np.hstack((v1, v2))

# Inverse of that matrix; its [0, 0] entry is used as the variance of lam below.
vm1 = np.linalg.inv(v)

# create variance matrix for beta, lambda
# Block-diagonal layout: varb in the top-left, var(lam) in the bottom-right,
# zeros for the beta/lam covariances.
vv = np.hstack((varb, np.zeros((k, 1))))
vv1 = np.hstack(
    (np.zeros((1, k)), vm1[0, 0] * np.ones((1, 1))))

vm = np.vstack((vv, vv1))
vm

array([[0.03017989, 0.02725093, 0.        ],
       [0.02725093, 0.30312215, 0.        ],
       [0.        , 0.        , 0.00025681]])

### Using classes

# R section

In [1]:
### set options
### custom console prompts, 70-column output, plain (non-fancy) quotes;
### warn = -1 suppresses warning messages for the rest of the session
options(prompt = "R> ",  continue = "+ ", width = 70, useFancyQuotes = FALSE, warn=-1)

### load library
library("splm")

Loading required package: spdep

Loading required package: sp

Loading required package: spData

To access larger datasets in this package, install the
spDataLarge package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`

Loading required package: sf

Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1



In [4]:
## read data
nat <- read.csv("data/NAT.csv", header = TRUE)
## set formula
## NOTE(review): `fm` is not referenced in the cells shown below; the spml()
## call spells the same formula out again.
fm <- HR ~ RD + PS
## spatial weights read from CSV as a plain numeric matrix
wnat <- as.matrix(read.csv("data/NAT_w.csv"))
## standardization
## row-standardize: the vector of row sums is recycled down the columns,
## so element [i, j] is divided by the sum of row i
wnat <- wnat/apply(wnat, 1, sum)
## make it a listw
lwnat <- mat2listw(wnat)

## keep only these columns, in this order
## (presumably FIPSNO and YEAR act as the unit and time index for spml -- confirm)
col_order <- c("FIPSNO", "YEAR", "HR", "RD", "PS")
nat <- nat[, col_order]

In [5]:
## Fit the panel model with individual fixed effects (model = "within").
## NOTE: despite the object name `fixed_lag`, lag=FALSE combined with
## spatial.error="b" requests a spatial *error* specification, not a
## spatially lagged dependent variable.
fixed_lag = spml(HR ~ RD + PS, data=nat, listw=lwnat, effect="individual",
                 model="within", spatial.error = "b", lag=FALSE)

Registered S3 methods overwritten by 'spatialreg':
  method                   from 
  residuals.stsls          spdep
  deviance.stsls           spdep
  coef.stsls               spdep
  print.stsls              spdep
  summary.stsls            spdep
  print.summary.stsls      spdep
  residuals.gmsar          spdep
  deviance.gmsar           spdep
  coef.gmsar               spdep
  fitted.gmsar             spdep
  print.gmsar              spdep
  summary.gmsar            spdep
  print.summary.gmsar      spdep
  print.lagmess            spdep
  summary.lagmess          spdep
  print.summary.lagmess    spdep
  residuals.lagmess        spdep
  deviance.lagmess         spdep
  coef.lagmess             spdep
  fitted.lagmess           spdep
  logLik.lagmess           spdep
  fitted.SFResult          spdep
  print.SFResult           spdep
  fitted.ME_res            spdep
  print.ME_res             spdep
  print.lagImpact          spdep
  plot.lagImpact           spdep
  summary.lagImpact      

In [6]:
## Print the fitted model: spatial error parameter and coefficient estimates
## with their standard errors (`fixed_lag` holds the spatial error fit).
summary(fixed_lag)

Spatial panel fixed effects error model
 

Call:
spml(formula = HR ~ RD + PS, data = nat, listw = lwnat, model = "within", 
    effect = "individual", lag = FALSE, spatial.error = "b")

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-27.238335  -1.600550  -0.097525   1.304874  48.048799 

Spatial error parameter:
    Estimate Std. Error t-value  Pr(>|t|)    
rho 0.194346   0.016025  12.127 < 2.2e-16 ***

Coefficients:
   Estimate Std. Error t-value  Pr(>|t|)    
RD  0.86979    0.17180  5.0627 4.133e-07 ***
PS -2.96607    0.54448 -5.4475 5.107e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


In [21]:
# Python estimates, for comparison with the R summary:
# regression coefficients first, spatial error coefficient lam in the last row.
betas

array([[ 0.86979232],
       [-2.96606744],
       [ 0.19434604]])

In [23]:
# Python variance-covariance matrix for (betas, lam); the square roots of its
# diagonal are the standard errors to set against the R summary.
vm

array([[3.01798911e-02, 2.72509296e-02, 0.00000000e+00],
       [2.72509296e-02, 3.03122177e-01, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 2.56811350e-04]])