## Spatial Random Effects Panel Model

(KKP 2007)

In [7]:
%load_ext autoreload
%autoreload 2

import numpy as np
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse.linalg import splu as SuperLU
from utils import RegressionPropsY, RegressionPropsVM, inverse_prod, set_warn
from sputils import spdot, spfill_diagonal, spinv, spbroadcast
try:
    from scipy.optimize import minimize_scalar
    minimize_scalar_available = True
except ImportError:
    minimize_scalar_available = False

import ols as OLS
from utils import optim_moments, RegressionPropsY, get_spFilter
from utils import spdot, set_warn
from panel_utils import check_panel, demean_panel
import user_output as USER
import summary_output as SUMMARY
import regimes as REGI
from libpysal import weights

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Read data

In [8]:
# First import libpysal to load the spatial analysis tools.
import libpysal
from libpysal.examples import load_example
from libpysal.weights import Queen

# Open data on NCOVR US County Homicides (3085 areas).
nat = load_example('Natregimes')
db = libpysal.io.open(nat.get_path('natregimes.dbf'),'r')
nat_shp = libpysal.examples.get_path("natregimes.shp")
w = Queen.from_shapefile(nat_shp)
w.transform = 'r'

name_y = ['HR70','HR80','HR90']
y = np.array([db.by_col(name) for name in name_y]).T

name_x = ['RD70','RD80','RD90','PS70','PS80','PS90']
x = np.array([db.by_col(name) for name in name_x]).T

method = "full"
epsilon = 0.0000001

In [9]:
n_rows = USER.check_arrays(y, x)
bigy, bigx, name_y, name_x = check_panel(y, x, w, name_y, name_x)

Similarly, assuming x[:, 0:T] refers to T periods of k1, x[:, T+1:2T] refers to k2, etc.


In [10]:
n = w.n
t = bigy.shape[0] // n
k = bigx.shape[1]
# Demeaned variables
y = demean_panel(bigy, n, t)
x = demean_panel(bigx, n, t)
# Lag dependent variable
W = np.kron(np.identity(t), w.full()[0])
Wsp = sp.kron(sp.identity(t), w.sparse)
ylag = spdot(W, y)

In [11]:
# b0, b1, e0 and e1
xtx = spdot(x.T, x)
xtxi = la.inv(xtx)
xty = spdot(x.T, y)
xtyl = spdot(x.T, ylag)
b0 = spdot(xtxi, xty)
b1 = spdot(xtxi, xtyl)
e0 = y - spdot(x, b0)
e1 = ylag - spdot(x, b1)
methodML = method.upper()

In [12]:
def lag_c_loglik(rho, n, t, e0, e1, W):
    # concentrated log-lik for lag model, no constants, brute force
    er = e0 - rho * e1
    sig2 = spdot(er.T, er) / (n*t)
    nlsig2 = (n*t / 2.0) * np.log(sig2)
    a = -rho * W
    spfill_diagonal(a, 1.0)
    jacob = t * np.log(np.linalg.det(a))
    # this is the negative of the concentrated log lik for minimization
    clik = nlsig2 - jacob
    return clik

In [13]:
res = minimize_scalar(lag_c_loglik, 0.0, bounds=(-1.0, 1.0),
                      args=(n, t, e0, e1, W), method='bounded',
                      tol=epsilon)

rho = res.x[0][0]
rho



0.10241610744076503

In [14]:
# compute full log-likelihood, including constants
ln2pi = np.log(2.0 * np.pi)
llik = -res.fun - (n*t) / 2.0 * ln2pi - (n*t) / 2.0
logll = llik[0][0]

# b, residuals and predicted values

b = b0 - rho * b1
betas = np.vstack((b, rho))   # rho added as last coefficient
u = e0 - rho * e1
predy = y - u

xb = spdot(x, b)

predy_e = inverse_prod(
    sp.csr_matrix(W), xb, rho, inv_method="power_exp", threshold=epsilon)
e_pred = y - predy_e
sig2 = spdot(u.T, u)

In [15]:
# information matrix
# if w should be kept sparse, how can we do the following:
a = -rho * W
spfill_diagonal(a, 1.0)
ai = spinv(a)
wai = spdot(W, ai)
tr1 = wai.diagonal().sum() #same for sparse and dense

wai2 = spdot(wai, wai)
tr2 = wai2.diagonal().sum()

waiTwai = spdot(wai.T, wai)
tr3 = waiTwai.diagonal().sum()
### to here

wpredy = W @ predy_e
wpyTwpy = spdot(wpredy.T, wpredy)
xTwpy = spdot(x.T, wpredy)

# order of variables is beta, rho, sigma2

v1 = np.vstack(
    (xtx / sig2, xTwpy.T / sig2, np.zeros((1, k))))
v2 = np.vstack(
    (xTwpy / sig2, t*(tr2 + tr3) + wpyTwpy / sig2, t * tr1 / sig2))
v3 = np.vstack(
    (np.zeros((k, 1)), t * tr1 / sig2, n * t / (2.0 * sig2 ** 2)))

v = np.hstack((v1, v2, v3))

vm1 = la.inv(v)  # vm1 includes variance for sigma2
vm = vm1[:-1, :-1]  # vm is for coefficients only

In [16]:
print(v1.shape)
print(v2.shape)
print(v3.shape)
print(v.shape)

(4, 2)
(4, 1)
(4, 1)
(4, 4)


In [17]:
vm

array([[ 2.42947683e+02,  2.03673981e+02, -2.42252432e-05],
       [ 2.03673981e+02,  2.26568340e+03,  1.45645572e-04],
       [-2.42252432e-05,  1.45645573e-04,  9.36462823e-05]])

In [18]:
betas

array([[ 0.80898512],
       [-2.69017237],
       [ 0.10241611]])

In [49]:
betas

array([[3.6644243 ],
       [1.34296953],
       [0.22554397]])

In [60]:
n_rows = USER.check_arrays(y, x)
bigy, bigx, name_y, name_x = check_panel(y, x, w, name_y, name_x)
USER.check_weights(w, bigy, w_required=True, time=True)
x_constant, name_x, warn = USER.check_constant(bigx,name_x)
# set_warn(warn)
title = "GM SPATIAL ERROR PANEL MODEL - RANDOM EFFECTS (KKP)"
name_x = USER.set_name_x(name_x, x_constant)
regimes=None
name_regimes=None
if regimes is not None:
    regimes = regimes
    name_regimes = USER.set_name_ds(name_regimes)
    regimes_l = _set_regimes(w, bigy.shape[0])
    name_x_r = name_x
    x_constant, name_x = REGI.Regimes_Frame.__init__(x_constant, regimes_l, constant_regi=False, 
                                                          cols2regi='all', names=name_x)


Similarly, assumingx[:, 0:T] refers to T periods of k1, x[:, T+1:2T] refersto k2, etc.


In [3]:
def _moments_kkp(ws, u, i, trace_w2=None):
    '''
    Compute G and g matrices for the KKP model.
    ...

    Parameters
    ----------

    ws          : Sparse matrix
                  Spatial weights sparse matrix   

    u           : array
                  Residuals. nx1 array assumed to be aligned with w
    
    i		    : integer
                  0 if Q0, 1 if Q1
    trace_w2    : float
                  trace of WW. Computed in 1st step and saved for step 2.

    Returns
    -------

    moments     : list
                  List of two arrays corresponding to the matrices 'G' and
                  'g', respectively.
    trace_w2    : float
                  trace of WW. Computed in 1st step and saved for step 2.

    '''
    N = ws.shape[0]
    T = u.shape[0]//N
    if i == 0:
        Q = SP.kron(SP.identity(T) - np.ones((T,T))/T,SP.identity(N))
    else:
        Q = SP.kron(np.ones((T,T))/T,SP.identity(N))
    Tw = SP.kron(SP.identity(T),ws)
    ub = Tw.dot(u)
    ubb = Tw.dot(ub)
    Qu = Q.dot(u)
    Qub = Q.dot(ub)
    Qubb = Q.dot(ubb)
    G11 = float(2*np.dot(u.T,Qub))
    G12 = float(-np.dot(ub.T,Qub))
    G21 = float(2*np.dot(ubb.T,Qub))
    G22 = float(-np.dot(ubb.T,Qubb))
    G31 = float(np.dot(u.T,Qubb)+np.dot(ub.T,Qub))
    G32 = float(-np.dot(ub.T,Qubb))
    if trace_w2 == None:
        trace_w2 = (ws.power(2)).sum()
    G23 = ((T-1)**(1-i))*trace_w2
    if i == 0:
        G = np.array([[G11,G12,N*(T-1)**(1-i)],[G21,G22,G23],[G31,G32,0]])/(N*(T-1)**(1-i))
    else:
        G = np.array([[G11,G12,0,N*(T-1)**(1-i)],[G21,G22,0,G23],[G31,G32,0,0]])/(N*(T-1)**(1-i))
    g1 = float(np.dot(u.T,Qu))
    g2 = float(np.dot(ub.T,Qub))    
    g3 = float(np.dot(u.T,Qub))
    g = np.array([[g1,g2,g3]]).T / (N*(T-1)**(1-i))                            
    return [G, g], trace_w2

In [4]:
def _get_Tau(ws, trace_w2):
    '''
    Computes Tau as in :cite:`KKP2007`.
    ...
    
    Parameters
    ----------
    ws          : Sparse matrix
                  Spatial weights sparse matrix   
    trace_w2    : float
                  trace of WW. Computed in 1st step of _moments_kkp
    '''
    N = ws.shape[0]
    T12 = 2*trace_w2/N
    wtw = ws.T.dot(ws)
    T22 = wtw.power(2).sum()
    wtpw = ws.T + ws
    T23 = wtw.multiply(wtpw).sum()
    d_wwpwtw = ws.multiply(ws.T).sum(0)+wtw.diagonal()
    T33 = d_wwpwtw.sum()
    Tau = np.array([[2*N,T12,0],[T12,T22,T23],[0,T23,T33]])/N
    return Tau

In [19]:
ols = OLS.BaseOLS(y=y, x=x)
x, y, n, k, xtx = ols.x, ols.y, ols.n, ols.k, ols.xtx
N = w.n
T = y.shape[0]//N

In [20]:
moments, trace_w2 = _moments_kkp(w.sparse, ols.u, 0)
lambda1, sig_v = optim_moments(moments, all_par=True)

TypeError: only size-1 arrays can be converted to Python scalars

In [22]:
x.shape

(3085, 6)

In [36]:
N = w.n
T = bigy.shape[0] // N
k = bigx.shape[1]
# Demeaned variables
ymean = bigy.reshape((N, T)).mean(axis=1)
bigy_dm = bigy - np.tile(ymean, T)[:, None]
xmean = bigx[:, 0].reshape((N, T)).mean(axis=1)[:, None]
for i in range(1, k):
    xmeani = bigx[:, i].reshape((N, T)).mean(axis=1)
    xmean = np.hstack((xmean, xmeani[:, None]))
bigx_dm = bigx - np.tile(xmean, (T, 1))
# Lag dependent variable
bigW = np.kron(np.identity(T), w.full()[0])
bigylag = bigW @ bigy