In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import geopandas as gp
import pysal as ps
import matplotlib
import matplotlib.pyplot as plt
%pylab inline
from gwr.sel_bw import Sel_BW
from gwr.gwr import GWR
from spglm.family import Gaussian

Populating the interactive namespace from numpy and matplotlib


In [24]:
#Data

# Both files are resolved through the PySAL example registry so the notebook
# does not depend on one user's local checkout path.
data = ps.open(ps.examples.get_path('GData_utm.csv'))
shp = gp.read_file(ps.examples.get_path('G_utm.shp'))


# Prep data into design matrix and coordinates

# Each column is reshaped to (n, 1) so the covariates can be stacked side by side.
y = np.array(data.by_col['PctBach']).reshape((-1,1))
pov = np.array(data.by_col['PctPov']).reshape((-1,1))
rural = np.array(data.by_col['PctRural']).reshape((-1,1))
black = np.array(data.by_col['PctBlack']).reshape((-1,1))
X = np.hstack([pov, rural, black])


#Coordinates for calibration points
u = data.by_col['X']
v = data.by_col['Y']
# Materialise the pairs: coords is passed to both Sel_BW and GWR below, and a
# bare zip object can only be iterated once on Python 3.
coords = list(zip(u, v))

In [25]:
#Find optimal bandwidth using golden section search to minimize AICc

#Instantiate bandwidth selection class - bisquare NN (adaptive)
# The selector gets its own name so that `bw` only ever refers to the numeric
# bandwidth returned by the search.
bw_selector = Sel_BW(coords, y, X, kernel='bisquare', fixed=False)

#Find optimal bandwidth by minimizing AICc using golden section search algorithm
bw = bw_selector.search(search='golden_section', criterion='AICc')
print(bw)

93.0


In [95]:
#Instantiate GWR model and then estimate parameters and diagnostics using fit method
# Use the bandwidth found by the search cell rather than a hand-copied literal,
# so the model stays in sync if the data or the search settings change.
model = GWR(coords, y, X, bw, family=Gaussian(), fixed=False, kernel='bisquare')
results = model.fit()

In [96]:
def corr(cov):
    """Convert a covariance matrix into a correlation matrix.

    Parameters
    ----------
    cov : 2-D array
        Square covariance matrix.

    Returns
    -------
    ndarray
        ``D^-1 * cov * D^-1`` where ``D`` is the diagonal matrix of standard
        deviations. If ``D`` cannot be inverted (at least one variance is
        zero) an all-zero matrix with the shape of ``D`` is returned instead.

    Raises
    ------
    ValueError
        If the matrix products fail because ``cov`` is not square. Only the
        singular-matrix case is mapped to zeros; other errors propagate.
    """
    sd = np.diag(np.sqrt(np.diag(cov)))
    try:
        invsd = np.linalg.inv(sd)
    except np.linalg.LinAlgError:
        # A zero standard deviation makes sd singular; correlations are
        # undefined there, so report zeros.
        return np.zeros(sd.shape)
    return np.dot(np.dot(invsd, cov), invsd)

def local_MC(gwr):
    """Compute local multicollinearity diagnostics from fitted GWR results.

    For every location (one entry of ``gwr.W``) the weight vector is scaled to
    sum to one and then used to compute:

    * the weighted correlation of every pair of columns of ``gwr.X``;
    * variance inflation factors, taken as the diagonal of the inverse of the
      weighted correlation matrix (the first column of ``gwr.X`` is left out
      when ``gwr.model.constant`` is truthy);
    * condition indices and variance-decomposition proportions from the SVD
      of the weighted design matrix after scaling each column to unit length.

    Parameters
    ----------
    gwr : object
        Must expose ``X`` (2-D array, observations by variables), ``W`` (one
        weight vector per location, each holding one weight per row of ``X``)
        and ``model.constant``.

    Returns
    -------
    corr_mat : ndarray, shape (n_locations, nvar * (nvar - 1) / 2)
        Local pairwise correlations; columns follow the pairs (j, k), j < k,
        in the order (0, 1), (0, 2), ..., (nvar - 2, nvar - 1).
    vifs_mat : ndarray, shape (n_locations, nvar) or (n_locations, nvar - 1)
        Local variance inflation factors.
    local_CN : ndarray, shape (n_locations,)
        Ratio of the largest to the smallest singular value at each location.
    VDP : ndarray, shape (n_locations, nvar)
        Variance-decomposition proportions associated with the smallest
        singular value at each location.
    """
    x = gwr.X
    w = gwr.W
    nvar = x.shape[1]
    # Diagnostic output kept from the original notebook cell.
    print(nvar)
    nrow = len(w)

    # One column per unordered pair of variables. (Sizing this with a
    # factorial only matches the pair count for 3 or 4 variables.)
    npairs = nvar * (nvar - 1) // 2
    corr_mat = np.empty((nrow, npairs))
    print(corr_mat.shape)

    # When the model has a constant, column 0 is skipped for the VIFs.
    first_vif_col = 1 if gwr.model.constant else 0
    nvif = nvar - first_vif_col
    vifs_mat = np.empty((nrow, nvif))
    vdp_idx = np.empty((nrow, nvar))
    vdp_pi = np.empty((nrow, nvar, nvar))

    for i in range(nrow):
        wi = w[i]
        wi = wi / np.sum(wi)

        # Weighted correlation for every pair of columns.
        pair = 0
        for j in range(nvar - 1):
            for k in range(j + 1, nvar):
                corr_mat[i, pair] = corr(np.cov(x[:, j], x[:, k], aweights=wi))[0][1]
                pair += 1

        # VIFs are the diagonal of the inverse weighted correlation matrix.
        corr_mati = corr(np.cov(x[:, first_vif_col:].T, aweights=wi))
        vifs_mat[i, :] = np.diag(np.linalg.solve(corr_mati, np.identity(nvif)))

        # Weight the rows of X, then scale every column to unit length.
        xw = x * wi.reshape((nrow, 1))
        sxw = xw / np.sqrt(np.sum(xw ** 2, axis=0))

        # Only the singular values and right singular vectors are needed, so
        # skip building the full n-by-n left factor.
        _, s, vt = np.linalg.svd(sxw, full_matrices=False)
        vdp_idx[i, :] = s[0] / s

        # phi[j, k] = (V[k, j] / s[j]) ** 2; normalising each column gives the
        # share of variable k's variance attributed to component j.
        phi = (vt * (1.0 / s).reshape((-1, 1))) ** 2
        vdp_pi[i, :, :] = phi / np.sum(phi, axis=0)

    local_CN = vdp_idx[:, nvar - 1]
    VDP = vdp_pi[:, nvar - 1, :]
    return corr_mat, vifs_mat, local_CN, VDP
    
# Unpack the diagnostics in return order: a = local pairwise correlations,
# b = local VIFs, c = local condition numbers, d = variance-decomposition
# proportions.
a, b, c, d = local_MC(results)

4
(159, 6)


In [94]:
# Preview the local VIFs (b) for the first five locations.
b[0:5]

array([[1.43813463, 1.08723815, 1.39045659],
       [1.56773558, 1.08332428, 1.58317025],
       [1.44499911, 1.09091733, 1.42425801],
       [1.91099558, 1.05029384, 1.84397695],
       [2.45049987, 1.08611463, 2.3423565 ]])

In [97]:
# Preview the local VIFs (b) for the first five locations.
# NOTE(review): this repeats the previous cell; candidate for removal.
b[0:5]

array([[1.43813463, 1.08723815, 1.39045659],
       [1.56773558, 1.08332428, 1.58317025],
       [1.44499911, 1.09091733, 1.42425801],
       [1.91099558, 1.05029384, 1.84397695],
       [2.45049987, 1.08611463, 2.3423565 ]])

In [242]:
from itertools import combinations
from scipy.stats import pearsonr

# NOTE(review): inc, crime, plumb and op are only assigned in a later cell (the
# one reading the INC/CRIME/PLUMB/OPEN columns); that cell must run first.
# Global Pearson r for every unordered pair, printed in the order
# (inc, crime), (inc, plumb), (inc, op), (crime, plumb), (crime, op), (plumb, op).
for first, second in combinations((inc, crime, plumb, op), 2):
    print(pearsonr(first, second)[0])



[-0.69558977]
[-0.25609056]
[0.15321546]
[0.43269397]
[-0.06526484]
[0.19200661]


In [247]:
# Mean over all locations of each column of a (one column per variable pair),
# printed one per line in column order.
for col in range(a.shape[1]):
    print(np.mean(a[:, col]))

-0.6651485730332158
-0.15337212082835333
0.1638505688372199
0.3755214471538651
-0.05501956725183264
0.21024044463298272


In [195]:
# Scratch check of what sp.special.factorial returns for a scalar argument.
sp.special.factorial(2)

array(2.)

In [None]:
# Pull four columns out of `data`, each as an (n, 1) column vector.
# NOTE(review): this cell has no execution count, yet the Pearson cell earlier
# in the notebook uses these names; `data` must hold a table with these columns.
inc, crime, plumb, op = (
    np.array(data.by_col[column]).reshape((-1, 1))
    for column in ('INC', 'CRIME', 'PLUMB', 'OPEN')
)
