In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import geopandas as gp
import pysal as ps
import matplotlib
import matplotlib.pyplot as plt
%pylab inline
from gwr.sel_bw import Sel_BW
from gwr.gwr import GWR
from spglm.family import Gaussian

Populating the interactive namespace from numpy and matplotlib


In [22]:
#Data

data = ps.open(ps.examples.get_path('GData_utm.csv'))
shp = gp.read_file('/Users/toshan/dev/pysal/pysal/examples/georgia/G_utm.shp')


# Prep data into design matrix and coordinates

#Dependent variable
y = data.PctBach.reshape((-1,1))

#Design matrix - covariates - intercept added automatically
pov = data.PctPov.reshape((-1,1))
rural = data.PctRural.reshape((-1,1))
blk = data.PctBlack.reshape((-1,1))
X = np.hstack([pov, rural, blk])
labels = ['Intercept', 'PctPov', 'PctRural', 'PctAfricanAmer']

#Coordinates for calibration points
u = shp.X
v = shp.Y
coords = zip(u,v)

AttributeError: 'csvWrapper' object has no attribute 'PctBach'

In [3]:
#Find optimal bandwidth using golden section search to minimize AICc

#Instantiate bandwidth selection class - bisquare NN (adaptive)
bw = Sel_BW(coords, y, X, kernel='bisquare', fixed=False)

#Find optimal bandwidth by minimizing AICc using golden section search algorithm
bw = bw.search(search='golden_section', criterion='AICc')
print bw

100.0


In [17]:
#Instantiate GWR model and then estimate parameters and diagnostics using fit method
model = GWR(coords, y, X, 46, family=Gaussian(), fixed=False, kernel='bisquare')
results = model.fit()

In [21]:
coords

[(824645.5, 3864805),
 (797981.7, 3872640),
 (777040.1, 3858779),
 (739255.8, 3866604),
 (707834.3, 3854188),
 (686891.4, 3855274),
 (670055.9, 3862318),
 (658870.4, 3842167),
 (635964.3, 3854592),
 (732702.3, 3844809),
 (818917.1, 3839931),
 (796905.6, 3841086),
 (772600.0, 3832429),
 (842085.9, 3827075),
 (695325.1, 3822135),
 (764386.1, 3812502),
 (662257.4, 3789664),
 (653026.6, 3813760),
 (845701.3, 3813323),
 (733846.7, 3812828),
 (870749.9, 3810303),
 (794419.5, 3803344),
 (819317.3, 3807616),
 (734240.9, 3794110),
 (699011.5, 3793408),
 (765397.3, 3789005),
 (879541.2, 3785425),
 (815753.1, 3783949),
 (848638.8, 3785405),
 (845701.3, 3813323),
 (733728.4, 3733248),
 (772634.6, 3764306),
 (803747.1, 3769623),
 (668031.4, 3764766),
 (863291.8, 3756777),
 (724646.8, 3757187),
 (695329.2, 3758093),
 (832508.6, 3762905),
 (891228.5, 3749769),
 (919396.5, 3752562),
 (830735.9, 3750903),
 (759231.9, 3735253),
 (800384.3, 3742691),
 (665933.8, 3740622),
 (825920.1, 3717990),
 (678778.6

In [19]:
def corr(cov):
    sd = np.diag(np.sqrt(np.diag(cov)))
    try:
        invsd = np.linalg.inv(sd)
        cors = np.dot(np.dot(invsd, cov), invsd)
    except:
        cors = np.zeros((sd.shape))
    return cors

def local_MC(gwr):
    x = gwr.X
    w = gwr.W 
    nvar = x.shape[1]
    print nvar
    nrow = len(w)
    if nvar > 3:
        corr_mat = np.ndarray((nrow, int(sp.special.factorial(nvar-1))))
    else:
        corr_mat = np.ndarray((nrow, nvar))
    print corr_mat.shape
    vifs_mat = np.ndarray((nrow, nvar-1))
    vdp_idx = np.ndarray((nrow, nvar))
    vdp_pi = np.ndarray((nrow, nvar, nvar))

    for i in range(nrow):
        wi = w[i]
        sw = np.sum(wi)
        wi = wi/sw
        tag = 0
        for j in range(nvar-1):
            for k in range(j+1, nvar):
                #print j,k
                corr_mat[i, tag] = corr(np.cov(x[:,j], x[:, k], aweights=wi))[0][1]
                tag = tag + 1
        corr_mati = corr(np.cov(x[:,1:].T, aweights=wi))
        vifs_mat[i,] = np.diag(np.linalg.solve(corr_mati, np.identity((nvar-1))))
        xw = x * wi.reshape((nrow,1))
        sxw = np.sqrt(np.sum(xw**2, axis=0))
        sxw = np.transpose(xw.T / sxw.reshape((nvar,1))) 
        svdx = np.linalg.svd(sxw)    
        vdp_idx[i,] = svdx[1][0]/svdx[1]

        phi = np.dot(svdx[2].T, np.diag(1/svdx[1]))

        phi = np.transpose(phi**2)
        pi_ij = phi / np.sum(phi, axis=0)
        vdp_pi[i,:,:] = pi_ij
    local_CN = vdp_idx[:, nvar-1]
    VDP = vdp_pi[:,nvar-1,:]
    return corr_mat, vifs_mat, local_CN, VDP
    
a, b, c, d = local_MC(results)

4
(172, 6)


In [20]:
c

array([20.46002317, 21.79172227, 19.48299078, 14.06058525, 11.47436374,
       11.85960676, 12.73049122, 12.16855332, 13.08327828, 12.89615792,
       17.61796233, 17.23103585, 16.13549261, 18.08386457, 10.10926828,
       12.36797785, 10.1586033 , 11.90945586, 16.80127275, 11.81173485,
       19.67235375, 11.88163437, 13.92711409, 10.28076355,  9.52535867,
        9.4481803 , 16.64381618, 10.48250123, 15.12967745, 16.80127275,
       10.18220271,  7.97155658,  8.54593105,  9.37466895, 15.97257809,
        9.35812255,  8.92003217, 11.47812055, 14.29596366, 15.05281651,
       11.03270176, 10.13807811,  7.4729687 ,  9.19325296, 12.51198521,
        9.40075148,  9.06731575,  9.26448661, 15.36513344, 15.6539687 ,
       17.45650684,  9.443149  , 17.23373273, 16.98683503, 16.37740105,
       13.51071938, 15.82753907,  9.26448661, 12.67378512, 15.61135984,
        9.26448661, 15.7541553 , 10.74584587, 16.09300478, 17.79789602,
       15.82753907, 10.21511948, 15.80106427, 14.48443503, 14.07

In [242]:
from scipy.stats import pearsonr

print pearsonr(inc, crime)[0]
print pearsonr(inc, plumb)[0]
print pearsonr(inc, op)[0]

print pearsonr(crime, plumb)[0]
print pearsonr(crime, op)[0]

print pearsonr(plumb, op)[0]



[-0.69558977]
[-0.25609056]
[0.15321546]
[0.43269397]
[-0.06526484]
[0.19200661]


In [247]:
print np.mean(a[:, 0])

print np.mean(a[:, 1])

print np.mean(a[:, 2])

print np.mean(a[:, 3])

print np.mean(a[:, 4])

print np.mean(a[:, 5])

-0.6651485730332158
-0.15337212082835333
0.1638505688372199
0.3755214471538651
-0.05501956725183264
0.21024044463298272


In [195]:
sp.special.factorial(2)

array(2.)

In [None]:
inc = np.array(data.by_col['INC']).reshape((-1,1))
crime = np.array(data.by_col['CRIME']).reshape((-1,1))
plumb = np.array(data.by_col['PLUMB']).reshape((-1,1))
op = np.array(data.by_col['OPEN']).reshape((-1,1))
