In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import geopandas as gp
import pysal as ps
import matplotlib
import matplotlib.pyplot as plt
%pylab inline
from gwr.sel_bw import Sel_BW
from gwr.gwr import GWR
from spglm.family import Gaussian

Populating the interactive namespace from numpy and matplotlib


In [8]:
#Data

data = ps.open(ps.examples.get_path('GData_utm.csv'))
shp = gp.read_file('/Users/toshan/dev/pysal/pysal/examples/georgia/G_utm.shp')


# Prep data into design matrix and coordinates

y = np.array(data.by_col['PctBach']).reshape((-1,1))
pov = np.array(data.by_col['PctPov']).reshape((-1,1))
rural = np.array(data.by_col['PctRural']).reshape((-1,1))
black = np.array(data.by_col['PctBlack']).reshape((-1,1))
X = np.hstack([rural, pov, black])


#Coordinates for calibration points
u = data.by_col['X']
v = data.by_col['Y']
coords = zip(u,v)

In [9]:
#Find optimal bandwidth using golden section search to minimize AICc

#Instantiate bandwidth selection class - bisquare NN (adaptive)
bw = Sel_BW(coords, y, X, kernel='bisquare', fixed=False)

#Find optimal bandwidth by minimizing AICc using golden section search algorithm
bw = bw.search(search='golden_section', criterion='AICc')
print bw

93.0


In [10]:
#Instantiate GWR model and then estimate parameters and diagnostics using fit method
model = GWR(coords, y, X, 90, family=Gaussian(), fixed=False, kernel='bisquare')
results = model.fit()

In [96]:
def corr(cov):
    sd = np.diag(np.sqrt(np.diag(cov)))
    try:
        invsd = np.linalg.inv(sd)
        cors = np.dot(np.dot(invsd, cov), invsd)
    except:
        cors = np.zeros((sd.shape))
    return cors

def local_MC(gwr):
    x = gwr.X
    w = gwr.W 
    nvar = x.shape[1]
    print nvar
    nrow = len(w)
    if nvar > 3:
        corr_mat = np.ndarray((nrow, int(sp.special.factorial(nvar-1))))
    else:
        corr_mat = np.ndarray((nrow, nvar))
    print corr_mat.shape
    if gwr.model.constant:
        vifs_mat = np.ndarray((nrow, nvar-1))
    else: 
        vifs_mat = np.ndarray((nrow, nvar))
    vdp_idx = np.ndarray((nrow, nvar))
    vdp_pi = np.ndarray((nrow, nvar, nvar))

    for i in range(nrow):
        wi = w[i]
        sw = np.sum(wi)
        wi = wi/sw
        tag = 0
        for j in range(nvar-1):
            for k in range(j+1, nvar):
                #print j,k
                corr_mat[i, tag] = corr(np.cov(x[:,j], x[:, k], aweights=wi))[0][1]
                tag = tag + 1
        if gwr.model.constant:
            corr_mati = corr(np.cov(x[:,1:].T, aweights=wi))
            vifs_mat[i,] = np.diag(np.linalg.solve(corr_mati, np.identity((nvar-1))))

        else:
            corr_mati = corr(np.cov(x.T, aweights=wi))
            vifs_mat[i,] = np.diag(np.linalg.solve(corr_mati, np.identity((nvar))))
        xw = x * wi.reshape((nrow,1))
        sxw = np.sqrt(np.sum(xw**2, axis=0))
        sxw = np.transpose(xw.T / sxw.reshape((nvar,1))) 
        svdx = np.linalg.svd(sxw)    
        vdp_idx[i,] = svdx[1][0]/svdx[1]

        phi = np.dot(svdx[2].T, np.diag(1/svdx[1]))

        phi = np.transpose(phi**2)
        pi_ij = phi / np.sum(phi, axis=0)
        vdp_pi[i,:,:] = pi_ij
    local_CN = vdp_idx[:, nvar-1]
    VDP = vdp_pi[:,nvar-1,:]
    return corr_mat, vifs_mat, local_CN, VDP
    
a, b, c, d = local_MC(results)

4
(159, 6)


In [94]:
b[0:5]

array([[1.43813463, 1.08723815, 1.39045659],
       [1.56773558, 1.08332428, 1.58317025],
       [1.44499911, 1.09091733, 1.42425801],
       [1.91099558, 1.05029384, 1.84397695],
       [2.45049987, 1.08611463, 2.3423565 ]])

In [97]:
b[0:5]

array([[1.43813463, 1.08723815, 1.39045659],
       [1.56773558, 1.08332428, 1.58317025],
       [1.44499911, 1.09091733, 1.42425801],
       [1.91099558, 1.05029384, 1.84397695],
       [2.45049987, 1.08611463, 2.3423565 ]])

In [34]:
a,b,c,d = results.local_collinearity
d

array([[7.22345418e-01, 1.14617116e-05, 8.80525858e-01, 7.64077333e-02],
       [6.36658247e-01, 3.53980425e-04, 9.19002873e-01, 2.18737741e-01],
       [6.96514704e-01, 6.61678745e-05, 8.86703285e-01, 9.95814584e-02],
       [3.57213371e-01, 3.32953457e-03, 9.80840340e-01, 5.19864183e-01],
       [6.23938304e-02, 6.53836889e-03, 9.53529078e-01, 7.87852654e-01],
       [6.22648468e-02, 7.46462142e-01, 6.32379908e-01, 5.92369796e-01],
       [3.32294063e-03, 4.34807279e-01, 9.08489331e-01, 6.49288131e-01],
       [2.25194770e-05, 6.67988790e-01, 8.48438094e-01, 5.38494109e-01],
       [5.22945137e-01, 2.25836383e-02, 9.57591012e-01, 2.80013463e-01],
       [6.44358454e-01, 6.75831140e-05, 9.35236863e-01, 2.87223197e-01],
       [1.25718948e-02, 2.66976762e-02, 8.70404681e-01, 8.89607464e-01],
       [5.82829512e-01, 2.85225705e-02, 8.01582507e-01, 6.76451853e-02],
       [6.80610085e-01, 2.27730411e-03, 8.61587906e-01, 4.86525960e-02],
       [6.48477685e-01, 8.10735726e-04, 9.46885850e

In [35]:
pd.DataFrame(c).to_csv('local_cn.csv', index=False)

TypeError: to_csv() got an unexpected keyword argument 'dtype'

In [28]:
type(d[0][0])

numpy.float64

In [26]:
type(np.array(pd.read_csv('local_vdp.csv'))[0][0])

numpy.float64

In [36]:
np.array(ps.open('local_vdp.csv'), dtype=np.flo)

array([['0.7223454176524194', '1.1461711610046152e-05', '0.880525857868',
        '0.0764077332661'],
       ['0.6366582467490418', '0.00035398042519544986', '0.919002872741',
        '0.218737740988'],
       ['0.6965147040612314', '6.616787447927173e-05', '0.88670328481',
        '0.0995814583961'],
       ['0.35721337148597687', '0.003329534567610654', '0.980840339752',
        '0.519864182679'],
       ['0.06239383041047655', '0.006538368885290033', '0.953529078461',
        '0.787852654023'],
       ['0.06226484676515529', '0.7464621420141199', '0.632379907694',
        '0.5923697962'],
       ['0.0033229406337113914', '0.43480727891055093', '0.908489330504',
        '0.649288130647'],
       ['2.2519477021455274e-05', '0.6679887899361866', '0.848438094318',
        '0.538494109433'],
       ['0.5229451371361832', '0.022583638274960495', '0.957591012179',
        '0.280013463499'],
       ['0.644358454183175', '6.758311402724356e-05', '0.935236863401',
        '0.28722319745'],
  