https://github.com/pysal/mgwr/pull/60

In [1]:
import os
import sys

# Location of the local mgwr development checkout. Set the MGWR_DEV_PATH
# environment variable to point somewhere else instead of editing the notebook;
# the default keeps the path the notebook was originally run with.
MGWR_DEV_PATH = os.environ.get(
    "MGWR_DEV_PATH",
    "C:/Users/msachde1/Downloads/Research/Development/mgwr",
)
# Guard so that re-running this cell does not pile up duplicate sys.path entries.
if MGWR_DEV_PATH not in sys.path:
    sys.path.append(MGWR_DEV_PATH)

In [2]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np

from mgwr.gwr import GWR
from spglm.family import Gaussian, Binomial, Poisson
from mgwr.gwr import MGWR
from mgwr.sel_bw import Sel_BW
import multiprocessing as mp
# NOTE(review): this worker pool is not passed to any call in the cells shown
# here and is never closed in them; confirm it is needed, and if so call
# pool.close() / pool.join() once the notebook is done with it.
pool = mp.Pool()
from scipy import linalg
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse import linalg as spla
from spreg.utils import spdot, spmultiply
from scipy import special
import libpysal as ps
import seaborn as sns
import matplotlib.pyplot as plt
from copy import deepcopy
import copy
from collections import namedtuple
import spglm

#### Clearwater data - downloaded from link: https://sgsup.asu.edu/sparc/multiscale-gwr

In [3]:
# Landslide table for the Clearwater data set (see the download link above).
# NOTE(review): absolute, machine-specific path; adjust before re-running elsewhere.
LANDSLIDES_CSV = "C:/Users/msachde1/Downloads/logistic_mgwr_data/landslides.csv"
data_p = pd.read_csv(LANDSLIDES_CSV)

In [4]:
# Preview the first rows to confirm the columns loaded as expected.
data_p.head()

Unnamed: 0,UserID,X,Y,Elev,Slope,SinAspct,CosAspct,AbsSouth,Landslid,DistStrm
0,1,616168.5625,5201076.5,1450.475,27.44172,0.409126,-0.912478,24.1499,1,8.506
1,2,624923.8125,5201008.5,1567.476,21.88343,-0.919245,-0.393685,66.816,1,15.561
2,3,615672.0,5199187.5,1515.065,38.8103,-0.535024,-0.844837,32.3455,1,41.238
3,4,615209.3125,5199112.0,1459.827,26.71631,-0.828548,-0.559918,55.9499,1,17.539
4,5,616354.6875,5198945.5,1379.442,27.55271,-0.872281,-0.489005,60.7248,1,35.023


### GWR Binomial model with a single independent variable, x = SinAspct

In [5]:
def _column(name):
    """Return column ``name`` of ``data_p`` as an (n, 1) NumPy array."""
    return np.array(data_p[name]).reshape((-1, 1))


def _standardize(values):
    """Subtract each column's mean and divide by its standard deviation."""
    return (values - values.mean(axis=0)) / values.std(axis=0)


# Observation locations as (X, Y) tuples.
coords = list(zip(data_p['X'], data_p['Y']))

# Response and the four candidate covariates, each as a column vector.
y = _column('Landslid')
elev = _column('Elev')
slope = _column('Slope')
SinAspct = _column('SinAspct')
CosAspct = _column('CosAspct')

# X holds all four covariates side by side; x is the single covariate used by
# the univariate models (SinAspct).
X = np.hstack([elev, slope, SinAspct, CosAspct])
x = SinAspct

X_std = _standardize(X)
x_std = _standardize(x)
# NOTE(review): y_std is not referenced by any cell shown here; the models
# below are fit on the raw y.
y_std = _standardize(y)

In [6]:
# Single-bandwidth GWR baseline on the one standardized covariate:
# search for the bandwidth first, then fit a Binomial GWR with it.
gwr_selector = Sel_BW(coords, y, x_std, family=Binomial(), constant=False)
bw = gwr_selector.search()
gwr_mod = GWR(coords, y, x_std, bw=bw, family=Binomial(), constant=False).fit()
bw

100.0

##### Running the MGWR bandwidth search (Sel_BW with multi=True) with family = Binomial()

In [7]:
# Multiscale bandwidth search for the univariate Binomial model.
# NOTE(review): the recorded output of this cell stops after iteration 4 with
# "Exception: one or more input arrays have missing/NaN values"; the search
# does not complete with the default initialization.
selector = Sel_BW(
    coords, y, x_std,
    family=Binomial(),
    multi=True,
    constant=False,
)
selector.search(verbose=True)

(239, 1)
Current iteration: 1 ,SOC: 0.0018522
Bandwidths: 50.0
(239, 1)
Current iteration: 2 ,SOC: 0.0090539
Bandwidths: 43.0
(239, 1)
Current iteration: 3 ,SOC: 0.0164077
Bandwidths: 43.0
(239, 1)
Current iteration: 4 ,SOC: 0.0277893
Bandwidths: 53.0
(239, 1)


Exception: one or more input arrays have missing/NaN values

In [8]:
# Fit the univariate Binomial MGWR using the bandwidth selector built above.
# NOTE(review): the preceding selector.search() output ends in an exception;
# confirm the selector holds valid bandwidths before relying on this fit.
mgwr_model = MGWR(coords, y, x_std, selector, family=Binomial(), constant=False)
mgwr_mod = mgwr_model.fit()

HBox(children=(IntProgress(value=0, description='Inference', max=1), HTML(value='')))




#### Testing with a user-specified initial bandwidth (init_multi=200)

In [9]:
# Repeat the multiscale search, this time starting from an explicit initial
# bandwidth of 200 rather than the default initialization.
selector.search(
    verbose=True,
    init_multi=200,
)

Current iteration: 1 ,SOC: 0.0070803
Bandwidths: 100.0
Current iteration: 2 ,SOC: 0.0
Bandwidths: 100.0


array([100.])

#### Parameter check

In [10]:
# Show the AIC of the MGWR fit next to the AIC of the single-bandwidth GWR fit.
mgwr_mod.aic, gwr_mod.aic

(319.4645202072096, 330.2296364641693)

In [11]:
# Count the observations whose MGWR and GWR predictions are exactly equal.
# NOTE(review): this is an exact floating-point comparison; np.isclose would
# be more tolerant if the two fits ever differ by rounding only.
predictions_match = mgwr_mod.predy == gwr_mod.predy
np.sum(predictions_match)

239

#### Multiple bandwidth check

In [12]:
# Single-bandwidth GWR baseline on the full standardized design matrix
# (constant=True): search for the bandwidth first, then fit with it.
gwr_selector = Sel_BW(coords, y, X_std, family=Binomial(), constant=True)
bw = gwr_selector.search()
gwr_mod = GWR(coords, y, X_std, bw=bw, family=Binomial(), constant=True).fit()
bw

121.0

In [13]:
# Multiscale bandwidth search on the full standardized design matrix; the
# recorded output reports one bandwidth per term (five values).
selector = Sel_BW(
    coords, y, X_std,
    family=Binomial(),
    multi=True,
    constant=True,
)
selector.search(verbose=True)

Current iteration: 1 ,SOC: 0.0031609
Bandwidths: 236.0, 235.0, 236.0, 238.0, 228.0
Current iteration: 2 ,SOC: 0.0004815
Bandwidths: 238.0, 235.0, 192.0, 238.0, 228.0
Current iteration: 3 ,SOC: 4.11e-05
Bandwidths: 238.0, 235.0, 192.0, 238.0, 228.0
Current iteration: 4 ,SOC: 3.4e-06
Bandwidths: 238.0, 235.0, 192.0, 238.0, 228.0


array([238., 235., 192., 238., 228.])

In [14]:
# Fit the multivariate Binomial MGWR with the bandwidths found by `selector`.
mgwr_model = MGWR(coords, y, X_std, selector, family=Binomial(), constant=True)
mgwr_mod = mgwr_model.fit()

HBox(children=(IntProgress(value=0, description='Inference', max=1), HTML(value='')))




In [15]:
# Show the AIC of the single-bandwidth GWR fit next to the AIC of the MGWR fit.
gwr_mod.aic, mgwr_mod.aic

(259.72499337648424, 261.98167514815555)

In [16]:
# Difference between GWR and MGWR predictions for the first ten observations.
predy_gap = gwr_mod.predy - mgwr_mod.predy
predy_gap[:10]

array([[ 0.07645926],
       [ 0.10775708],
       [-0.01199206],
       [ 0.04968317],
       [ 0.05187675],
       [ 0.04308866],
       [ 0.05400206],
       [ 0.03465849],
       [ 0.05664243],
       [ 0.12148732]])

#### Global model check

In [17]:
# Pin every bandwidth to 239 by setting the lower and upper search bounds to
# the same value. 239 matches the number of observations reported in the model
# summary, so every term is searched at that single bandwidth only.
selector = Sel_BW(coords, y, X_std, multi=True, family=Binomial(), constant=True)
selector.search(
    verbose=True,
    multi_bw_min=[239, 239, 239, 239, 239],
    multi_bw_max=[239, 239, 239, 239, 239],
)

Current iteration: 1 ,SOC: 0.0033277
Bandwidths: 239.0, 239.0, 239.0, 239.0, 239.0
Current iteration: 2 ,SOC: 0.0003028
Bandwidths: 239.0, 239.0, 239.0, 239.0, 239.0
Current iteration: 3 ,SOC: 1.82e-05
Bandwidths: 239.0, 239.0, 239.0, 239.0, 239.0
Current iteration: 4 ,SOC: 3.5e-06
Bandwidths: 239.0, 239.0, 239.0, 239.0, 239.0


array([239., 239., 239., 239., 239.])

In [18]:
# Fit the Binomial MGWR with all bandwidths pinned by the selector above.
mgwr_model = MGWR(coords, y, X_std, selector, family=Binomial(), constant=True)
mgwr_mod = mgwr_model.fit()

HBox(children=(IntProgress(value=0, description='Inference', max=1), HTML(value='')))




In [19]:
# Print the summary report of gwr_mod. NOTE(review): gwr_mod was last assigned
# in the "Multiple bandwidth check" section; confirm that is the model meant here.
gwr_mod.summary()

Model type                                                         Binomial
Number of observations:                                                 239
Number of covariates:                                                     5

Global Regression Results
---------------------------------------------------------------------------
Deviance:                                                           266.246
Log-likelihood:                                                    -133.123
AIC:                                                                276.246
AICc:                                                               276.504
BIC:                                                              -1015.246
Percent deviance explained:                                           0.182
Adj. percent deviance explained:                                      0.168

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

In [21]:
# Average each MGWR coefficient over all locations (one value per model term).
local_params = mgwr_mod.params
np.mean(local_params, axis=0)

array([ 0.39941768, -0.66123063,  0.59038645,  0.06518615, -0.32231525])