## Geographically Weighted Correlation Coefficient

In [1]:
import spglm
import numpy as np
import pandas as pd
import geopandas as gpd
import libpysal as ps
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW

#### Prepare the Georgia dataset inputs

In [2]:
# Resolve the two Georgia example files through libpysal's examples helper,
# then read the attribute table (CSV) and the shapefile.
georgia_csv_path = ps.examples.get_path('GData_utm.csv')
georgia_shp_path = ps.examples.get_path('G_utm.shp')

georgia_data = pd.read_csv(georgia_csv_path)
georgia_shp = gpd.read_file(georgia_shp_path)

### Selecting a single explanatory variable for the GWR model, i.e. PctFB

In [3]:
# Single-predictor setup: PctFB is the explanatory variable and PctBach the
# response. Both are turned into (n, 1) column vectors.
y = georgia_data['PctBach'].values[:, np.newaxis]
X = georgia_data['PctFB'].values[:, np.newaxis]

# Pair the 'X' and 'Y' columns into one (x, y) tuple per row.
u, v = georgia_data['X'], georgia_data['Y']
g_coords = [(x_coord, y_coord) for x_coord, y_coord in zip(u, v)]

#### Standardization Routine

In [4]:
# Z-score each array column-wise: subtract the column mean, then divide by the
# column standard deviation (NumPy's default std, i.e. ddof=0).
X_centered = X - X.mean(axis=0)
X = X_centered / X.std(axis=0)

y_centered = y - y.mean(axis=0)
y = y_centered / y.std(axis=0)

In [5]:
# Search for a bandwidth for the single-predictor model, print it, and fit a
# GWR model with that bandwidth.
# (A bare `gwr_selector` expression used to sit mid-cell; it was a no-op
# because only a cell's last expression is displayed, so it has been dropped.)
gwr_selector = Sel_BW(g_coords, y, X)
gwr_bw = gwr_selector.search()
print(gwr_bw)
gwr_results = GWR(g_coords, y, X, gwr_bw).fit()

116.0


In [6]:
# Print the summary report of the fitted single-predictor model (the output
# below starts with the global regression diagnostics).
gwr_results.summary()

Model type                                                         Gaussian
Number of observations:                                                 159
Number of covariates:                                                     2

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                             87.210
Log-likelihood:                                                    -177.864
AIC:                                                                359.729
AICc:                                                               361.883
BIC:                                                               -708.608
R2:                                                                   0.452
Adj. R2:                                                              0.448

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

### Generate Pearson's correlation coefficient

In [7]:
from scipy.stats import pearsonr

# pearsonr returns (correlation, p-value). Bind the p-value to its own name
# instead of `u`, which already holds the 'X' coordinate column; overwriting
# it would break any re-run of the earlier cells that build g_coords from u.
corr, p_value = pearsonr(X.flatten(), y.flatten())
print(f'{corr:.2f}') # to 2 d.p

0.67


<p>Here we can see that, after standardization, the correlation coefficient is equal to the slope/coefficient of a single-variable GWR and the intercept is zero. If we comment out the standardization routine, however, the results become different and we get a non-zero intercept.</p>

### Selecting multiple variables for GWR, i.e. all the columns -- 'PctFB', 'PctBlack', 'PctRural'

In [1]:
import spglm
import numpy as np
import pandas as pd
import geopandas as gpd
import libpysal as ps
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW

In [2]:
# Resolve the two Georgia example files through libpysal's examples helper,
# then read the attribute table (CSV) and the shapefile.
georgia_csv_path = ps.examples.get_path('GData_utm.csv')
georgia_shp_path = ps.examples.get_path('G_utm.shp')

georgia_data = pd.read_csv(georgia_csv_path)
georgia_shp = gpd.read_file(georgia_shp_path)

In [3]:
# Preview the first rows to see which columns are available
# (PctBach, PctFB, PctBlack, PctRural and the X/Y coordinates are used below).
georgia_data.head()

Unnamed: 0,AreaKey,Latitude,Longitud,TotPop90,PctRural,PctBach,PctEld,PctFB,PctPov,PctBlack,ID,X,Y
0,13001,31.75339,-82.28558,15744,75.6,8.2,11.43,0.64,19.9,20.76,133,941396.6,3521764.0
1,13003,31.29486,-82.87474,6213,100.0,6.4,11.77,1.58,26.0,26.86,158,895553.0,3471916.0
2,13005,31.55678,-82.45115,9566,61.7,6.6,11.11,0.27,24.1,15.42,146,930946.4,3502787.0
3,13007,31.33084,-84.45401,3615,100.0,9.4,13.17,0.11,24.8,51.67,155,745398.6,3474765.0
4,13009,33.07193,-83.25085,39530,42.7,13.3,8.64,1.43,17.5,42.39,79,849431.3,3665553.0


In [4]:
# Response as an (n, 1) column vector; the three predictors as an (n, 3) matrix.
predictor_columns = ['PctFB', 'PctBlack', 'PctRural']
g_X = georgia_data[predictor_columns].values
g_y = georgia_data['PctBach'].values[:, np.newaxis]

# Pair the 'X' and 'Y' columns into one (x, y) tuple per row.
u, v = georgia_data['X'], georgia_data['Y']
g_coords = [(x_coord, y_coord) for x_coord, y_coord in zip(u, v)]

In [5]:
# Column-wise z-scores for the predictors (mean 0, unit standard deviation
# per column, using NumPy's default ddof=0).
g_X_centered = g_X - g_X.mean(axis=0)
g_X = g_X_centered / g_X.std(axis=0)

# Force the response into a single column, then z-score it the same way.
g_y = np.reshape(g_y, (-1, 1))
g_y_centered = g_y - g_y.mean(axis=0)
g_y = g_y_centered / g_y.std(axis=0)

In [6]:
# Bandwidth search over the multi-predictor inputs; the selected value is
# printed and then passed to the GWR model.
gwr_selector = Sel_BW(g_coords, g_y, g_X)
gwr_bw = gwr_selector.search()
print(gwr_bw)

# Build the model first, then fit it, so each step is visible on its own line.
gwr_model = GWR(g_coords, g_y, g_X, gwr_bw)
gwr_results = gwr_model.fit()

117.0


In [7]:
# Print the summary report of the fitted multi-predictor model (the output
# below starts with the global regression diagnostics).
gwr_results.summary()

Model type                                                         Gaussian
Number of observations:                                                 159
Number of covariates:                                                     4

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                             71.793
Log-likelihood:                                                    -162.399
AIC:                                                                332.798
AICc:                                                               335.191
BIC:                                                               -713.887
R2:                                                                   0.548
Adj. R2:                                                              0.540

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

In [8]:
from scipy.stats import pearsonr

# Correlation between the first predictor column (PctFB) and the response.
# pearsonr returns (correlation, p-value). Bind the p-value to its own name
# instead of `u`, which already holds the 'X' coordinate column; overwriting
# it would break any re-run of the earlier cell that builds g_coords from u.
corr, p_value = pearsonr(g_X[:,0], g_y.flatten())

print(f'{corr:.2f}') # to 2 d.p

0.67


As we can see from the results above, the correlation coefficient for X2 is -0.62, which is different from its slope/coefficient of -0.084 even after standardizing the data. We still get an intercept of 0, but that is about as far as the correspondence goes.