## Geographically Weighted Correlation Coefficient

### Import relevant dependencies

In [1]:
import pandas as pd
import numpy as np
import geopandas as gp
import libpysal as ps
from mgwr.gwr import GWR, MGWR, GWRResults
from mgwr.sel_bw import Sel_BW

In [2]:
# Resolve the paths of the Georgia example files via libpysal's examples
# helper, then load the attribute table (CSV) and the shapefile.
georgia_csv_path = ps.examples.get_path('GData_utm.csv')
georgia_shp_path = ps.examples.get_path('G_utm.shp')

georgia_data = pd.read_csv(georgia_csv_path)
georgia_shp = gp.read_file(georgia_shp_path)

#### Prepare the Georgia dataset inputs

In [3]:
# Response variable as an (n, 1) column vector.
g_y = georgia_data['PctBach'].to_numpy().reshape(-1, 1)

# Explanatory variables as an (n, 3) array, in this column order.
predictor_columns = ['PctFB', 'PctBlack', 'PctRural']
g_X = georgia_data[predictor_columns].to_numpy()

# Coordinates of each observation, taken from the 'X' and 'Y' columns and
# paired up as a list of (x, y) tuples.
u, v = georgia_data['X'], georgia_data['Y']
g_coords = list(zip(u, v))

In [4]:
# Preview the first five rows of the attribute table.
georgia_data.head(n=5)

Unnamed: 0,AreaKey,Latitude,Longitud,TotPop90,PctRural,PctBach,PctEld,PctFB,PctPov,PctBlack,ID,X,Y
0,13001,31.75339,-82.28558,15744,75.6,8.2,11.43,0.64,19.9,20.76,133,941396.6,3521764.0
1,13003,31.29486,-82.87474,6213,100.0,6.4,11.77,1.58,26.0,26.86,158,895553.0,3471916.0
2,13005,31.55678,-82.45115,9566,61.7,6.6,11.11,0.27,24.1,15.42,146,930946.4,3502787.0
3,13007,31.33084,-84.45401,3615,100.0,9.4,13.17,0.11,24.8,51.67,155,745398.6,3474765.0
4,13009,33.07193,-83.25085,39530,42.7,13.3,8.64,1.43,17.5,42.39,79,849431.3,3665553.0


#### Standardization Routine

The standardization routine below is commented out because the standardization is instead performed inside the `_compute_betas_gwr()` function of `spglm/iwls.py`.

In [5]:
# g_X = (g_X - g_X.mean(axis=0)) / g_X.std(axis=0)

# # g_y = g_y.reshape((-1,1))

# g_y = (g_y - g_y.mean(axis=0)) / g_y.std(axis=0)

#### Calibrate GWR model

In [7]:
# Search for the GWR bandwidth and report the selected value.
gwr_selector = Sel_BW(g_coords, g_y, g_X)
gwr_bw = gwr_selector.search()
print(gwr_bw)

# Build the GWR model with that bandwidth, then fit it.
gwr_model = GWR(g_coords, g_y, g_X, gwr_bw)
gwr_results = gwr_model.fit()

  x_stdz = (x - x.mean(axis=0)) / x.std(axis=0) # standardize the design matrix


ValueError: array must not contain infs or NaNs

In [6]:
# Repeat of the calibration step: search for the GWR bandwidth and report it.
gwr_selector = Sel_BW(g_coords, g_y, g_X)
gwr_bw = gwr_selector.search()
print(gwr_bw)

# Build the GWR model with that bandwidth, then fit it.
gwr_model = GWR(g_coords, g_y, g_X, gwr_bw)
gwr_results = gwr_model.fit()

  x_stdz = (x - x.mean(axis=0)) / x.std(axis=0) # standardize the design matrix


[[            nan -3.98516575e-01 -3.82805697e-01  2.00641207e-01]
 [            nan  3.64222409e-01 -3.07649962e-02  1.10387586e+00]
 [            nan -6.98743622e-01 -6.90985589e-01 -3.13906405e-01]
 [            nan -8.28571534e-01  1.40105956e+00  1.10387586e+00]
 [            nan  2.42508742e-01  8.65496000e-01 -1.01724487e+00]
 [            nan -6.41943910e-01 -1.37948486e+00  1.10387586e+00]
 [            nan -1.71317729e-01 -9.20677718e-01 -2.06554745e-01]
 [            nan -2.52460174e-01 -1.04937456e+00  1.85834082e-01]
 [            nan -6.50058155e-01  2.27205812e-01 -8.58068268e-01]
 [            nan  4.77668734e-02 -9.10289632e-01 -1.47326243e-01]
 [            nan -5.77183053e-02  8.24520771e-01 -2.00191871e+00]
 [            nan -3.98516575e-01 -2.90467152e-01 -4.54574097e-01]
 [            nan -6.50058155e-01 -1.31657923e+00  1.10387586e+00]
 [            nan  5.10278811e-01  8.12401337e-01 -1.69536931e-01]
 [            nan -5.52687221e-01 -7.23881195e-01  3.85730276e

TypeError: cannot unpack non-iterable NoneType object

In [6]:
# Repeat of the calibration step: search for the GWR bandwidth and report it.
gwr_selector = Sel_BW(g_coords, g_y, g_X)
gwr_bw = gwr_selector.search()
print(gwr_bw)

# Build the GWR model with that bandwidth, then fit it.
gwr_model = GWR(g_coords, g_y, g_X, gwr_bw)
gwr_results = gwr_model.fit()

[[1.000e+00 6.400e-01 2.076e+01 7.560e+01]
 [1.000e+00 1.580e+00 2.686e+01 1.000e+02]
 [1.000e+00 2.700e-01 1.542e+01 6.170e+01]
 [1.000e+00 1.100e-01 5.167e+01 1.000e+02]
 [1.000e+00 1.430e+00 4.239e+01 4.270e+01]
 [1.000e+00 3.400e-01 3.490e+00 1.000e+02]
 [1.000e+00 9.200e-01 1.144e+01 6.460e+01]
 [1.000e+00 8.200e-01 9.210e+00 7.520e+01]
 [1.000e+00 3.300e-01 3.133e+01 4.700e+01]
 [1.000e+00 1.190e+00 1.162e+01 6.620e+01]
 [1.000e+00 1.060e+00 4.168e+01 1.610e+01]
 [1.000e+00 6.400e-01 2.236e+01 5.790e+01]
 [1.000e+00 3.300e-01 4.580e+00 1.000e+02]
 [1.000e+00 1.760e+00 4.147e+01 6.560e+01]
 [1.000e+00 4.500e-01 1.485e+01 8.060e+01]
 [1.000e+00 1.160e+00 2.595e+01 6.320e+01]
 [1.000e+00 4.300e-01 5.219e+01 7.230e+01]
 [1.000e+00 7.200e-01 3.548e+01 7.340e+01]
 [1.000e+00 1.000e-01 5.889e+01 1.000e+02]
 [1.000e+00 2.140e+00 2.019e+01 4.710e+01]
 [1.000e+00 9.600e-01 3.094e+01 5.210e+01]
 [1.000e+00 8.500e-01 1.546e+01 6.850e+01]
 [1.000e+00 3.900e-01 9.100e-01 4.360e+01]
 [1.000e+00

TypeError: cannot unpack non-iterable NoneType object

In [6]:
# Repeat of the calibration step: search for the GWR bandwidth and report it.
gwr_selector = Sel_BW(g_coords, g_y, g_X)
gwr_bw = gwr_selector.search()
print(gwr_bw)

# Build the GWR model with that bandwidth, then fit it.
gwr_model = GWR(g_coords, g_y, g_X, gwr_bw)
gwr_results = gwr_model.fit()

[[ 0.00000000e+00 -3.98516575e-01 -3.82805697e-01  2.00641207e-01]
 [ 0.00000000e+00  3.64222409e-01 -3.07649962e-02  1.10387586e+00]
 [ 0.00000000e+00 -6.98743622e-01 -6.90985589e-01 -3.13906405e-01]
 [ 0.00000000e+00 -8.28571534e-01  1.40105956e+00  1.10387586e+00]
 [ 0.00000000e+00  2.42508742e-01  8.65496000e-01 -1.01724487e+00]
 [ 0.00000000e+00 -6.41943910e-01 -1.37948486e+00  1.10387586e+00]
 [ 0.00000000e+00 -1.71317729e-01 -9.20677718e-01 -2.06554745e-01]
 [ 0.00000000e+00 -2.52460174e-01 -1.04937456e+00  1.85834082e-01]
 [ 0.00000000e+00 -6.50058155e-01  2.27205812e-01 -8.58068268e-01]
 [ 0.00000000e+00  4.77668734e-02 -9.10289632e-01 -1.47326243e-01]
 [ 0.00000000e+00 -5.77183053e-02  8.24520771e-01 -2.00191871e+00]
 [ 0.00000000e+00 -3.98516575e-01 -2.90467152e-01 -4.54574097e-01]
 [ 0.00000000e+00 -6.50058155e-01 -1.31657923e+00  1.10387586e+00]
 [ 0.00000000e+00  5.10278811e-01  8.12401337e-01 -1.69536931e-01]
 [ 0.00000000e+00 -5.52687221e-01 -7.23881195e-01  3.85730276e

TypeError: cannot unpack non-iterable NoneType object

In [None]:
# Show the summary report of the fitted GWR results object.
# NOTE(review): requires a successful fit in a calibration cell above.
gwr_results.summary()

In [26]:
# from sklearn.preprocessing import StandardScaler

In [None]:
# np.mean(gwr_results.params[:, 0])
# # gwr_results.influ

### Generate Pearson's correlation coefficient

In [None]:
# res = GWRResults(gwr_results.model, gwr_results.params, gwr_results.predy, gwr_results.S, gwr_results.CCT, gwr_results.influ )

# a,s,d,f=res.local_collinearity()

# # res.local_collinearity()

# gwr_results.local_collinearity()
# np.mean(res.localR2)

# res.local_collinearity()

In [9]:
# Local collinearity diagnostics of the fitted model; the call yields four
# values, unpacked here as a, s, d, f.
# NOTE(review): presumably local correlations, VIFs, condition numbers and
# variance-decomposition proportions, in that order; confirm against mgwr docs.
collinearity_diagnostics = gwr_results.local_collinearity()
a, s, d, f = collinearity_diagnostics

In [10]:
# Mean of each of the first three columns of `a`, shown as a 3-tuple.
tuple(a[:, column].mean() for column in range(3))

(-0.15138960939787022, -0.5797078107906775, -0.004704691983920316)

#### Global correlation coefficient

In [13]:
from scipy.stats import pearsonr

# Global Pearson correlation between the first predictor column of g_X
# ('PctFB') and the response ('PctBach').
# The p-value is bound to `p_value` rather than `u`, so the X-coordinate
# Series `u` created when preparing the model inputs is not overwritten.
corr, p_value = pearsonr(g_X[:, 0], g_y.flatten())

print(f'{corr:.2f}') # to 2 d.p

0.67


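The cell below contains my modified implementation of the `_compute_betas_gwr()` function, which I edited directly in the `spglm/iwls.py` source file of my conda environment. <br /> <br /> With this modification in place, running the GWR calibration cells in this notebook fails with the errors shown in their outputs above (`ValueError: array must not contain infs or NaNs` and `TypeError: cannot unpack non-iterable NoneType object`).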

In [None]:
def _compute_betas_gwr(y, x, wi):
    """
    compute MLE coefficients using iwls routine

    Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    """

    xw = (x * wi)  # weight before standardization -> result is the weighted design matrix

    x_stdz = (x - x.mean(axis=0)) / x.std(axis=0) # standardize the design matrix 

    xw_stdz = (xw - xw.mean(axis=0)) / xw.std(axis=0)  # standardize the weighted design matrix

    y = (y - y.mean(axis=0)) / y.std(axis=0)  # standardize the dependent variable --> Making sure x and y are standardized


    xT = xw_stdz.T
    xtx = np.dot(xT, x_stdz)
    xtx_inv_xt = linalg.solve(xtx, xT)
    betas = np.dot(xtx_inv_xt, y)
    return betas, xtx_inv_xt