## Geographically Weighted Correlation Coefficient

### Import relevant dependencies

In [1]:
import pandas as pd
import numpy as np
import geopandas as gp
import libpysal as ps
from mgwr.gwr import GWR, MGWR, GWRResults
from mgwr.sel_bw import Sel_BW


georgia_data = pd.read_csv(ps.examples.get_path('GData_utm.csv'))
georgia_shp = gp.read_file(ps.examples.get_path('G_utm.shp'))

g_y = georgia_data['PctBach'].values.reshape((-1,1))
g_X = georgia_data[['PctFB', 'PctRural']].values
u = georgia_data['X']
v = georgia_data['Y']


g_coords = list(zip(u,v))
# georgia_data.head()

#### Standardization Routine

g_X_std = (g_X - g_X.mean(axis=0)) / g_X.std(axis=0) 

g_y_std = (g_y - g_y.mean(axis=0)) / g_y.std(axis=0)

#### Calibrate GWR model

In [2]:
g_y = georgia_data['PctBach'].values.reshape((-1,1))
g_X = georgia_data[['PctFB']].values
u = georgia_data['X']
v = georgia_data['Y']


g_coords = list(zip(u,v))

gwr_results = GWR(g_coords, g_y_std, g_X_std, 70).fit()

gwr_results.summary()

Model type                                                         Gaussian
Number of observations:                                                 159
Number of covariates:                                                     2

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                             87.210
Log-likelihood:                                                    -177.864
AIC:                                                                359.729
AICc:                                                               361.883
BIC:                                                               -708.608
R2:                                                                   0.452
Adj. R2:                                                              0.448

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

### GW - Correlation Coefficient


$$
   \frac{(x_j - \overline x_i)(y_j - \overline y_i) w_ij}{\sqrt (x_j - \overline x_i)^2 . \sqrt (y_j - \overline y_i)^2 }\
$$

From Page 162-163 of [GWR - The analysis of spatially varying relationships](https://www.academia.edu/33626785/Geographically_Weighted_Regression_The_Analysis_of_Spatially_Varying_Relationships_Wiley_2002)


#### Implementation

In [3]:
import math

wi =  np.sqrt(gwr_results.model._build_wi(0, 60).reshape(-1,1))

X = g_X
Y = g_y

Xmean = (sum(X*wi)/len(X))
Ymean = (sum(Y*wi)/len(Y))

x_diff = [var-Xmean for var in X]
y_diff = [var-Ymean for var in Y]

xy =([a*b for a,b in list(zip(x_diff,y_diff))])*wi
sum_xy = sum(xy)

x_square = [a*a for a in x_diff]
y_square = [b*b for b in y_diff]

sum_x_square = sum(x_square)
sum_y_square = sum(y_square)

sum_x_square_sum_y_square = sum_x_square*sum_y_square
sqrt_sum_x_square_sum_y_square = math.sqrt(sum_x_square_sum_y_square)

r = sum_xy/sqrt_sum_x_square_sum_y_square

# print(wi*sum_xy)
print(r)


[0.08492393]


In [4]:
# wi =  gwr_results.model._build_wi(0, 60).reshape(-1,1)
# wi = np.sqrt(wi)

# xcc = g_X
# xcc_mean = (xcc).mean(axis=0)

# ycc = g_y
# ycc_mean = (ycc).mean(axis=0)

# numerator = (((xcc - xcc_mean) * (ycc - ycc_mean)))
# denom = (np.sqrt((xcc - xcc_mean)**2)) * (np.sqrt((ycc - ycc_mean)**2))

# r = (numerator/denom)
# # print(r)
# r.mean(axis=0)

### Local OLS and CorrCoef FOR 1 location (data point)

In [5]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr

scaler = StandardScaler()

wi_OLS =  gwr_results.model._build_wi(0, 60).reshape(-1,1)

# wi_OLS = np.sqrt(wi_OLS)

x = g_X
y= g_y

xi = x * wi_OLS
yi = y * wi_OLS

corr_coef, _ = pearsonr(xi.flatten(), yi.flatten())
print(corr_coef)

xi_std = scaler.fit_transform(xi)
yi_std = scaler.fit_transform(yi)


sm.OLS(yi_std, xi_std).fit().summary()

0.7279005604317853


0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.53
Model:,OLS,Adj. R-squared (uncentered):,0.527
Method:,Least Squares,F-statistic:,178.1
Date:,"Fri, 23 Sep 2022",Prob (F-statistic):,1.11e-27
Time:,12:25:29,Log-Likelihood:,-165.61
No. Observations:,159,AIC:,333.2
Df Residuals:,158,BIC:,336.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.7279,0.055,13.344,0.000,0.620,0.836

0,1,2,3
Omnibus:,43.479,Durbin-Watson:,2.054
Prob(Omnibus):,0.0,Jarque-Bera (JB):,92.157
Skew:,1.209,Prob(JB):,9.740000000000001e-21
Kurtosis:,5.839,Cond. No.,1.0


In [21]:
# gwr_results.params[0,1]
gwr_results.params[0,1]  # So its not the same thing as what I am getting because inside GWR is still using the old methodology
                        # of doing things. I need to go and update the code of GWR to get the output I am having as coefficient
                        # in the above cell. What I am having above is the correct thing actuallu. 

0.32697283203879474

#### Global correlation coefficients

In [23]:
from scipy.stats import pearsonr

# for i in range(3):
#     corr, u = pearsonr(g_X[:,i], g_y.flatten())
    
#     print(corr)

corr, u = pearsonr(g_y.flatten(), g_X.flatten() )
corr

0.6719466884233966

 The cell below has my implementation of the _compute_betas_gwr()_ function from inside of the conda environment `spglm/iwls.py` source code. 

In [None]:
def _compute_betas_gwr(y, x, wi):
    """
    compute MLE coefficients using iwls routine

    Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    """

    xw = (x * wi).T  # weight before standardization -> result is the weighted design matrix

    xw_stdz = scaler.fit_transform(xw)   # standardize the design matrix -- after weighting
    
    x_stdz = scaler.fit_transform(x)     # standardize x 
                    
    y = scaler.fit_transform(y)          # standardize the y


    xtx = np.dot(xw_stdz, x_stdz)
    xtx_inv_xt = linalg.solve(xtx, xw_stdz)
    betas = np.dot(xtx_inv_xt, y)
    return betas, xtx_inv_xt