Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gsoc19 - updates for Poisson and Binomial MGWR #60

Merged
merged 28 commits into from Jul 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4ad7706
adding a univariate example for Poisson MGWR
mehak-sachdeva Jun 18, 2019
09ef414
adding important equations for reference
mehak-sachdeva Jun 18, 2019
3ed2f41
minor error update
mehak-sachdeva Jun 18, 2019
edf4bcf
cleaning
mehak-sachdeva Jun 18, 2019
f3c957b
adding the notebook for Binomial MGWR model
mehak-sachdeva Jun 18, 2019
e44555b
changes to accommodate Poisson and Binomial MGWR
mehak-sachdeva Jun 19, 2019
2c2edc3
changes to accommodate Poisson and Binomial MGWR
mehak-sachdeva Jun 19, 2019
48b9766
Merge remote-tracking branch 'upstream/master' into multi_bw_changes
mehak-sachdeva Jun 22, 2019
ac981d7
fixed merge conflicts
mehak-sachdeva Jun 22, 2019
279f518
fixing MGWR init error
mehak-sachdeva Jun 24, 2019
0dc24c3
moving binomial edits to other branch
mehak-sachdeva Jun 25, 2019
0167b26
binomial_mgwr_updates
mehak-sachdeva Jun 27, 2019
b487e6e
real data example notebook Poisson-MGWR
mehak-sachdeva Jun 27, 2019
f0cda34
copying simulated data example to this branch
mehak-sachdeva Jun 27, 2019
bdf6259
adding notebook for Binomial MGWR discussion
mehak-sachdeva Jun 27, 2019
fec7f95
updates to Poisson notebooks and adding Binomial notebooks for discus…
mehak-sachdeva Jul 4, 2019
6ee25bf
Poisson monte carlo script for reference
mehak-sachdeva Jul 4, 2019
0872fdc
adding Poisson Monte Carlo results notebook
mehak-sachdeva Jul 4, 2019
b92c1b5
adding result objects folder
mehak-sachdeva Jul 8, 2019
ff2eab5
changing inference calculations for Poisson MGWR
mehak-sachdeva Jul 16, 2019
db116e1
cleaning
mehak-sachdeva Jul 16, 2019
775d4ee
adding parameters to MC design for Poisson MGWR
mehak-sachdeva Jul 16, 2019
2614cb6
second try for inference - Poisson MGWR
mehak-sachdeva Jul 17, 2019
5d2a607
cleaning
mehak-sachdeva Jul 18, 2019
488783d
adding mega notebook for Poisson MGWR
mehak-sachdeva Jul 18, 2019
828efe3
Monte carlo results, code and notebook update
mehak-sachdeva Jul 18, 2019
eb7a4ba
adding structure to MC results ipynb
mehak-sachdeva Jul 18, 2019
9610e01
adding structure to all Poisson notebooks
mehak-sachdeva Jul 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 8 additions & 5 deletions mgwr/diagnostics.py
Expand Up @@ -11,34 +11,37 @@
def get_AICc(gwr):
    """
    Get AICc value

    Gaussian: p61, (2.33), Fotheringham, Brunsdon and Charlton (2002)

    GWGLM: AICc=AIC+2k(k+1)/(n-k-1), Nakaya et al. (2005): p2704, (36)

    Parameters
    ----------
    gwr     : fitted GWR/MGWR results object exposing ``n`` (number of
              observations), ``tr_S`` (effective number of parameters),
              ``llf`` (log-likelihood) and ``family``.

    Returns
    -------
    aicc    : float
              corrected Akaike information criterion.
              NOTE(review): only Gaussian, Poisson and Binomial families are
              handled; any other family leaves ``aicc`` unbound and raises.
    """
    n = gwr.n     # number of observations
    k = gwr.tr_S  # effective number of parameters (trace of hat matrix S)
    if isinstance(gwr.family, Gaussian):
        # Equivalent to n*log(sigma2) + n*log(2*pi) + n*(n+k)/(n-k-2.0), but
        # the denominator of sigma cannot be controlled without altering the
        # GLM family code, so the log-likelihood form is used instead.
        aicc = -2.0 * gwr.llf + 2.0 * n * (k + 1.0) / (n - k - 2.0)
    elif isinstance(gwr.family, (Poisson, Binomial)):
        # Nakaya et al. (2005), eq. (36): deviance-based AIC plus the
        # small-sample correction term.
        aicc = get_AIC(gwr) + 2.0 * k * (k + 1.0) / (n - k - 1.0)
    return aicc


def get_AIC(gwr):
"""
Get AIC calue
Get AIC value

Gaussian: p96, (4.22), Fotheringham, Brunsdon and Charlton (2002)

GWGLM: AIC(G)=D(G) + 2K(G), where D and K denote the deviance and the effective
number of parameters in the model with bandwidth G, respectively.

"""
k = gwr.tr_S
#deviance = -2*log-likelihood
Expand Down
70 changes: 49 additions & 21 deletions mgwr/gwr.py
Expand Up @@ -134,7 +134,7 @@ class GWR(GLM):
spherical : boolean
True for spherical coordinates (long-lat),
False for projected coordinates (default).

hat_matrix : boolean
True to store full n by n hat matrix,
False to not store full hat matrix to minimize memory footprint (default).
Expand Down Expand Up @@ -516,10 +516,10 @@ class GWRResults(GLMResults):

R2 : float
R-squared for the entire model (1- RSS/TSS)

adj_R2 : float
adjusted R-squared for the entire model

aic : float
Akaike information criterion

Expand Down Expand Up @@ -575,11 +575,11 @@ class GWRResults(GLMResults):
pDev : float
local percent of deviation accounted for; analogous to
r-squared for GLM's

D2 : float
percent deviance explained for GLM, equivalent to R2 for
Gaussian.

adj_D2 : float
adjusted percent deviance explained, equivalent to adjusted
R2 for Gaussian.
Expand Down Expand Up @@ -1438,8 +1438,8 @@ class MGWR(GWR):

"""

def __init__(self, coords, y, X, selector, sigma2_v1=True,
kernel='bisquare', fixed=False, constant=True,
def __init__(self, coords, y, X, selector, family, offset=None,
sigma2_v1=True, kernel='bisquare', fixed=False, constant=True,
spherical=False, hat_matrix=False):
"""
Initialize class
Expand All @@ -1448,17 +1448,18 @@ def __init__(self, coords, y, X, selector, sigma2_v1=True,
self.bws = self.selector.bw[0] #final set of bandwidth
self.bws_history = selector.bw[1] #bws history in backfitting
self.bw_init = self.selector.bw_init #initialization bandiwdth
self.family = Gaussian(
) # manually set since we only support Gassian MGWR for now
GWR.__init__(self, coords, y, X, self.bw_init, family=self.family,
sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed,
constant=constant, spherical=spherical,
self.family = family
self.offset = offset
# family and offset are now passed through, so Poisson/Binomial MGWR are supported in addition to Gaussian
GWR.__init__(self, coords, y, X, self.bw_init, self.family,
self.offset,sigma2_v1=sigma2_v1, kernel=kernel,
fixed=fixed, constant=constant, spherical=spherical,
hat_matrix=hat_matrix)
self.selector = selector
self.sigma2_v1 = sigma2_v1
self.points = None
self.P = None
self.offset = None
self.family = family
self.exog_resid = None
self.exog_scale = None
self_fit_params = None
Expand All @@ -1483,7 +1484,14 @@ def _chunk_compute_R(self, chunk_id=0):

for i in range(n):
wi = self._build_wi(i, self.bw_init).reshape(-1, 1)
xT = (self.X * wi).T
if isinstance(self.family, Poisson):
wi=wi.reshape(-1,1)
rslt = iwls(self.y, self.X, self.family, self.offset, None, wi=wi)
inv_xtx_xt = rslt[5]
w = rslt[3]
xT = (self.X * w).T
else:
xT = (self.X * wi).T
P = np.linalg.solve(xT.dot(self.X), xT).dot(init_pR).T
pR[i, :, :] = P * self.X[i]

Expand All @@ -1502,8 +1510,16 @@ def _chunk_compute_R(self, chunk_id=0):
for i in range(len(chunk_index_Aj)):
index = chunk_index_Aj[i]
wi = self._build_wi(index, self.bws_history[iter_i, j])
xw = Xj * wi
pAj[i, :] = Xj[index] / np.sum(xw * Xj) * xw
if isinstance(self.family, Poisson):
Xj = Xj.reshape(-1,1)
wi = wi.reshape(-1,1)
rslt = iwls(self.y, Xj, self.family, self.offset, None, wi=wi)

w = rslt[3]
xw = Xj * w
else:
xw = Xj * wi
pAj[i, :] = (Xj[index] / np.sum(xw * Xj) * xw).reshape(-1)
pR[chunk_index_Aj, :, j] = pAj.dot(pRj_old)
err = pRj_old - pR[:, :, j]

Expand All @@ -1520,21 +1536,33 @@ def _chunk_compute_R(self, chunk_id=0):
def fit(self, n_chunks=1, pool=None):
"""
Compute MGWR inference by chunk to reduce memory footprint.

Parameters
----------

n_chunks : integer, optional
A number of chunks parameter to reduce memory usage.
A number of chunks parameter to reduce memory usage.
e.g. n_chunks=2 should reduce overall memory usage by 2.
pool : A multiprocessing Pool object to enable parallel fitting; default is None.

Returns
-------
: MGWRResults
"""
#self.fit_params['ini_params'] = ini_params
#self.fit_params['tol'] = tol
#self.fit_params['max_iter'] = max_iter

params = self.selector.params
predy = np.sum(self.X * params, axis=1).reshape(-1, 1)

if isinstance(self.family,Poisson):
predy = self.offset*(np.exp(np.sum(self.X * params, axis=1).reshape(-1, 1)))

elif isinstance(self.family,Binomial):
predy = 1/(1+np.exp(-1*np.sum(self.X * params, axis=1).reshape(-1, 1)))

else:
predy = np.sum(self.X * params, axis=1).reshape(-1, 1)

try:
from tqdm.autonotebook import tqdm #progress bar
Expand Down Expand Up @@ -1692,7 +1720,7 @@ class MGWRResults(GWRResults):

R2 : float
R-squared for the entire model (1- RSS/TSS)

adj_R2 : float
adjusted R-squared for the entire model

Expand Down
36 changes: 32 additions & 4 deletions mgwr/search.py
Expand Up @@ -4,6 +4,7 @@

import numpy as np
from copy import deepcopy
from spglm.family import Gaussian, Binomial, Poisson


def golden_section(a, c, delta, function, tol, max_iter, int_score=False,
Expand Down Expand Up @@ -164,8 +165,8 @@ def equal_interval(l_bound, u_bound, interval, function, int_score=False,
return opt_val, opt_score, output


def multi_bw(init, y, X, n, k, family, tol, max_iter, rss_score, gwr_func,
bw_func, sel_func, multi_bw_min, multi_bw_max, bws_same_times,
def multi_bw(init, y, X, n, k, family, offset, tol, max_iter, rss_score, gwr_func,
gwr_func_g, bw_func, bw_func_g, sel_func, multi_bw_min, multi_bw_max, bws_same_times,
verbose=False):
"""
Multiscale GWR bandwidth search procedure using iterative GAM backfitting
Expand All @@ -180,7 +181,17 @@ def multi_bw(init, y, X, n, k, family, tol, max_iter, rss_score, gwr_func,
err = optim_model.resid_response.reshape((-1, 1))
param = optim_model.params

XB = np.multiply(param, X)
if isinstance(family, Poisson):
XB = offset*np.exp(np.multiply(param,X))
elif isinstance(family, Binomial):
#v = np.multiply(X, param)
#XB = 1 / (1 + np.exp(-1 * v))
#XB = v + ((1 / (mu * (1 - mu))) * (y - mu))
XB = 1/(1+np.exp(-1*np.multiply(param,X)))
#XB=np.log(XB/(1-XB))
else:
XB = np.multiply(param, X)

if rss_score:
rss = np.sum((err)**2)
iters = 0
Expand All @@ -205,6 +216,10 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range
temp_y = XB[:, j].reshape((-1, 1))
temp_y = temp_y + err
temp_X = X[:, j].reshape((-1, 1))

#if isinstance(family, Binomial):
#bw_class = bw_func_g(temp_y, temp_X)
#else:
bw_class = bw_func(temp_y, temp_X)

if np.all(bw_stable_counter == bws_same_times):
Expand All @@ -217,10 +232,16 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range
else:
bw_stable_counter = np.ones(k)

#if isinstance(family, Binomial):
#optim_model = gwr_func_g(temp_y, temp_X, bw)

#else:
optim_model = gwr_func(temp_y, temp_X, bw)
err = optim_model.resid_response.reshape((-1, 1))
param = optim_model.params.reshape((-1, ))
#new_XB[:,j] = 1/(1+np.exp(-1*np.sum(temp_X * param, axis=1).reshape(-1)))
new_XB[:, j] = optim_model.predy.reshape(-1)
#new_XB[:, j] = np.log(new_XB[:,j]/(1-new_XB[:,j]))
params[:, j] = param
bws[j] = bw

Expand All @@ -230,7 +251,14 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range
XB = new_XB

if rss_score:
predy = np.sum(np.multiply(params, X), axis=1).reshape((-1, 1))
if isinstance(family, Poisson):
predy = offset*(np.exp(np.sum(X * params, axis=1).reshape(-1, 1)))

elif isinstance(family,Binomial):
predy = 1/(1+np.exp(-1*np.sum(X * params, axis=1).reshape(-1, 1)))

else:
predy = np.sum(np.multiply(params, X), axis=1).reshape((-1, 1))
new_rss = np.sum((y - predy)**2)
score = np.abs((new_rss - rss) / new_rss)
rss = new_rss
Expand Down
32 changes: 27 additions & 5 deletions mgwr/sel_bw.py
Expand Up @@ -136,7 +136,7 @@ class Sel_BW(object):
>>> pov = np.array(data.by_col('PctPov')).reshape((-1,1))
>>> african_amer = np.array(data.by_col('PctBlack')).reshape((-1,1))
>>> X = np.hstack([rural, pov, african_amer])

Golden section search AICc - adaptive bisquare

>>> bw = Sel_BW(coords, y, X).search(criterion='AICc')
Expand Down Expand Up @@ -213,7 +213,7 @@ def search(self, search_method='golden_section', criterion='AICc',
min value used in bandwidth search
bw_max : float
max value used in bandwidth search
multi_bw_min : list
multi_bw_min : list
min values used for each covariate in mgwr bandwidth search.
Must be either a single value or have one value for
each covariate including the intercept
Expand Down Expand Up @@ -374,12 +374,34 @@ def _mbw(self):
bws_same_times = self.bws_same_times

def gwr_func(y, X, bw):
    # Fit a single-bandwidth GWR for the given response/design at bw,
    # using a lightweight fit (lite=True, no full hat matrix) to keep
    # the memory footprint of the backfitting loop small.
    model = GWR(coords, y, X, bw, family=self.family,
                kernel=kernel, fixed=fixed, offset=offset,
                constant=False, spherical=self.spherical,
                hat_matrix=False)
    return model.fit(lite=True, pool=self.pool)

def gwr_func_g(y, X, bw):
    """
    Gaussian-family variant of gwr_func: fits a lightweight GWR at
    bandwidth ``bw`` forcing a Gaussian family, regardless of the
    family configured on this selector.

    Fix: the original first bound ``family = self.family`` and then
    immediately overwrote it with ``Gaussian()`` — the first assignment
    was dead code and has been removed.
    """
    family = Gaussian()
    return GWR(coords, y, X, bw, family=family, kernel=kernel,
               fixed=fixed, offset=offset, constant=False,
               spherical=self.spherical, hat_matrix=False).fit(
                   lite=True, pool=self.pool)

def bw_func(y, X):
    # Build a bandwidth selector for a single-covariate partial model,
    # inheriting the family, kernel and coordinate settings of this
    # Sel_BW instance (no global terms, no added constant).
    return Sel_BW(coords, y, X, X_glob=[],
                  family=self.family, kernel=kernel,
                  fixed=fixed, offset=offset, constant=False,
                  spherical=self.spherical)

def bw_func_g(y, X):
family = self.family
family = Gaussian()
selector = Sel_BW(coords, y, X, X_glob=[], family=family,
kernel=kernel, fixed=fixed, offset=offset,
constant=False, spherical=self.spherical)
Expand All @@ -391,9 +413,9 @@ def sel_func(bw_func, bw_min=None, bw_max=None):
bw_min=bw_min, bw_max=bw_max, interval=interval, tol=tol,
max_iter=max_iter, pool=self.pool, verbose=False)

self.bw = multi_bw(self.init_multi, y, X, n, k, family, self.tol_multi,
self.max_iter_multi, self.rss_score, gwr_func,
bw_func, sel_func, multi_bw_min, multi_bw_max,
self.bw = multi_bw(self.init_multi, y, X, n, k, family, offset, self.tol_multi,
self.max_iter_multi, self.rss_score, gwr_func,gwr_func_g,
bw_func,bw_func_g,sel_func, multi_bw_min, multi_bw_max,
bws_same_times, verbose=self.verbose)

def _init_section(self, X_glob, X_loc, coords, constant):
Expand Down