Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes for weighted kernel fits #1103

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions statsmodels/nonparametric/kde.py
Expand Up @@ -150,6 +150,7 @@ def fit(self, kernel="gau", bw="scott", fft=True, weights=None,
self.kernel = kernel_switch[kernel](h=bw) # we instantiate twice,
# should this be passed to funcs?
# put here to ensure empty cache after re-fit with new options
self.kernel.weights = weights
self._cache = resettable_cache()

@cache_readonly
Expand Down Expand Up @@ -334,6 +335,8 @@ def kdensity(X, kernel="gau", bw="scott", weights=None, gridsize=None,
weights = np.ones(nobs)
q = nobs
else:
# ensure weights is a numpy array
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this is the best solution, but the error raised when weights were passed as, for example, a list was very uninformative.

weights = np.asarray(weights)
if len(weights) != len(clip_x):
msg = "The length of the weights must be the same as the given X."
raise ValueError(msg)
Expand Down
13 changes: 11 additions & 2 deletions statsmodels/sandbox/nonparametric/kernels.py
Expand Up @@ -48,6 +48,7 @@ def __init__(self, n, kernels = None, H = None):
kernels = Gaussian()

self._kernels = kernels
self.weights = None

if H is None:
H = np.matrix( np.identity(n))
Expand All @@ -66,13 +67,17 @@ def setH(self, value):
H = property(getH, setH, doc="Kernel bandwidth matrix")

def density(self, xs, x):

n = len(xs)
#xs = self.inDomain( xs, xs, x )[0]

if len(xs)>0: ## Need to do product of marginal distributions
#w = np.sum([self(self._Hrootinv * (xx-x).T ) for xx in xs])/n
#vectorized doesn't work:
w = np.mean(self((xs-x) * self._Hrootinv )) #transposed
if self.weights is not None:
w = np.mean(self((xs-x) * self._Hrootinv).T * self.weights)/sum(self.weights)
else:
w = np.mean(self((xs-x) * self._Hrootinv )) #transposed
#w = np.mean([self(xd) for xd in ((xs-x) * self._Hrootinv)] ) #transposed
return w
else:
Expand Down Expand Up @@ -137,6 +142,7 @@ def __init__(self, shape, h = 1.0, domain = None, norm = None):
norm = 1.0
self._normconst = norm
self.domain = domain
self.weights = None
if callable(shape):
self._shape = shape
else:
Expand Down Expand Up @@ -185,7 +191,10 @@ def density(self, xs, x):
xs = xs[:,None]
if len(xs)>0:
h = self.h
w = 1/h * np.mean(self((xs-x)/h), axis=0)
if self.weights is not None:
w = 1/h * np.sum(self((xs-x)/h).T * self.weights, axis=1)
else:
w = 1/h * np.mean(self((xs-x)/h), axis=0)
return w
else:
return np.nan
Expand Down