Skip to content

Commit

Permalink
BUG: gaussian_kde: make it work for integer array input. Closes #1181.
Browse files Browse the repository at this point in the history
Do some PEP8 cleanup while we're at it.
  • Loading branch information
rgommers committed Jan 3, 2012
1 parent f01c026 commit 8317921
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 24 deletions.
49 changes: 25 additions & 24 deletions scipy/stats/kde.py
Expand Up @@ -196,7 +196,7 @@ def evaluate(self, points):
the dimensionality of the KDE.
"""
points = atleast_2d(points).astype(self.dataset.dtype)
points = atleast_2d(points)

d, m = points.shape
if d != self.d:
Expand All @@ -209,24 +209,24 @@ def evaluate(self, points):
self.d)
raise ValueError(msg)

result = zeros((m,), points.dtype)
result = zeros((m,), dtype=np.float)

if m >= self.n:
# there are more points than data, so loop over data
for i in range(self.n):
diff = self.dataset[:,i,newaxis] - points
diff = self.dataset[:, i, newaxis] - points
tdiff = dot(self.inv_cov, diff)
energy = sum(diff*tdiff,axis=0)/2.0
result += exp(-energy)
energy = sum(diff*tdiff,axis=0) / 2.0
result = result + exp(-energy)
else:
# loop over points
for i in range(m):
diff = self.dataset - points[:,i,newaxis]
diff = self.dataset - points[:, i, newaxis]
tdiff = dot(self.inv_cov, diff)
energy = sum(diff*tdiff,axis=0)/2.0
result[i] = sum(exp(-energy),axis=0)
energy = sum(diff * tdiff, axis=0) / 2.0
result[i] = sum(exp(-energy), axis=0)

result /= self._norm_factor
result = result / self._norm_factor

return result

Expand Down Expand Up @@ -263,15 +263,16 @@ def integrate_gaussian(self, mean, cov):
raise ValueError("covariance does not have dimension %s" % self.d)

# make mean a column vector
mean = mean[:,newaxis]
mean = mean[:, newaxis]

sum_cov = self.covariance + cov

diff = self.dataset - mean
tdiff = dot(linalg.inv(sum_cov), diff)

energies = sum(diff*tdiff,axis=0)/2.0
result = sum(exp(-energies),axis=0)/sqrt(linalg.det(2*pi*sum_cov))/self.n
energies = sum(diff * tdiff, axis=0) / 2.0
result = sum(exp(-energies), axis=0) / sqrt(linalg.det(2 * pi * \
sum_cov)) / self.n

return result

Expand Down Expand Up @@ -300,11 +301,11 @@ def integrate_box_1d(self, low, high):

stdev = ravel(sqrt(self.covariance))[0]

normalized_low = ravel((low - self.dataset)/stdev)
normalized_high = ravel((high - self.dataset)/stdev)
normalized_low = ravel((low - self.dataset) / stdev)
normalized_high = ravel((high - self.dataset) / stdev)

value = np.mean(special.ndtr(normalized_high) -
special.ndtr(normalized_low))
value = np.mean(special.ndtr(normalized_high) - \
special.ndtr(normalized_low))
return value


Expand Down Expand Up @@ -332,10 +333,10 @@ def integrate_box(self, low_bounds, high_bounds, maxpts=None):
extra_kwds = {}

value, inform = mvn.mvnun(low_bounds, high_bounds, self.dataset,
self.covariance, **extra_kwds)
self.covariance, **extra_kwds)
if inform:
msg = ('An integral in mvn.mvnun requires more points than %s' %
(self.d * 1000))
(self.d * 1000))
warnings.warn(msg)

return value
Expand Down Expand Up @@ -373,14 +374,14 @@ def integrate_kde(self, other):
sum_cov = small.covariance + large.covariance
result = 0.0
for i in range(small.n):
mean = small.dataset[:,i,newaxis]
mean = small.dataset[:, i, newaxis]
diff = large.dataset - mean
tdiff = dot(linalg.inv(sum_cov), diff)

energies = sum(diff*tdiff,axis=0)/2.0
result += sum(exp(-energies),axis=0)
energies = sum(diff * tdiff, axis=0) / 2.0
result += sum(exp(-energies), axis=0)

result /= sqrt(linalg.det(2*pi*sum_cov))*large.n*small.n
result /= sqrt(linalg.det(2 * pi * sum_cov)) * large.n * small.n

return result

Expand All @@ -403,9 +404,9 @@ def resample(self, size=None):
size = self.n

norm = transpose(multivariate_normal(zeros((self.d,), float),
self.covariance, size=size))
self.covariance, size=size))
indices = randint(0, self.n, size=size)
means = self.dataset[:,indices]
means = self.dataset[:, indices]

return means + norm

Expand Down
8 changes: 8 additions & 0 deletions scipy/stats/tests/test_kdeoth.py
Expand Up @@ -155,3 +155,11 @@ def test_gaussian_kde_monkeypatch():

assert_array_almost_equal_nulp(y1, y2, nulp=10)


def test_kde_integer_input():
"""Regression test for #1181."""
x1 = np.arange(5)
kde = stats.gaussian_kde(x1)
y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
assert_array_almost_equal(kde(x1), y_expected, decimal=6)

0 comments on commit 8317921

Please sign in to comment.