Skip to content

Commit

Permalink
Added tests for weighted KDE
Browse files Browse the repository at this point in the history
  • Loading branch information
tommyod committed Apr 1, 2018
1 parent 51bcf3e commit 40cae2c
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 8 deletions.
16 changes: 8 additions & 8 deletions KDEpy/kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,15 @@ def __init__(self, kernel='gaussian', bw=1):

self.bw = bw

def fit(self, data, boundaries=None):
def fit(self, data, weights=None, boundaries=None):
"""
Fit the kernel density estimator to the data.
Boundaries may be a tuple.
"""
self._data = np.asarray_chkfinite(data)

# If no weights are passed, weight each data point as unity
self.weights = self._set_weights(weights)

if not boundaries:
boundaries = (-np.inf, np.inf)
Expand All @@ -72,7 +75,7 @@ def _bw_selection(self):

return _bw_methods[self.bw](self._data)

def evaluate_naive(self, grid_points, weights=None):
def evaluate_naive(self, grid_points):
"""
Naive evaluation. Used primarily for testing.
grid_points : np.array, evaluation points
Expand All @@ -81,16 +84,13 @@ def evaluate_naive(self, grid_points, weights=None):
"""
# Return the array converted to a float type
grid_points = np.asfarray(grid_points)

# If no weights are passed, weight each data point as unity
weights = self._set_weights(weights)

# Create zeros on the grid points
evaluated = np.zeros_like(grid_points)

# For every data point, compute the kernel and add to the grid
bw = self._bw_selection()
for weight, data_point in zip(weights, self._data):
for weight, data_point in zip(self.weights, self._data):
evaluated += weight * self.kernel(grid_points - data_point, bw=bw)

return evaluated
Expand Down Expand Up @@ -152,7 +152,7 @@ def _eval_sorted(self, data_sorted, weights_sorted, grid_point, len_data,
weighted_estimates = np.dot(kernel_estimates, weights_subset)
return np.sum(weighted_estimates)

def evaluate_sorted(self, grid_points, weights=None, tolerance=10e-6):
def evaluate_sorted(self, grid_points, tolerance=10e-6):
"""
Evaluated by sorting and using binary search.
Expand All @@ -163,7 +163,7 @@ def evaluate_sorted(self, grid_points, weights=None, tolerance=10e-6):
# return self.evaluate_naive(grid_points, weights = weights)

# If no weights are passed, weight each data point as unity
weights = self._set_weights(weights)
weights = self.weights

# Sort the data and the weights
indices = np.argsort(self._data)
Expand Down
31 changes: 31 additions & 0 deletions KDEpy/tests/test_kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,37 @@ def test_naive_vs_sorted_eval(self, data, bw, n, kernel):
atol=10e-2, rtol=0)


@pytest.mark.parametrize("data, weights",
                         [(np.array([0, 1, 2]), np.array([1, 5, 1])),
                          (np.array([-9, 1, 9]), np.array([3, 5, 1])),
                          (np.array([-3, 0, 2]), np.array([4, 5, 0]))])
def test_weighted_naive(self, data, weights):
    """
    Check naive evaluation with integer weights against repeated data.

    Fitting with integer `weights` is expected to give the same estimate
    as fitting unweighted data in which every point is repeated `weights`
    times.
    """
    # Evaluate on ten equidistant points spanning the original data
    grid = np.linspace(np.min(data), np.max(data), num=10)

    # Estimate from the weighted fit
    weighted_kde = KDE(kernel='gaussian', bw=1).fit(data, weights=weights)
    weighted_estimate = weighted_kde.evaluate_naive(grid)

    # Estimate from the unweighted fit on the repeated data
    repeated_data = np.repeat(data, weights)
    repeated_kde = KDE(kernel='gaussian', bw=1).fit(repeated_data)
    repeated_estimate = repeated_kde.evaluate_naive(grid)

    assert np.allclose(weighted_estimate, repeated_estimate)

@pytest.mark.parametrize("data, weights",
                         [(np.array([0, 1, 2]), np.array([1, 5, 1])),
                          (np.array([-9, 1, 9]), np.array([3, 5, 1])),
                          (np.array([-3, 0, 2]), np.array([4, 5, 0]))])
def test_weighted_sorted(self, data, weights):
    """
    Check sorted evaluation with integer weights against repeated data.

    Fitting with integer `weights` is expected to give the same estimate
    as fitting unweighted data in which every point is repeated `weights`
    times.
    """
    # Evaluate on ten equidistant points spanning the original data
    grid = np.linspace(np.min(data), np.max(data), num=10)

    # Estimate from the weighted fit
    weighted_kde = KDE(kernel='gaussian', bw=1).fit(data, weights=weights)
    weighted_estimate = weighted_kde.evaluate_sorted(grid)

    # Estimate from the unweighted fit on the repeated data
    repeated_data = np.repeat(data, weights)
    repeated_kde = KDE(kernel='gaussian', bw=1).fit(repeated_data)
    repeated_estimate = repeated_kde.evaluate_sorted(grid)

    assert np.allclose(weighted_estimate, repeated_estimate)


if __name__ == "__main__":
    # Run pytest on the current directory, including doctests, verbosely.
    # Append '--durations=10' to the list to show potentially slow tests.
    pytest_args = ['.', '--doctest-modules', '-v']
    pytest.main(args=pytest_args)
Expand Down

0 comments on commit 40cae2c

Please sign in to comment.