Skip to content

Commit

Permalink
ran black (#107)
Browse files Browse the repository at this point in the history
  • Loading branch information
tommyod committed Feb 10, 2022
1 parent 183587a commit b1a34fd
Show file tree
Hide file tree
Showing 13 changed files with 58 additions and 89 deletions.
2 changes: 1 addition & 1 deletion KDEpy/TreeKDE.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def evaluate(self, grid_points=None, eps=10e-4):
# TODO: Is this epsilon value sensible?
# Scipy 1.3.0 introduced error: ValueError: ndarray is not C-contiguous
grid_point = np.ascontiguousarray(grid_point)
indices = tree.query_ball_point(x=grid_point, r=kernel_radius, p=self.norm, eps=eps * obs ** 0.5)
indices = tree.query_ball_point(x=grid_point, r=kernel_radius, p=self.norm, eps=eps * obs**0.5)

# Use broadcasting to find x-values (distances)
x = grid_point - self.data[indices]
Expand Down
6 changes: 3 additions & 3 deletions KDEpy/bw_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def _fixed_point(t, N, I_sq, a2):
ell = 7

# Fast evaluation of |f^l|^2 using the DCT, see Plancherel theorem
f = (0.5) * np.pi ** (2 * ell) * np.sum(np.power(I_sq, ell) * a2 * np.exp(-I_sq * np.pi ** 2 * t))
f = (0.5) * np.pi ** (2 * ell) * np.sum(np.power(I_sq, ell) * a2 * np.exp(-I_sq * np.pi**2 * t))

# Norm of a function, should never be negative
if f <= 0:
Expand All @@ -76,7 +76,7 @@ def _fixed_point(t, N, I_sq, a2):
time = np.power((2 * const * K0 / (N * f)), (2.0 / (3.0 + 2.0 * s)))

# Step two: estimate |f^s| from t_s
f = (0.5) * np.pi ** (2 * s) * np.sum(np.power(I_sq, s) * a2 * np.exp(-I_sq * np.pi ** 2 * time))
f = (0.5) * np.pi ** (2 * s) * np.sum(np.power(I_sq, s) * a2 * np.exp(-I_sq * np.pi**2 * time))

# This is the minimizer of the AMISE
t_opt = np.power(2 * N * np.sqrt(np.pi) * f, -2.0 / 5)
Expand Down Expand Up @@ -149,7 +149,7 @@ def improved_sheather_jones(data, weights=None):
if not dims == 1:
raise ValueError("ISJ is only available for 1D data.")

n = 2 ** 10
n = 2**10

# weights <= 0 still affect calculations unless we remove them
if weights is not None:
Expand Down
20 changes: 10 additions & 10 deletions KDEpy/kernel_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,12 @@ def volume_unit_ball(d, p=2):
Mathematics Magazine 78, no. 5 (2005): 390–95.
https://doi.org/10.2307/30044198.
"""
return 2.0 ** d * gamma(1 + 1 / p) ** d / gamma(1 + d / p)
return 2.0**d * gamma(1 + 1 / p) ** d / gamma(1 + d / p)


def epanechnikov(x, dims=1):
normalization = 2 / (dims + 2)
dist_sq = x ** 2
dist_sq = x**2
out = np.zeros_like(dist_sq)
mask = dist_sq < 1
out[mask] = (1 - dist_sq)[mask] / normalization
Expand All @@ -155,7 +155,7 @@ def epanechnikov(x, dims=1):

def gaussian(x, dims=1):
normalization = dims * gauss_integral(dims - 1)
dist_sq = x ** 2
dist_sq = x**2
return np.exp(-dist_sq / 2) / normalization


Expand All @@ -182,7 +182,7 @@ def tri(x, dims=1):

def biweight(x, dims=1):
normalization = 8 / ((dims + 2) * (dims + 4))
dist_sq = x ** 2
dist_sq = x**2
out = np.zeros_like(dist_sq)
mask = dist_sq < 1
out[mask] = np.maximum(0, (1 - dist_sq) ** 2)[mask] / normalization
Expand All @@ -191,7 +191,7 @@ def biweight(x, dims=1):

def triweight(x, dims=1):
normalization = 48 / ((dims + 2) * (dims + 4) * (dims + 6))
dist_sq = x ** 2
dist_sq = x**2
out = np.zeros_like(dist_sq)
mask = dist_sq < 1
out[mask] = np.maximum(0, (1 - dist_sq) ** 3)[mask] / normalization
Expand All @@ -202,7 +202,7 @@ def tricube(x, dims=1):
normalization = 162 / ((dims + 3) * (dims + 6) * (dims + 9))
out = np.zeros_like(x)
mask = x < 1
out[mask] = np.maximum(0, (1 - x ** 3) ** 3)[mask] / normalization
out[mask] = np.maximum(0, (1 - x**3) ** 3)[mask] / normalization
return out


Expand Down Expand Up @@ -320,7 +320,7 @@ def evaluate(self, x, bw=1, norm=2):
else:
distances = np.abs(x).ravel()

return self.function(distances / real_bw, dims) / ((real_bw ** dims) * volume_func(dims))
return self.function(distances / real_bw, dims) / ((real_bw**dims) * volume_func(dims))

__call__ = evaluate

Expand All @@ -333,9 +333,9 @@ def evaluate(self, x, bw=1, norm=2):
biweight = Kernel(biweight, var=1 / 7, support=1)
triweight = Kernel(triweight, var=1 / 9, support=1)
tricube = Kernel(tricube, var=35 / 243, support=1)
cosine = Kernel(cosine, var=(1 - (8 / np.pi ** 2)), support=1)
logistic = Kernel(logistic, var=(np.pi ** 2 / 3), support=np.inf)
sigmoid = Kernel(sigmoid, var=(np.pi ** 2 / 4), support=np.inf)
cosine = Kernel(cosine, var=(1 - (8 / np.pi**2)), support=1)
logistic = Kernel(logistic, var=(np.pi**2 / 3), support=np.inf)
sigmoid = Kernel(sigmoid, var=(np.pi**2 / 4), support=np.inf)

_kernel_functions = {
"gaussian": gaussian,
Expand Down
12 changes: 6 additions & 6 deletions KDEpy/tests/test_FFTKDE.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_against_naive_KDE(data, bw):
"""

# Higher accuracy when num gets larger
x = np.linspace(min(data) - bw, max(data) + bw, num=2 ** 10)
x = np.linspace(min(data) - bw, max(data) + bw, num=2**10)

y1 = NaiveKDE("epa", bw=bw).fit(data, weights=None).evaluate(x)
y2 = FFTKDE("epa", bw=bw).fit(data, weights=None).evaluate(x)
Expand All @@ -96,7 +96,7 @@ def test_against_naive_KDE_w_weights(data, bw):
"""

# Higher accuracy when num gets larger
x = np.linspace(min(data) - bw, max(data) + bw, num=2 ** 10)
x = np.linspace(min(data) - bw, max(data) + bw, num=2**10)
weights = np.arange(len(data)) + 1

y1 = NaiveKDE("epa", bw=bw).fit(data, weights=weights).evaluate(x)
Expand All @@ -113,19 +113,19 @@ def FFTKDE_test_grid_inside_data_1D():
https://github.com/tommyod/KDEpy/issues/7
"""
data = np.array([0, 1, 2, 3, 4, 5])
grid = np.linspace(-1, 6, num=2 ** 6)
grid = np.linspace(-1, 6, num=2**6)
FFTKDE().fit(data).evaluate(grid) # This should cause no problem

with pytest.raises(ValueError):
bad_grid = np.linspace(2, 6, num=2 ** 6)
bad_grid = np.linspace(2, 6, num=2**6)
FFTKDE().fit(data).evaluate(bad_grid)

with pytest.raises(ValueError):
bad_grid = np.linspace(-2, 4, num=2 ** 6)
bad_grid = np.linspace(-2, 4, num=2**6)
FFTKDE().fit(data).evaluate(bad_grid)

with pytest.raises(ValueError):
bad_grid = np.linspace(0, 5, num=2 ** 6)
bad_grid = np.linspace(0, 5, num=2**6)
FFTKDE().fit(data).evaluate(bad_grid)


Expand Down
6 changes: 3 additions & 3 deletions KDEpy/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_api_models_kernels_bandwidths_2D(kde1, kde2, bw, kernel):
data = np.array([[0, 0], [0, 1], [0, 0.5], [-1, 1]])
weights = [1, 2, 1, 0.8]

points = 2 ** 5
points = 2**5

# Chained expression
x1, y1 = kde1(kernel=kernel, bw=bw).fit(data, weights).evaluate(points)
Expand Down Expand Up @@ -117,7 +117,7 @@ def test_api_2D_data(estimator):
n = 16
data = np.concatenate((np.random.randn(n).reshape(-1, 1), np.random.randn(n).reshape(-1, 1)), axis=1)

grid_points = 2 ** 5 # Grid points in each dimension
grid_points = 2**5 # Grid points in each dimension
N = 16 # Number of contours

fig, axes = plt.subplots(ncols=3, figsize=(10, 3))
Expand Down Expand Up @@ -185,7 +185,7 @@ def test_fitting_twice(estimator):
"""Fitting several times should re-fit the BW.
Issue: https://github.com/tommyod/KDEpy/issues/78
"""
x_grid = np.linspace(-100, 100, 2 ** 6)
x_grid = np.linspace(-100, 100, 2**6)

# Create two data sets
data = np.arange(-5, 6)
Expand Down
6 changes: 3 additions & 3 deletions KDEpy/tests/test_estimator_vs_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import itertools
import pytest

N = 2 ** 5
N = 2**5

estimators = [NaiveKDE, TreeKDE, FFTKDE]
estimators_2 = list(itertools.combinations(estimators, 2))
Expand Down Expand Up @@ -67,8 +67,8 @@ def test_vs_simple_weighted_kernels(estimators, kernel, bw):
est1, est2 = NaiveKDE, TreeKDE

np.random.seed(13)
data = np.random.randn(2 ** 8) * 10
weights = np.random.randn(2 ** 8) ** 2 + 1
data = np.random.randn(2**8) * 10
weights = np.random.randn(2**8) ** 2 + 1
x1, y1 = est1(bw=100).fit(data, weights)()
x1, y2 = est2(bw=100).fit(data, weights)()
import matplotlib.pyplot as plt
Expand Down
2 changes: 1 addition & 1 deletion KDEpy/tests/test_kernel_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def test_standard_deviation_equals_one(self, fname, function):
else:
a, b = -10, 10

x = np.linspace(a, b, num=2 ** 11)
x = np.linspace(a, b, num=2**11)

# Scale so that standard deviation should be 10 instead of one
# Since 1**1 = 1, but 10**2 = 100
Expand Down
6 changes: 3 additions & 3 deletions KDEpy/tests/test_sorted_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def test_regression_issue_grids(self):
assert not grid_is_sorted(grid)

# More minimal example, should also fail.
grid_x = np.linspace(-2, 2, 2 ** 5)
grid_y = np.linspace(-2, 2, 2 ** 4)
grid_x = np.linspace(-2, 2, 2**5)
grid_y = np.linspace(-2, 2, 2**4)
grid = np.stack(np.meshgrid(grid_x, grid_y), -1).reshape(-1, 2)
assert not grid_is_sorted(grid)

Expand All @@ -50,7 +50,7 @@ def test_regression_issue_code(self):
import KDEpy

# Create bimodal 2D data
data = np.vstack((np.random.randn(2 ** 8, 2), np.random.randn(2 ** 8, 2) + (0, 5)))
data = np.vstack((np.random.randn(2**8, 2), np.random.randn(2**8, 2) + (0, 5)))

# Create 2D grid
grid_size = 20
Expand Down
4 changes: 1 addition & 3 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,7 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "KDEpy.tex", "KDEpy Documentation", "tommyod", "manual")
]
latex_documents = [(master_doc, "KDEpy.tex", "KDEpy Documentation", "tommyod", "manual")]


# -- Options for manual page output ------------------------------------------
Expand Down
21 changes: 5 additions & 16 deletions docs/source/example_stocks_kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

def weight_function(arr):
"""
Weight function for weighting samples backwards in time less.
"""
Weight function for weighting samples backwards in time less.
"""
k = 0.005
return 0.045 * np.exp(-arr * k)

Expand All @@ -42,24 +42,13 @@ def weight_function(arr):
# This is not computationally efficient, but it's reasonably fast
# The following UNIX command combines the images to a GIF
# $ convert -delay 10 -loop 0 kde*.png stocks_animation.gif
points = (
list(range(1, 20))
+ list(range(20, 100, 2))
+ list(range(100, 300, 3))
+ list(range(300, len(stock_data), 5))
)
points = list(range(1, 20)) + list(range(20, 100, 2)) + list(range(100, 300, 3)) + list(range(300, len(stock_data), 5))
for i, num_points in enumerate(points):

fig, (ax1, ax2) = plt.subplots(
1, 2, figsize=(10, 3), gridspec_kw={"width_ratios": [3, 1]}, sharey="row"
)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3), gridspec_kw={"width_ratios": [3, 1]}, sharey="row")

# Create a kernel density estimate, the bandwidth is found by trial and error
x, y = (
FFTKDE(bw=0.002)
.fit(stock_data[:num_points], weights=weights[num_points - 1 :: -1])
.evaluate()
)
x, y = FFTKDE(bw=0.002).fit(stock_data[:num_points], weights=weights[num_points - 1 :: -1]).evaluate()

# The left-most plot
ax1.set_title("Stock data (IEX)")
Expand Down
27 changes: 11 additions & 16 deletions docs/source/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def main():
data2 = np.concatenate((gen_random(n) + 1, gen_random(n) + 4), axis=1)
data = np.concatenate((data1, data2))

grid_points = 2 ** 7 # Grid points in each dimension
grid_points = 2**7 # Grid points in each dimension
N = 8 # Number of contours
x, z = FFTKDE(bw=1).fit(data)((grid_points, grid_points))
x, y = np.unique(x[:, 0]), np.unique(x[:, 1])
Expand All @@ -72,11 +72,9 @@ def main():
from matplotlib import cm

ax = plt.subplot(2, 3, 4, projection="3d")
plt.title(
"Kernels normalized in any\ndimension for any $p$-norm", fontsize=FONTSIZE
)
plt.title("Kernels normalized in any\ndimension for any $p$-norm", fontsize=FONTSIZE)
data = np.array([[0, 0]])
grid_points = 2 ** 6 # Grid points in each dimension
grid_points = 2**6 # Grid points in each dimension
x, z = FFTKDE(kernel="gaussian", bw=1, norm=2).fit(data)((grid_points, grid_points))
x, y = np.unique(x[:, 0]), np.unique(x[:, 1])
x, y = np.meshgrid(x, y)
Expand Down Expand Up @@ -117,16 +115,13 @@ def main():
plt.grid(True, ls="--", zorder=-15)

plt.subplot(2, 3, 6)
data = np.random.gamma(10, 100, size=(10 ** 6))
data = np.random.gamma(10, 100, size=(10**6))
st = time.perf_counter()
x, y = FFTKDE(kernel="gaussian", bw=100).fit(data)(2 ** 10)
x, y = FFTKDE(kernel="gaussian", bw=100).fit(data)(2**10)
timed = (time.perf_counter() - st) * 1000
plt.plot(x, y)
plt.title(
(
"One million observations on\n1024 grid"
+ " points in {} ms".format(int(round(timed, 0)))
),
("One million observations on\n1024 grid" + " points in {} ms".format(int(round(timed, 0)))),
fontsize=FONTSIZE,
)
data = np.random.choice(data, size=100, replace=False)
Expand All @@ -145,7 +140,7 @@ def main():
plt.figure(figsize=(6, 3))
##############################
np.random.seed(42)
data = norm(loc=0, scale=1).rvs(2 ** 3)
data = norm(loc=0, scale=1).rvs(2**3)
x, y = TreeKDE(kernel="gaussian", bw="silverman").fit(data).evaluate()
plt.plot(x, y, label="KDE estimate")
##############################
Expand All @@ -162,7 +157,7 @@ def main():
plt.figure(figsize=(6, 3))
##############################

data = norm(loc=0, scale=1).rvs(2 ** 6)
data = norm(loc=0, scale=1).rvs(2**6)
for bw in [0.1, "silverman", 1.5]:
x, y = FFTKDE(kernel="triweight", bw=bw).fit(data).evaluate()
plt.plot(x, y, label="KDE estimate, bw={}".format(bw))
Expand All @@ -181,7 +176,7 @@ def main():
plt.figure(figsize=(6, 3))

np.random.seed(42)
data = norm(loc=0, scale=1).rvs(2 ** 3)
data = norm(loc=0, scale=1).rvs(2**3)

for kde in [NaiveKDE, TreeKDE, FFTKDE]:
x, y = kde(kernel="gaussian", bw="silverman").fit(data).evaluate()
Expand Down Expand Up @@ -210,13 +205,13 @@ def main():

plt.scatter(x, y, label="Points")

x_interpol = np.linspace(min(x) - 1, max(x) + 1, num=2 ** 6)
x_interpol = np.linspace(min(x) - 1, max(x) + 1, num=2**6)
y_interpol = np.interp(x_interpol, x, y)

plt.plot(x_interpol, y_interpol, "--", label="Interpol")

kernel = FFTKDE._available_kernels["box"]
kernel_grid = np.linspace(-kernel.support, kernel.support, num=2 ** 6)
kernel_grid = np.linspace(-kernel.support, kernel.support, num=2**6)
bw = 0.02
kernel_weights = kernel(kernel_grid, bw=bw)
kernel_weights /= np.sum(kernel_weights)
Expand Down

0 comments on commit b1a34fd

Please sign in to comment.