Skip to content

Commit

Permalink
Merge pull request #13 from tommyod/black
Browse files Browse the repository at this point in the history
Ran black formatter on all files
  • Loading branch information
tommyod committed Feb 11, 2019
2 parents a49bfc1 + 406b0dc commit be89879
Show file tree
Hide file tree
Showing 23 changed files with 871 additions and 768 deletions.
3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ install:

# Linting
before_script:
- $PYTHON -m flake8 --show-source --ignore=F811,W293,W391,W292,W291,W504
--max-line-length=79 --exclude="*examples.py,testing.py,*kde.py" KDEpy
- $PYTHON -m flake8 --show-source --ignore=F811,W293,W391,W292,W291,W504,W503 --max-line-length=88 --exclude="*examples.py,testing.py,*kde.py" KDEpy

# If a tag was used, build wheels and deploy to PyPI
script:
Expand Down
30 changes: 16 additions & 14 deletions KDEpy/BaseKDE.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, kernel: str, bw: float):

# Verify that the choice of kernel is valid, and set the function
akernels = sorted(list(self._available_kernels.keys()))
msg = 'Kernel must be a string or callable. Opts: {}'.format(akernels)
msg = "Kernel must be a string or callable. Opts: {}".format(akernels)
if isinstance(kernel, str):
kernel = kernel.strip().lower()
if kernel not in akernels:
Expand All @@ -61,23 +61,24 @@ def __init__(self, kernel: str, bw: float):

# The `bw` parameter may either be a positive number, a string, or
# array-like such that each point in the data has a unique bw
if (isinstance(bw, numbers.Number) and bw > 0):
if isinstance(bw, numbers.Number) and bw > 0:
self.bw = bw
elif isinstance(bw, str):
amethods = sorted(list(self._bw_methods.keys()))
if bw.lower() not in set(m.lower() for m in amethods):
msg = 'bw not recognized. Options are: {}'.format(amethods)
msg = "bw not recognized. Options are: {}".format(amethods)
raise ValueError(msg)
self.bw = self._bw_methods[bw]
elif isinstance(bw, (np.ndarray, Sequence)):
self.bw = bw
else:
raise ValueError('Bandwidth must be > 0, array-like or a string.')
raise ValueError("Bandwidth must be > 0, array-like or a string.")

# Test quickly that the method has done what it was supposed to do
assert callable(self.kernel)
assert (isinstance(self.bw, (np.ndarray, Sequence, numbers.Number)) or
callable(self.bw))
assert isinstance(self.bw, (np.ndarray, Sequence, numbers.Number)) or callable(
self.bw
)

@abstractmethod
def fit(self, data, weights=None):
Expand All @@ -103,7 +104,7 @@ def fit(self, data, weights=None):
obs, dims = data.shape

if not obs > 0:
raise ValueError('Data must contain at least one data point.')
raise ValueError("Data must contain at least one data point.")
assert dims > 0
self.data = data

Expand All @@ -112,7 +113,7 @@ def fit(self, data, weights=None):
self.weights = self._process_sequence(weights).ravel()
self.weights = self.weights / np.sum(self.weights)
if not obs == len(self.weights):
raise ValueError('Number of data obs must match weights')
raise ValueError("Number of data obs must match weights")
else:
self.weights = weights

Expand All @@ -136,8 +137,8 @@ def evaluate(self, grid_points=None, bw_to_scalar=True):
dimension. If a tuple, the number of grid points in each
dimension. If array-like, grid points of shape (obs, dims).
"""
if not hasattr(self, 'data'):
raise ValueError('Must call fit before evaluating.')
if not hasattr(self, "data"):
raise ValueError("Must call fit before evaluating.")

# -------------- Set up the bandwidth depending on inputs -------------
if isinstance(self.bw, (np.ndarray, Sequence)):
Expand Down Expand Up @@ -166,7 +167,7 @@ def evaluate(self, grid_points=None, bw_to_scalar=True):

obs, dims = grid_points.shape
if not obs > 0:
raise ValueError('Grid must contain at least one data point.')
raise ValueError("Grid must contain at least one data point.")
self.grid_points = grid_points

# Test quickly that the method has done what it was supposed to do
Expand Down Expand Up @@ -200,9 +201,9 @@ def _process_sequence(sequence_array_like):
elif len(sequence_array_like.shape) == 2:
out = sequence_array_like
else:
raise ValueError('Must be of shape (obs, dims)')
raise ValueError("Must be of shape (obs, dims)")
else:
raise TypeError('Must be of shape (obs, dims)')
raise TypeError("Must be of shape (obs, dims)")
return np.asarray_chkfinite(out, dtype=np.float)

def _evalate_return_logic(self, evaluated, grid_points):
Expand All @@ -225,5 +226,6 @@ def __call__(self, *args, **kwargs):

if __name__ == "__main__":
import pytest

# --durations=10 <- May be used to show potentially slow tests
pytest.main(args=['.', '--doctest-modules', '-v'])
pytest.main(args=[".", "--doctest-modules", "-v"])
36 changes: 19 additions & 17 deletions KDEpy/FFTKDE.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class FFTKDE(BaseKDE):
"""

def __init__(self, kernel='gaussian', bw=1, norm=2):
def __init__(self, kernel="gaussian", bw=1, norm=2):
self.norm = norm
super().__init__(kernel, bw)
assert isinstance(self.norm, numbers.Number) and self.norm > 0
Expand Down Expand Up @@ -138,29 +138,31 @@ def evaluate(self, grid_points=None):
elif isinstance(self.bw, numbers.Number) and self.bw > 0:
bw = self.bw
else:
raise ValueError('The bw must be a callable or a number.')
raise ValueError("The bw must be a callable or a number.")
self.bw = bw

# Step 0 - Make sure data points are inside of the grid
min_grid = np.min(self.grid_points, axis=0)
max_grid = np.max(self.grid_points, axis=0)

min_data = np.min(self.data, axis=0)
max_data = np.max(self.data, axis=0)
if not ((min_grid < min_data).all() and (max_grid > max_data).all()):
raise ValueError('Every data point must be inside of the grid.')
raise ValueError("Every data point must be inside of the grid.")

# Step 1 - Obtaining the grid counts
# TODO: Consider moving this to the fitting phase instead
data = linear_binning(self.data, grid_points=self.grid_points,
weights=self.weights)
data = linear_binning(
self.data, grid_points=self.grid_points, weights=self.weights
)

# Step 2 - Computing kernel weights
g_shape = self.grid_points.shape[1]
num_grid_points = np.array(list(len(np.unique(self.grid_points[:, i]))
for i in range(g_shape)))
num_grid_points = np.array(
list(len(np.unique(self.grid_points[:, i])) for i in range(g_shape))
)

num_intervals = (num_grid_points - 1)
num_intervals = num_grid_points - 1
dx = (max_grid - min_grid) / num_intervals

# Find the real bandwidth, the support times the desired bw factor
Expand All @@ -180,8 +182,7 @@ def evaluate(self, grid_points=None):
assert (dx * L <= real_bw).all()

# Evaluate the kernel once
grids = [np.linspace(-dx * L, dx * L, int(L * 2 + 1)) for (dx, L)
in zip(dx, L)]
grids = [np.linspace(-dx * L, dx * L, int(L * 2 + 1)) for (dx, L) in zip(dx, L)]
kernel_grid = cartesian(grids)
kernel_weights = self.kernel(kernel_grid, bw=self.bw, norm=self.norm)

Expand All @@ -192,17 +193,18 @@ def evaluate(self, grid_points=None):
# Step 3 - Performing the convolution

# The following code block suppresses the warning:
# anaconda3/lib/python3.6/site-packages/mkl_fft/_numpy_fft.py:
# FutureWarning: Using a non-tuple sequence for multidimensional ...
# output = mkl_fft.rfftn_numpy(a, s, axes)
# anaconda3/lib/python3.6/site-packages/mkl_fft/_numpy_fft.py:
# FutureWarning: Using a non-tuple sequence for multidimensional ...
# output = mkl_fft.rfftn_numpy(a, s, axes)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ans = convolve(data, kernel_weights, mode='same').reshape(-1, 1)
ans = convolve(data, kernel_weights, mode="same").reshape(-1, 1)

return self._evalate_return_logic(ans, self.grid_points)


if __name__ == "__main__":
import pytest

# --durations=10 <- May be used to show potentially slow tests
pytest.main(args=['.', '--doctest-modules', '-v', '--capture=sys'])
pytest.main(args=[".", "--doctest-modules", "-v", "--capture=sys"])
12 changes: 8 additions & 4 deletions KDEpy/NaiveKDE.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class NaiveKDE(BaseKDE):
- Scipy implementation, at ``scipy.stats.gaussian_kde``.
"""

def __init__(self, kernel='gaussian', bw=1, norm=2):
def __init__(self, kernel="gaussian", bw=1, norm=2):
super().__init__(kernel, bw)
self.norm = norm

Expand Down Expand Up @@ -127,8 +127,11 @@ def evaluate(self, grid_points=None):

# TODO: Implementation w.r.t grid points for faster evaluation
# See the SciPy evaluation for how this can be done
weights = (itertools.repeat(1 / self.data.shape[0]) if self.weights
is None else self.weights)
weights = (
itertools.repeat(1 / self.data.shape[0])
if self.weights is None
else self.weights
)

for weight, data_point, bw in zip(weights, self.data, bw):
x = self.grid_points - data_point
Expand All @@ -139,5 +142,6 @@ def evaluate(self, grid_points=None):

if __name__ == "__main__":
import pytest

# --durations=10 <- May be used to show potentially slow tests
pytest.main(args=['.', '--doctest-modules', '-v'])
pytest.main(args=[".", "--doctest-modules", "-v"])
12 changes: 7 additions & 5 deletions KDEpy/TreeKDE.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class TreeKDE(BaseKDE):
- Scipy implementation, at ``scipy.spatial.KDTree``.
"""

def __init__(self, kernel='gaussian', bw=1, norm=2.):
def __init__(self, kernel="gaussian", bw=1, norm=2.0):
super().__init__(kernel, bw)
self.norm = norm

Expand Down Expand Up @@ -146,7 +146,7 @@ def evaluate(self, grid_points=None, eps=10e-4):
# Compute the kernel radius
maximal_bw = np.max(bw)
if not eps > 0:
raise ValueError('eps must be > 0.')
raise ValueError("eps must be > 0.")
kernel_radius = self.kernel.practical_support(maximal_bw, eps)

# Since we iterate through grid points, we need the maximum bw to
Expand All @@ -155,8 +155,9 @@ def evaluate(self, grid_points=None, eps=10e-4):

# Query for data points that are close to this grid point
# TODO: Is this epsilon value sensible?
indices = tree.query_ball_point(x=grid_point, r=kernel_radius,
p=self.norm, eps=eps * obs**0.5)
indices = tree.query_ball_point(
x=grid_point, r=kernel_radius, p=self.norm, eps=eps * obs ** 0.5
)

# Use broadcasting to find x-values (distances)
x = grid_point - self.data[indices]
Expand All @@ -178,5 +179,6 @@ def evaluate(self, grid_points=None, eps=10e-4):

if __name__ == "__main__":
import pytest

# --durations=10 <- May be used to show potentially slow tests
pytest.main(args=['.', '--doctest-modules', '-v'])
pytest.main(args=[".", "--doctest-modules", "-v"])
4 changes: 2 additions & 2 deletions KDEpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from KDEpy.FFTKDE import FFTKDE

# If this is incremented, also increment in setup.py
__version__ = '0.6.9'
__author__ = 'tommyod'
__version__ = "0.6.9"
__author__ = "tommyod"

TreeKDE = TreeKDE
NaiveKDE = NaiveKDE
Expand Down

0 comments on commit be89879

Please sign in to comment.