Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/tommyod/KDEpy
Browse files Browse the repository at this point in the history
  • Loading branch information
tommyod committed Feb 23, 2019
2 parents 637761e + d0c6f22 commit 286ea0b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 2 deletions.
29 changes: 27 additions & 2 deletions KDEpy/bw_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
import numpy as np
import scipy
import warnings
from KDEpy.binning import linear_binning
from KDEpy.utils import autogrid
from scipy import fftpack
Expand Down Expand Up @@ -249,11 +250,35 @@ def silvermans_rule(data):
raise ValueError("Data must be of length > 0.")

sigma = np.std(data, ddof=1)
# scipy.norm.ppf(.75) - scipy.norm.ppf(.25) -> 1.3489795003921634
# scipy.stats.norm.ppf(.75) - scipy.stats.norm.ppf(.25) -> 1.3489795003921634
IQR = (np.percentile(data, q=75) - np.percentile(data, q=25)) / 1.3489795003921634

sigma = min(sigma, IQR)
return sigma * (obs * 3 / 4.0) ** (-1 / 5)

# The logic below is not related to silverman's rule, but if the data is constant
# it's nice to return a value instead of getting an error. A warning will be raised.
if sigma > 0:
return sigma * (obs * 3 / 4.0) ** (-1 / 5)
else:
# stats.norm.ppf(.99) - stats.norm.ppf(.01) = 4.6526957480816815
IQR = (
np.percentile(data, q=99) - np.percentile(data, q=1)
) / 4.6526957480816815
if IQR > 0:
bw = IQR * (obs * 3 / 4.0) ** (-1 / 5)
warnings.warn(
"Silverman's rule failed. Too many idential values. \
Setting bw = {}".format(
bw
)
)
return bw

# Here, all values are basically constant
warnings.warn(
"Silverman's rule failed. Too many idential values. Setting bw = 1.0"
)
return 1.0


_bw_methods = {
Expand Down
23 changes: 23 additions & 0 deletions KDEpy/tests/test_NaiveKDE.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,29 @@ def test_against_scipy_density(bw, n, expected_result):
assert np.allclose(y, expected_result)


def test_constant_values_silverman():
    """
    Check that bw="silverman" copes with constant and nearly constant data.

    A sample of identical values must raise a UserWarning on evaluation and
    end up with a bandwidth of 1.0 instead of failing. A sample that is
    constant apart from two differing values must also raise a UserWarning
    on evaluation, even though Silverman's rule itself does not technically
    assign a bandwidth in that situation.
    https://github.com/tommyod/KDEpy/issues/9
    """

    # Case 1: every observation is identical.
    constant_sample = np.ones(100, dtype=float)
    estimator = NaiveKDE(bw="silverman").fit(constant_sample)
    with pytest.warns(UserWarning):
        estimator.evaluate()
    assert np.isclose(estimator.bw, 1.0)

    # Case 2: identical observations except for the first and the last one.
    nearly_constant = np.ones(1000, dtype=float)
    nearly_constant[0] = 0.0
    nearly_constant[-1] = 2.0
    estimator = NaiveKDE(bw="silverman").fit(nearly_constant)
    with pytest.warns(UserWarning):
        estimator.evaluate()


if __name__ == "__main__":
    # Run pytest on the current directory, including doctests, verbosely.
    # Adding "--durations=10" to the arguments shows potentially slow tests.
    pytest_args = [".", "--doctest-modules", "-v"]
    pytest.main(args=pytest_args)

0 comments on commit 286ea0b

Please sign in to comment.