Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Allow custom bandwidth functions in KDEUnivariate fit #6997

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 31 additions & 12 deletions statsmodels/nonparametric/kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def fit(self, kernel="gau", bw="normal_reference", fft=True, weights=None,
- "triw" for triweight
- "uni" for uniform

bw : str, float
bw : str, float, callable
The bandwidth to use. Choices are:

- "scott" - 1.059 * A * nobs ** (-1/5.), where A is
Expand All @@ -106,6 +106,10 @@ def fit(self, kernel="gau", bw="normal_reference", fft=True, weights=None,
calculated from the kernel. Equivalent (up to 2 dp) to the
"scott" bandwidth for gaussian kernels. See bandwidths.py
- If a float is given, it is the bandwidth.
- If a callable is given, its return value is used.
The callable should take exactly two parameters, i.e., fn(X, kern)
X - the clipped input data
kern - the kernel instance used

fft : bool
Whether or not to use FFT. FFT implementation is more
Expand Down Expand Up @@ -281,10 +285,14 @@ def kdensity(X, kernel="gau", bw="normal_reference", weights=None, gridsize=None
- "tri" for triangular
- "triw" for triweight
- "uni" for uniform
bw : str, float
bw : str, float, callable
"scott" - 1.059 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34)
"silverman" - .9 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34)
If a float is given, it is the bandwidth.
If a callable is given, its return value is used.
The callable should take exactly two parameters, i.e., fn(X, kern)
X - the clipped input data
kern - the kernel instance used
weights : array or None
Optional weights. If the X value is clipped, then this weight is
also dropped.
Expand Down Expand Up @@ -342,11 +350,14 @@ def kdensity(X, kernel="gau", bw="normal_reference", weights=None, gridsize=None
# Get kernel object corresponding to selection
kern = kernel_switch[kernel]()

# if bw is None, select optimal bandwidth for kernel
try:
bw = float(bw)
except:
bw = bandwidths.select_bandwidth(X, bw, kern)
if callable(bw):
bw = float(bw(X, kern)) # user passed a callable custom bandwidth function
else:
# if bw is None, select optimal bandwidth for kernel
try:
bw = float(bw)
except:
bw = bandwidths.select_bandwidth(X, bw, kern) # will cross-val fit this pattern?
bw *= adjust

a = np.min(X, axis=0) - cut * bw
Expand Down Expand Up @@ -395,10 +406,14 @@ def kdensityfft(X, kernel="gau", bw="normal_reference", weights=None, gridsize=N
"par" for Parzen
"rect" for rectangular
"tri" for triangular
bw : str, float
bw : str, float, callable
"scott" - 1.059 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34)
"silverman" - .9 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34)
If a float is given, it is the bandwidth.
If a callable is given, its return value is used.
The callable should take exactly two parameters, i.e., fn(X, kern)
X - the clipped input data
kern - the kernel instance used
weights : array or None
WEIGHTS ARE NOT CURRENTLY IMPLEMENTED.
Optional weights. If the X value is clipped, then this weight is
Expand Down Expand Up @@ -453,10 +468,14 @@ def kdensityfft(X, kernel="gau", bw="normal_reference", weights=None, gridsize=N
# Get kernel object corresponding to selection
kern = kernel_switch[kernel]()

try:
bw = float(bw)
except:
bw = bandwidths.select_bandwidth(X, bw, kern) # will cross-val fit this pattern?
if callable(bw):
bw = float(bw(X, kern)) # user passed a callable custom bandwidth function
else:
# if bw is None, select optimal bandwidth for kernel
try:
bw = float(bw)
except:
bw = bandwidths.select_bandwidth(X, bw, kern) # will cross-val fit this pattern?
bw *= adjust

nobs = len(X) # after trim
Expand Down
28 changes: 28 additions & 0 deletions statsmodels/nonparametric/tests/test_kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from statsmodels.distributions.mixture_rvs import mixture_rvs
from statsmodels.nonparametric.kde import KDEUnivariate as KDE
import statsmodels.sandbox.nonparametric.kernels as kernels
import statsmodels.nonparametric.bandwidths as bandwidths

# get results from Stata

Expand Down Expand Up @@ -348,3 +349,30 @@ def test_fit_self(reset_randomstate):
kde = KDE(x)
assert isinstance(kde, KDE)
assert isinstance(kde.fit(), KDE)


class TestKDECustomBandwidth(object):
    """Exercise ``KDE.fit`` when ``bw`` is given as a callable."""

    # Number of decimal places used when comparing supports and densities.
    decimal_density = 7

    @classmethod
    def setup_class(cls):
        """Create the shared estimator and the weight vectors once per class."""
        cls.kde = KDE(Xi)
        cls.weights_200 = np.linspace(1, 100, 200)
        cls.weights_100 = np.linspace(1, 100, 100)

    def test_check_is_fit_ok_with_custom_bandwidth(self):
        """Fitting with a user-written bandwidth function yields a KDE."""
        fitted = self.kde.fit(bw=lambda X, kern: np.std(X) * len(X))
        assert isinstance(fitted, KDE)

    def test_check_is_fit_ok_with_standard_custom_bandwidth(self):
        """The Silverman function and the 'silverman' string must agree."""
        # Note: the function object itself is passed here, not its string
        # name - this is intended.
        from_callable = self.kde.fit(bw=bandwidths.bw_silverman)
        # Snapshot the results before the second fit, which is called on the
        # same ``self.kde`` object.
        support_from_callable = from_callable.support.copy()
        density_from_callable = from_callable.density.copy()

        from_name = self.kde.fit(bw='silverman')

        comparisons = (
            (support_from_callable, from_name.support),
            (density_from_callable, from_name.density),
        )
        for actual, desired in comparisons:
            npt.assert_almost_equal(actual, desired, self.decimal_density)