Skip to content

Commit

Permalink
Avoid seeing an error from statsmodels in kdeplot when data IQR == 0 (#…
Browse files Browse the repository at this point in the history
…2040)

(cherry picked from commit 09fef02)
  • Loading branch information
mwaskom committed Apr 26, 2020
1 parent 2af6cda commit 6adb36e
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 9 deletions.
2 changes: 2 additions & 0 deletions doc/releases/v0.10.1.txt
Expand Up @@ -18,6 +18,8 @@ This is a minor release with bug fixes for issues identified since 0.10.0.

- Added the ``showfliers`` parameter to :func:`boxenplot` to suppress plotting of outlier data points, matching the API of :func:`boxplot`.

- Avoided seeing an error from statsmodels when data with an IQR of 0 is passed to :func:`kdeplot`.

- Added the ``legend.title_fontsize`` to the :func:`plotting_context` definition.

- Several utility functions that are no longer used internally (``percentiles``, ``sig_stars``, ``pmf_hist``, and ``sort_df``) have been deprecated and marked for future removal.
20 changes: 11 additions & 9 deletions seaborn/distributions.py
Expand Up @@ -359,7 +359,16 @@ def _statsmodels_univariate_kde(data, kernel, bw, gridsize, cut, clip,
"""Compute a univariate kernel density estimate using statsmodels."""
fft = kernel == "gau"
kde = smnp.KDEUnivariate(data)
kde.fit(kernel, bw, fft, gridsize=gridsize, cut=cut, clip=clip)

try:
kde.fit(kernel, bw, fft, gridsize=gridsize, cut=cut, clip=clip)
except RuntimeError as err: # GH#1990
if stats.iqr(data) > 0:
raise err
msg = "Default bandwidth for data is 0; skipping density estimation."
warnings.warn(msg, UserWarning)
return np.array([]), np.array([])

if cumulative:
grid, y = kde.support, kde.cdf
else:
Expand All @@ -369,14 +378,7 @@ def _statsmodels_univariate_kde(data, kernel, bw, gridsize, cut, clip,

def _scipy_univariate_kde(data, bw, gridsize, cut, clip):
"""Compute a univariate kernel density estimate using scipy."""
try:
kde = stats.gaussian_kde(data, bw_method=bw)
except TypeError:
kde = stats.gaussian_kde(data)
if bw != "scott": # scipy default
msg = ("Ignoring bandwidth choice, "
"please upgrade scipy to use a different bandwidth.")
warnings.warn(msg, UserWarning)
kde = stats.gaussian_kde(data, bw_method=bw)
if isinstance(bw, str):
bw = "scotts" if bw == "scott" else bw
bw = getattr(kde, "%s_factor" % bw)() * np.std(data)
Expand Down
31 changes: 31 additions & 0 deletions seaborn/tests/test_distributions.py
Expand Up @@ -6,11 +6,19 @@
import pytest
import nose.tools as nt
import numpy.testing as npt
from distutils.version import LooseVersion

from .. import distributions as dist

_no_statsmodels = not dist._has_statsmodels

if not _no_statsmodels:
import statsmodels
import statsmodels.nonparametric as smnp
_old_statsmodels = LooseVersion(statsmodels.__version__) < "0.11"
else:
_old_statsmodels = False


class TestDistPlot(object):

Expand Down Expand Up @@ -174,6 +182,29 @@ def test_kde_singular(self):
line = ax.lines[1]
assert not line.get_xydata().size

@pytest.mark.skipif(_no_statsmodels or _old_statsmodels,
reason="no statsmodels or statsmodels without issue")
def test_statsmodels_zero_bandwidth(self):
"""Test handling of 0 bandwidth data in statsmodels."""
# 99 zeros and a single 1, so the interquartile range of x is 0.
x = np.zeros(100)
x[0] = 1

try:

smnp.kde.bandwidths.select_bandwidth(x, "scott", "gau")

except RuntimeError:

# Only execute the actual test in the except clause; this should
# keep the test from failing in the future if statsmodels changes
# its behavior to avoid raising the error itself.
# Track at https://github.com/statsmodels/statsmodels/issues/5419

with pytest.warns(UserWarning):
ax = dist.kdeplot(x)
# kdeplot is expected to still add a line, but one with no data points.
line = ax.lines[0]
assert not line.get_xydata().size

@pytest.mark.parametrize("cumulative", [True, False])
def test_kdeplot_with_nans(self, cumulative):

Expand Down

0 comments on commit 6adb36e

Please sign in to comment.