Skip to content

Commit daa666e

Browse files
committed
Ignore invalid values in box/violin datasets
1 parent 15812cd commit daa666e

2 files changed

Lines changed: 46 additions & 22 deletions

File tree

proplot/axes/plot.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3480,6 +3480,7 @@ def _apply_boxplot(
34803480
kw.setdefault('positions', x)
34813481
if means:
34823482
kw['showmeans'] = kw['meanline'] = True
3483+
y = data._dist_clean(y)
34833484
artists = self._plot_native('boxplot', y, vert=vert, **kw)
34843485
artists = artists or {} # necessary?
34853486
artists = {
@@ -3592,8 +3593,10 @@ def _apply_violinplot(self, x, y, vert=True, **kwargs):
35923593
y, kw = data._dist_reduce(y, **kw)
35933594
*eb, kw = self._plot_errorbars(x, y, vert=vert, default_boxes=True, **kw)
35943595
kw.pop('labels', None) # already applied in _parse_plot1d
3595-
kw.setdefault('positions', x)
3596-
y = _not_none(kw.pop('distribution'), y) # might not have reduced the data
3596+
kw.setdefault('positions', x) # coordinates passed as keyword
3597+
if 'distribution' in kw: # i.e. was reduced
3598+
y = kw.pop('distribution')
3599+
y = data._dist_clean(y)
35973600
artists = self._plot_native(
35983601
'violinplot', y, vert=vert,
35993602
showmeans=False, showmedians=False, showextrema=False, **kw

proplot/internals/data.py

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,23 @@ def _redirect_or_standardize(self, *args, **kwargs):
285285

286286

287287
# Stats utilities
288-
def _dist_reduce(y, *, mean=None, means=None, median=None, medians=None, **kwargs):
288+
def _dist_clean(distribution):
289+
"""
290+
Clean the distribution data for processing by `boxplot` or `violinplot`.
291+
Without this, invalid values break the algorithm.
292+
"""
293+
if distribution.ndim == 1:
294+
distribution = distribution[:, None]
295+
distribution, units = _to_masked_array(distribution) # no copy needed
296+
distribution = tuple(
297+
distribution[..., i].compressed() for i in range(distribution.shape[-1])
298+
)
299+
if units is not None:
300+
distribution = tuple(dist * units for dist in distribution)
301+
return distribution
302+
303+
304+
def _dist_reduce(data, *, mean=None, means=None, median=None, medians=None, **kwargs):
289305
"""
290306
Reduce statistical distributions to means and medians. Tack on a
291307
distribution keyword argument for processing down the line.
@@ -300,25 +316,26 @@ def _dist_reduce(y, *, mean=None, means=None, median=None, medians=None, **kwarg
300316
)
301317
medians = None
302318
if means or medians:
303-
dist = y
304-
dist, units = _to_masked_array(dist)
305-
dist = dist.filled()
306-
if dist.ndim != 2:
307-
raise ValueError(f'Expected 2D array for means=True. Got {dist.ndim}D.')
319+
distribution, units = _to_masked_array(data)
320+
distribution = distribution.filled()
321+
if distribution.ndim != 2:
322+
raise ValueError(
323+
f'Expected 2D array for means=True. Got {distribution.ndim}D.'
324+
)
308325
if units is not None:
309-
dist = dist * units
326+
distribution = distribution * units
310327
if means:
311-
y = np.nanmean(dist, axis=0)
328+
data = np.nanmean(distribution, axis=0)
312329
else:
313-
y = np.nanmedian(dist, axis=0)
314-
kwargs['distribution'] = dist
330+
data = np.nanmedian(distribution, axis=0)
331+
kwargs['distribution'] = distribution
315332

316333
# Save argument passed to _error_bars
317-
return (y, kwargs)
334+
return (data, kwargs)
318335

319336

320337
def _dist_range(
321-
y, distribution, *, errdata=None, absolute=False, label=False,
338+
data, distribution, *, errdata=None, absolute=False, label=False,
322339
stds=None, pctiles=None, stds_default=None, pctiles_default=None,
323340
):
324341
"""
@@ -377,27 +394,31 @@ def _dist_range(
377394
# NOTE: Include option to pass symmetric deviation from central points
378395
if errdata is not None:
379396
# Manual error data
380-
if y.ndim != 1:
381-
raise ValueError('errdata with 2D y coordinates is not yet supported.')
397+
if data.ndim != 1:
398+
raise ValueError(
399+
"Passing both 2D data coordinates and 'errdata' is not yet supported."
400+
)
382401
label_default = 'uncertainty'
383402
err = _to_numpy_array(errdata)
384403
if (
385404
err.ndim not in (1, 2)
386-
or err.shape[-1] != y.size
405+
or err.shape[-1] != data.size
387406
or err.ndim == 2 and err.shape[0] != 2
388407
):
389-
raise ValueError(f'errdata has shape {err.shape}. Expected (2, {y.size}).')
408+
raise ValueError(
409+
f"Input 'errdata' has shape {err.shape}. Expected (2, {data.size})."
410+
)
390411
if err.ndim == 1:
391412
abserr = err
392413
err = np.empty((2, err.size))
393-
err[0, :] = y - abserr # translated back to absolute deviations below
394-
err[1, :] = y + abserr
414+
err[0, :] = data - abserr # translated back to absolute deviations below
415+
err[1, :] = data + abserr
395416
elif stds is not None:
396417
# Standard deviations
397418
# NOTE: Invalid values were handled by _dist_reduce
398419
label_default = fr'{abs(stds[1])}$\sigma$ range'
399420
stds = _to_numpy_array(stds)[:, None]
400-
err = y + stds * np.nanstd(distribution, axis=0)
421+
err = data + stds * np.nanstd(distribution, axis=0)
401422
elif pctiles is not None:
402423
# Percentiles
403424
# NOTE: Invalid values were handled by _dist_reduce
@@ -412,7 +433,7 @@ def _dist_range(
412433

413434
# Adjust error bounds
414435
if err is not None and not absolute: # for errorbar() ingestion
415-
err = err - y
436+
err = err - data
416437
err[0, :] *= -1 # absolute deviations from central points
417438

418439
# Apply legend entry

0 commit comments

Comments
 (0)