Skip to content

Commit

Permalink
DOC: Rewrite the example illustrating bxp()
Browse files Browse the repository at this point in the history
Previously, the example essentially replicated illustrations of a lot
of styling variants, which have already been shown for `boxplot()`.
Compare:
- https://matplotlib.org/3.8.3/gallery/statistics/bxp.html
- https://matplotlib.org/3.8.3/gallery/statistics/boxplot.html

I believe it's not necessary to iterate through all that again. Instead,
this rewrite focuses on the relation between `boxplot_stats()`, `bxp()`
and `boxplot`. And it shows how to use `bxp()` in isolation.
  • Loading branch information
timhoffm committed Mar 22, 2024
1 parent 47c96df commit 9565049
Showing 1 changed file with 43 additions and 84 deletions.
127 changes: 43 additions & 84 deletions galleries/examples/statistics/bxp.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,65 @@
"""
=======================
Boxplot drawer function
=======================
This example demonstrates how to pass pre-computed box plot
statistics to the box plot drawer. The first figure demonstrates
how to remove and add individual components (note that the
mean is the only value not shown by default). The second
figure demonstrates how the styles of the artists can
be customized.
A good general reference on boxplots and their history can be found
here: http://vita.had.co.nz/papers/boxplots.pdf
"""

import matplotlib.pyplot as plt
import numpy as np

import matplotlib.cbook as cbook
=============================================
Separate calculation and plotting of boxplots
=============================================
# Reproducible fake data: a (37, 4) array of log-normally distributed samples,
# with one label per column.
np.random.seed(19680801)
data = np.random.lognormal(mean=1.5, sigma=1.75, size=(37, 4))
labels = ['A', 'B', 'C', 'D']

# compute the boxplot stats
stats = cbook.boxplot_stats(data, labels=labels, bootstrap=10000)
Drawing a `~.axes.Axes.boxplot` for a given data set consists of two main operations
that can also be used separately:
# %%
# After we've computed the stats, we can go through and change anything.
# Just to prove it, I'll set the median of each set to the median of all
# the data, and double the means
1. Calculating the boxplot statistics: `matplotlib.cbook.boxplot_stats`
2. Drawing the boxplot: `matplotlib.axes.Axes.bxp`
# Overwrite each dataset's median with the median of all the data and double
# its mean. The overall median does not change inside the loop, so it is
# computed once up front.
overall_median = np.median(data)
for stat in stats:
    stat['med'] = overall_median
    stat['mean'] *= 2
Thus, ``ax.boxplot(data)`` is equivalent to ::
print(list(stats[0]))
stats = cbook.boxplot_stats(data)
ax.bxp(stats)
fs = 10 # fontsize

# %%
# Demonstrate how to toggle the display of different elements:
All styling keyword arguments are identical between `~.axes.Axes.boxplot` and
`~.axes.Axes.bxp`, and they are passed through from `~.axes.Axes.boxplot` to
`~.axes.Axes.bxp`. However, the *tick_labels* parameter of `~.axes.Axes.boxplot`
translates to a generic *labels* parameter in `.boxplot_stats`, because the labels are
data-related and attached to the returned per-dataset dictionaries.
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(6, 6), sharey=True)
axs[0, 0].bxp(stats)
axs[0, 0].set_title('Default', fontsize=fs)
The following code demonstrates the equivalence between the two methods.
axs[0, 1].bxp(stats, showmeans=True)
axs[0, 1].set_title('showmeans=True', fontsize=fs)
"""
# sphinx_gallery_thumbnail_number = 2

axs[0, 2].bxp(stats, showmeans=True, meanline=True)
axs[0, 2].set_title('showmeans=True,\nmeanline=True', fontsize=fs)
import matplotlib.pyplot as plt
import numpy as np

axs[1, 0].bxp(stats, showbox=False, showcaps=False)
tufte_title = 'Tufte Style\n(showbox=False,\nshowcaps=False)'
axs[1, 0].set_title(tufte_title, fontsize=fs)
from matplotlib import cbook

axs[1, 1].bxp(stats, shownotches=True)
# The title names the bxp() keyword actually passed above, like the other
# panel titles do.
axs[1, 1].set_title('shownotches=True', fontsize=fs)
np.random.seed(19680801)
data = np.random.randn(20, 3)

axs[1, 2].bxp(stats, showfliers=False)
axs[1, 2].set_title('showfliers=False', fontsize=fs)
# One row with two side-by-side Axes, so the result unpacks into ax1 and ax2.
# A bare plt.subplots() returns a single Axes and cannot be unpacked.
fig, (ax1, ax2) = plt.subplots(1, 2)

for ax in axs.flat:
ax.set_yscale('log')
ax.set_yticklabels([])
# single boxplot call
ax1.boxplot(data, tick_labels=['A', 'B', 'C'],
patch_artist=True, boxprops={'facecolor': 'bisque'})

fig.subplots_adjust(hspace=0.4)
plt.show()
# separate calculation of statistics and plotting
stats = cbook.boxplot_stats(data, labels=['A', 'B', 'C'])
ax2.bxp(stats, patch_artist=True, boxprops={'facecolor': 'bisque'})

# %%
# Demonstrate how to customize the display of different elements:

# Style dictionaries passed to bxp() via the matching *props keyword.
boxprops = {'linestyle': '--', 'linewidth': 3, 'color': 'darkgoldenrod'}
flierprops = {
    'marker': 'o',
    'markerfacecolor': 'green',
    'markersize': 12,
    'linestyle': 'none',
}
medianprops = {'linestyle': '-.', 'linewidth': 2.5, 'color': 'firebrick'}
# Two alternative mean styles: one for a point marker, one for a line.
meanpointprops = {
    'marker': 'D',
    'markeredgecolor': 'black',
    'markerfacecolor': 'firebrick',
}
meanlineprops = {'linestyle': '--', 'linewidth': 2.5, 'color': 'purple'}

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(6, 6), sharey=True)
axs[0, 0].bxp(stats, boxprops=boxprops)
axs[0, 0].set_title('Custom boxprops', fontsize=fs)

axs[0, 1].bxp(stats, flierprops=flierprops, medianprops=medianprops)
axs[0, 1].set_title('Custom medianprops\nand flierprops', fontsize=fs)
# Using the separate functions allows you to pre-calculate statistics, in case you
# need them explicitly for other purposes, or to reuse the statistics for multiple
# plots.
#
# Conversely, you can also use the `~.axes.Axes.bxp` function directly, if you already
# have the statistical parameters:

axs[1, 0].bxp(stats, meanprops=meanpointprops, meanline=False,
showmeans=True)
axs[1, 0].set_title('Custom mean\nas point', fontsize=fs)
fig, ax = plt.subplots()

axs[1, 1].bxp(stats, meanprops=meanlineprops, meanline=True,
showmeans=True)
axs[1, 1].set_title('Custom mean\nas line', fontsize=fs)
# Hand-specified statistics, one dictionary per box: median, quartiles,
# whisker ends, flier values and label.
stats = [
    {'med': 0, 'q1': -1, 'q3': 1, 'whislo': -2, 'whishi': 2,
     'fliers': [-4, -3, 3, 4], 'label': 'A'},
    {'med': 0, 'q1': -2, 'q3': 2, 'whislo': -3, 'whishi': 3,
     'fliers': [], 'label': 'B'},
    {'med': 0, 'q1': -3, 'q3': 3, 'whislo': -4, 'whishi': 4,
     'fliers': [], 'label': 'C'},
]

for ax in axs.flat:
ax.set_yscale('log')
ax.set_yticklabels([])
ax.bxp(stats, patch_artist=True, boxprops={'facecolor': 'bisque'})

fig.suptitle("I never said they'd be pretty")
fig.subplots_adjust(hspace=0.4)
plt.show()

# %%
Expand All @@ -112,4 +70,5 @@
# in this example:
#
# - `matplotlib.axes.Axes.bxp`
# - `matplotlib.axes.Axes.boxplot`
# - `matplotlib.cbook.boxplot_stats`

0 comments on commit 9565049

Please sign in to comment.