Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for irregular bin widths in hist.plot.plot_pull_array #369

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## WIP

* When plotting a histogram against a PDF or other histogram with
`plot_pull` or `plot_ratio`, normalize the histogram values to the mean bin
width to correct for irregular bin widths when using axes of type
`axis.Variable`.
[#369](https://github.com/scikit-hep/hist/pull/369)

## Version 2.6.1

* Fall back on normal repr when histogram is too large
Expand Down
40 changes: 29 additions & 11 deletions src/hist/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,15 @@ def _fit_callable_to_hist(

# Infer best fit model parameters and covariance matrix
xdata = histogram.axes[0].centers

# For axes with varying bin widths correct hist values with widths while
# maintaining normalization.
bin_widths = histogram.axes[0].widths
bin_width_fractions = bin_widths / np.mean(bin_widths)
h_values_width_corrected = histogram.values() / bin_width_fractions

popt, pcov = _curve_fit_wrapper(
model, xdata, histogram.values(), hist_uncert, likelihood=likelihood
model, xdata, h_values_width_corrected, hist_uncert, likelihood=likelihood
)
model_values = model(xdata, *popt)

Expand Down Expand Up @@ -366,7 +373,14 @@ def _plot_fit_result(
)
hist_uncert = np.sqrt(variances)

errorbars = ax.errorbar(x_values, __hist.values(), hist_uncert, **eb_kwargs)
bin_widths = __hist.axes[0].widths
bin_width_fractions = bin_widths / np.sum(bin_widths) * len(bin_widths)

h_values_width_corrected = __hist.values() / bin_width_fractions
hist_uncert_width_corrected = hist_uncert / bin_width_fractions
errorbars = ax.errorbar(
x_values, h_values_width_corrected, hist_uncert_width_corrected, **eb_kwargs
)

# Ensure zorder draws data points above model
line_zorder = errorbars[0].get_zorder() - 1
Expand Down Expand Up @@ -424,7 +438,7 @@ def plot_ratio_array(
)
axis_artists = RatioErrorbarArtists(central_value_artist, errorbar_artists)
elif uncert_draw_type == "bar":
bar_width = (right_edge - left_edge) / len(ratio)
bar_widths = __hist.axes[0].widths

bar_top = ratio + ratio_uncert[1]
bar_bottom = ratio - ratio_uncert[0]
Expand All @@ -439,7 +453,7 @@ def plot_ratio_array(
bar_artists = ax.bar(
x_values,
height=bar_height,
width=bar_width,
width=bar_widths,
bottom=bar_bottom,
fill=False,
linewidth=0,
Expand Down Expand Up @@ -493,12 +507,12 @@ def plot_pull_array(
right_edge = __hist.axes.edges[-1][-1]

# Pull: plot the pulls using Matplotlib bar method
width = (right_edge - left_edge) / len(pulls)
bar_artists = ax.bar(x_values, pulls, width=width, **bar_kwargs)
bin_widths = __hist.axes[0].widths
bar_artists = ax.bar(x_values, pulls, width=bin_widths, **bar_kwargs)

pp_num = pp_kwargs.pop("num", 5)
patch_height = max(np.abs(pulls)) / pp_num
patch_width = width * len(pulls)
patch_width = right_edge - left_edge
patch_artists = []
for i in range(pp_num):
# gradient color patches
Expand Down Expand Up @@ -609,6 +623,9 @@ def _plot_ratiolike(

# Computation and Fit
hist_values = self.values()
bin_widths = self.axes[0].widths
bin_width_fractions = bin_widths / np.mean(bin_widths)
hist_values_width_corrected = hist_values / bin_width_fractions

main_ax_artists: MainAxisArtists # Type now due to control flow
if callable(other) or isinstance(other, str):
Expand Down Expand Up @@ -648,7 +665,8 @@ def _plot_ratiolike(
ub_kwargs=ub_kwargs,
)
else:
compare_values = other.values()
other_bin_width_fractions = other.axes[0].widths / np.mean(bin_widths)
compare_values = other.values() / other_bin_width_fractions

self_artists = histplot(self, ax=main_ax, label=rp_kwargs["num_label"])
other_artists = histplot(other, ax=main_ax, label=rp_kwargs["denom_label"])
Expand All @@ -659,9 +677,9 @@ def _plot_ratiolike(
# Compute ratios: containing no INF values
with np.errstate(divide="ignore", invalid="ignore"):
if view == "ratio":
ratios = hist_values / compare_values
ratios = hist_values_width_corrected / compare_values
ratio_uncert = ratio_uncertainty(
num=hist_values,
num=hist_values_width_corrected,
denom=compare_values,
uncertainty_type=rp_kwargs["uncertainty_type"],
)
Expand All @@ -672,7 +690,7 @@ def _plot_ratiolike(

elif view == "pull":
pulls: np.typing.NDArray[Any] = (
hist_values - compare_values
hist_values_width_corrected - compare_values
) / hist_values_uncert
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw, hist_values_uncert seems unbound if other is not a callable or string but a histogram. This is not something I introduced in this PR so if I'm not mistaken this might warrant a separate issue.

The question would be whether we just want to use np.sqrt(self.variances()) or in case other also has variances, if we want to statistically combine both somehow 🤷‍♂️


pulls[np.isnan(pulls) | np.isinf(pulls)] = 0
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
85 changes: 73 additions & 12 deletions tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,31 +692,92 @@ def test_image_plot_ratio_hist():
return fig


def _make_plot_ratio_callable_test_figure(hist_axis, n_numbers):
"""
Helper function to create a plot_ratio figure with a gaussian model.

The purpose is to create tests for histograms with different hist_axes, e.g.
regular and various variable binnings.
"""
np.random.seed(42)
hist_1 = Hist(hist_axis).fill(np.random.normal(size=n_numbers))

def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
return a * np.exp(-((x - x0) ** 2) / (2 * sigma**2)) + offset

fig = plt.figure()
assert hist_1.plot_ratio(
model, eb_color="black", fp_color="blue", ub_color="lightblue"
)
return fig


@pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50})
def test_image_plot_ratio_callable():
"""
Test plot_pull by comparing against a reference image generated via
`pytest --mpl-generate-path=tests/baseline`
"""

np.random.seed(42)
hist_axis = axis.Regular(
50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
)
return _make_plot_ratio_callable_test_figure(hist_axis, 1000)

hist_1 = Hist(
axis.Regular(
50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
)
).fill(np.random.normal(size=1000))

def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
return a * np.exp(-((x - x0) ** 2) / (2 * sigma**2)) + offset
@pytest.mark.mpl_image_compare(
baseline_dir="baseline",
filename="test_image_plot_ratio_callable.png",
savefig_kwargs={"dpi": 50},
)
def test_image_plot_ratio_variable_axis_with_regular_bins():
"""
Test plot_pull with an ``axis.Variable`` that uses regular edges.

fig = plt.figure()
The resulting image should be the same as that from
``test_image_plot_ratio_callable`` which uses an ``axis.Regular``.
"""

assert hist_1.plot_ratio(
model, eb_color="black", fp_color="blue", ub_color="lightblue"
hist_axis = axis.Variable(
np.linspace(-5, 5, 51),
name="X",
label="x [units]",
underflow=False,
overflow=False,
)
return _make_plot_ratio_callable_test_figure(hist_axis, 1000)

return fig

@pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50})
def test_image_plot_ratio_callable_bayesian_blocks_bins():
"""
Test plot_pull with variable bin widths generated using bayesian blocks.


Mostly like ``test_image_plot_ratio_callable``, just using a variable
binning instead of a regular one.
"""
# bin edges generated via hepstats.modeling.bayesian_blocks on 5k random-distributed numbers
bayesian_bin_edges = np.array(
[
-3.24126734,
-3.01357225,
-2.22544451,
-1.61651199,
-1.13075189,
-0.65398704,
0.72260413,
1.23926518,
1.67818774,
2.09883056,
2.63836271,
3.92623771,
]
)
hist_axis = axis.Variable(
bayesian_bin_edges, name="X", label="x [units]", underflow=False, overflow=False
)
return _make_plot_ratio_callable_test_figure(hist_axis, 5000)


@pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50})
Expand Down