diff --git a/docs/changelog.md b/docs/changelog.md index 02110721..a446bfbd 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,13 @@ # Changelog +## WIP + +* When plotting a histogram against a PDF or other histogram with + `plot_pull` or `plot_ratio`, normalize the histogram values to the mean bin + width to correct for irregular bin widths when using axes of type + `axis.Variable`. + [#369](https://github.com/scikit-hep/hist/pull/369) + ## Version 2.6.1 * Fall back on normal repr when histogram is too large diff --git a/src/hist/plot.py b/src/hist/plot.py index 7ca5bbd6..1fc04a81 100644 --- a/src/hist/plot.py +++ b/src/hist/plot.py @@ -330,8 +330,15 @@ def _fit_callable_to_hist( # Infer best fit model parameters and covariance matrix xdata = histogram.axes[0].centers + + # For axes with varying bin widths correct hist values with widths while + # maintaining normalization. + bin_widths = histogram.axes[0].widths + bin_width_fractions = bin_widths / np.mean(bin_widths) + h_values_width_corrected = histogram.values() / bin_width_fractions + popt, pcov = _curve_fit_wrapper( - model, xdata, histogram.values(), hist_uncert, likelihood=likelihood + model, xdata, h_values_width_corrected, hist_uncert, likelihood=likelihood ) model_values = model(xdata, *popt) @@ -366,7 +373,14 @@ def _plot_fit_result( ) hist_uncert = np.sqrt(variances) - errorbars = ax.errorbar(x_values, __hist.values(), hist_uncert, **eb_kwargs) + bin_widths = __hist.axes[0].widths + bin_width_fractions = bin_widths / np.sum(bin_widths) * len(bin_widths) + + h_values_width_corrected = __hist.values() / bin_width_fractions + hist_uncert_width_corrected = hist_uncert / bin_width_fractions + errorbars = ax.errorbar( + x_values, h_values_width_corrected, hist_uncert_width_corrected, **eb_kwargs + ) # Ensure zorder draws data points above model line_zorder = errorbars[0].get_zorder() - 1 @@ -424,7 +438,7 @@ def plot_ratio_array( ) axis_artists = RatioErrorbarArtists(central_value_artist, errorbar_artists) elif uncert_draw_type == "bar": - bar_width = (right_edge - left_edge) / len(ratio) + bar_widths = __hist.axes[0].widths bar_top = ratio + ratio_uncert[1] bar_bottom = ratio - ratio_uncert[0] @@ -439,7 +453,7 @@ def plot_ratio_array( bar_artists = ax.bar( x_values, height=bar_height, - width=bar_width, + width=bar_widths, bottom=bar_bottom, fill=False, linewidth=0, @@ -493,12 +507,12 @@ def plot_pull_array( right_edge = __hist.axes.edges[-1][-1] # Pull: plot the pulls using Matplotlib bar method - width = (right_edge - left_edge) / len(pulls) - bar_artists = ax.bar(x_values, pulls, width=width, **bar_kwargs) + bin_widths = __hist.axes[0].widths + bar_artists = ax.bar(x_values, pulls, width=bin_widths, **bar_kwargs) pp_num = pp_kwargs.pop("num", 5) patch_height = max(np.abs(pulls)) / pp_num - patch_width = width * len(pulls) + patch_width = right_edge - left_edge patch_artists = [] for i in range(pp_num): # gradient color patches @@ -609,6 +623,9 @@ def _plot_ratiolike( # Computation and Fit hist_values = self.values() + bin_widths = self.axes[0].widths + bin_width_fractions = bin_widths / np.mean(bin_widths) + hist_values_width_corrected = hist_values / bin_width_fractions main_ax_artists: MainAxisArtists # Type now due to control flow if callable(other) or isinstance(other, str): @@ -648,7 +665,8 @@ def _plot_ratiolike( ub_kwargs=ub_kwargs, ) else: - compare_values = other.values() + other_bin_width_fractions = other.axes[0].widths / np.mean(bin_widths) + compare_values = other.values() / other_bin_width_fractions self_artists = histplot(self, ax=main_ax, label=rp_kwargs["num_label"]) other_artists = histplot(other, ax=main_ax, label=rp_kwargs["denom_label"]) @@ -659,9 +677,9 @@ def _plot_ratiolike( # Compute ratios: containing no INF values with np.errstate(divide="ignore", invalid="ignore"): if view == "ratio": - ratios = hist_values / compare_values + ratios = hist_values_width_corrected / compare_values ratio_uncert = ratio_uncertainty( - num=hist_values, + num=hist_values_width_corrected, denom=compare_values, uncertainty_type=rp_kwargs["uncertainty_type"], ) @@ -672,7 +690,7 @@ def _plot_ratiolike( elif view == "pull": pulls: np.typing.NDArray[Any] = ( - hist_values - compare_values + hist_values_width_corrected - compare_values ) / hist_values_uncert pulls[np.isnan(pulls) | np.isinf(pulls)] = 0 diff --git a/tests/baseline/test_image_plot_ratio_callable_bayesian_blocks_bins.png b/tests/baseline/test_image_plot_ratio_callable_bayesian_blocks_bins.png new file mode 100644 index 00000000..982691d1 Binary files /dev/null and b/tests/baseline/test_image_plot_ratio_callable_bayesian_blocks_bins.png differ diff --git a/tests/test_plot.py b/tests/test_plot.py index cededea7..0274b83f 100644 --- a/tests/test_plot.py +++ b/tests/test_plot.py @@ -692,6 +692,26 @@ def test_image_plot_ratio_hist(): return fig +def _make_plot_ratio_callable_test_figure(hist_axis, n_numbers): + """ + Helper function to create a plot_ratio figure with a gaussian model. + + The purpose is to create tests for histograms with different hist_axes, e.g. + regular and various variable binnings. + """ + np.random.seed(42) + hist_1 = Hist(hist_axis).fill(np.random.normal(size=n_numbers)) + + def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0): + return a * np.exp(-((x - x0) ** 2) / (2 * sigma**2)) + offset + + fig = plt.figure() + assert hist_1.plot_ratio( + model, eb_color="black", fp_color="blue", ub_color="lightblue" + ) + return fig + + @pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50}) def test_image_plot_ratio_callable(): """ @@ -699,24 +719,65 @@ def test_image_plot_ratio_callable(): `pytest --mpl-generate-path=tests/baseline` """ - np.random.seed(42) + hist_axis = axis.Regular( + 50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False + ) + return _make_plot_ratio_callable_test_figure(hist_axis, 1000) - hist_1 = Hist( - axis.Regular( - 50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False - ) - ).fill(np.random.normal(size=1000)) - def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0): - return a * np.exp(-((x - x0) ** 2) / (2 * sigma**2)) + offset +@pytest.mark.mpl_image_compare( + baseline_dir="baseline", + filename="test_image_plot_ratio_callable.png", + savefig_kwargs={"dpi": 50}, +) +def test_image_plot_ratio_variable_axis_with_regular_bins(): + """ + Test plot_pull with an ``axis.Variable`` that uses regular edges. - fig = plt.figure() + The resulting image should be the same as that from + ``test_image_plot_ratio_callable`` which uses an ``axis.Regular``. + """ - assert hist_1.plot_ratio( - model, eb_color="black", fp_color="blue", ub_color="lightblue" + hist_axis = axis.Variable( + np.linspace(-5, 5, 51), + name="X", + label="x [units]", + underflow=False, + overflow=False, ) + return _make_plot_ratio_callable_test_figure(hist_axis, 1000) - return fig + +@pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50}) +def test_image_plot_ratio_callable_bayesian_blocks_bins(): + """ + Test plot_pull with variable bin widths generated using bayesian blocks. + + + Mostly like ``test_image_plot_ratio_callable``, just using a variable + binning instead of a regular one. + """ + # bin edges generated via hepstats.modeling.bayesian_blocks on 5k random-distributed numbers + bayesian_bin_edges = np.array( + [ + -3.24126734, + -3.01357225, + -2.22544451, + -1.61651199, + -1.13075189, + -0.65398704, + 0.72260413, + 1.23926518, + 1.67818774, + 2.09883056, + 2.63836271, + 3.92623771, + ] + ) + hist_axis = axis.Variable( + bayesian_bin_edges, name="X", label="x [units]", underflow=False, overflow=False + ) + return _make_plot_ratio_callable_test_figure(hist_axis, 5000) @pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50})