scikit-hep · meliache · Jan 27, 2022 · Feb 15, 2022 · Feb 15, 2022 · Feb 16, 2022
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## WIP
+
+* When plotting a histogram against a PDF or other histogram with
+  `plot_pull` or `plot_ratio`, normalize the histogram values to the mean bin
+  width to correct for irregular bin widths when using axes of type
+  `axis.Variable`.
+  [#369](https://github.com/scikit-hep/hist/pull/369)
+
 ## Version 2.6.1
 
 * Fall back on normal repr when histogram is too large

diff --git a/src/hist/plot.py b/src/hist/plot.py
@@ -330,8 +330,15 @@ def _fit_callable_to_hist(
 
     # Infer best fit model parameters and covariance matrix
     xdata = histogram.axes[0].centers
+
+    # For axes with varying bin widths correct hist values with widths while
+    # maintaining normalization.
+    bin_widths = histogram.axes[0].widths
+    bin_width_fractions = bin_widths / np.mean(bin_widths)
+    h_values_width_corrected = histogram.values() / bin_width_fractions
+
     popt, pcov = _curve_fit_wrapper(
-        model, xdata, histogram.values(), hist_uncert, likelihood=likelihood
+        model, xdata, h_values_width_corrected, hist_uncert, likelihood=likelihood
     )
     model_values = model(xdata, *popt)
 
@@ -366,7 +373,14 @@ def _plot_fit_result(
         )
     hist_uncert = np.sqrt(variances)
 
-    errorbars = ax.errorbar(x_values, __hist.values(), hist_uncert, **eb_kwargs)
+    bin_widths = __hist.axes[0].widths
+    bin_width_fractions = bin_widths / np.sum(bin_widths) * len(bin_widths)
+
+    h_values_width_corrected = __hist.values() / bin_width_fractions
+    hist_uncert_width_corrected = hist_uncert / bin_width_fractions
+    errorbars = ax.errorbar(
+        x_values, h_values_width_corrected, hist_uncert_width_corrected, **eb_kwargs
+    )
 
     # Ensure zorder draws data points above model
     line_zorder = errorbars[0].get_zorder() - 1
@@ -424,7 +438,7 @@ def plot_ratio_array(
         )
         axis_artists = RatioErrorbarArtists(central_value_artist, errorbar_artists)
     elif uncert_draw_type == "bar":
-        bar_width = (right_edge - left_edge) / len(ratio)
+        bar_widths = __hist.axes[0].widths
 
         bar_top = ratio + ratio_uncert[1]
         bar_bottom = ratio - ratio_uncert[0]
@@ -439,7 +453,7 @@ def plot_ratio_array(
         bar_artists = ax.bar(
             x_values,
             height=bar_height,
-            width=bar_width,
+            width=bar_widths,
             bottom=bar_bottom,
             fill=False,
             linewidth=0,
@@ -493,12 +507,12 @@ def plot_pull_array(
     right_edge = __hist.axes.edges[-1][-1]
 
     # Pull: plot the pulls using Matplotlib bar method
-    width = (right_edge - left_edge) / len(pulls)
-    bar_artists = ax.bar(x_values, pulls, width=width, **bar_kwargs)
+    bin_widths = __hist.axes[0].widths
+    bar_artists = ax.bar(x_values, pulls, width=bin_widths, **bar_kwargs)
 
     pp_num = pp_kwargs.pop("num", 5)
     patch_height = max(np.abs(pulls)) / pp_num
-    patch_width = width * len(pulls)
+    patch_width = right_edge - left_edge
     patch_artists = []
     for i in range(pp_num):
         # gradient color patches
@@ -609,6 +623,9 @@ def _plot_ratiolike(
 
     # Computation and Fit
     hist_values = self.values()
+    bin_widths = self.axes[0].widths
+    bin_width_fractions = bin_widths / np.mean(bin_widths)
+    hist_values_width_corrected = hist_values / bin_width_fractions
 
     main_ax_artists: MainAxisArtists  # Type now due to control flow
     if callable(other) or isinstance(other, str):
@@ -648,7 +665,8 @@ def _plot_ratiolike(
             ub_kwargs=ub_kwargs,
         )
     else:
-        compare_values = other.values()
+        other_bin_width_fractions = other.axes[0].widths / np.mean(bin_widths)
+        compare_values = other.values() / other_bin_width_fractions
 
         self_artists = histplot(self, ax=main_ax, label=rp_kwargs["num_label"])
         other_artists = histplot(other, ax=main_ax, label=rp_kwargs["denom_label"])
@@ -659,9 +677,9 @@ def _plot_ratiolike(
     # Compute ratios: containing no INF values
     with np.errstate(divide="ignore", invalid="ignore"):
         if view == "ratio":
-            ratios = hist_values / compare_values
+            ratios = hist_values_width_corrected / compare_values
             ratio_uncert = ratio_uncertainty(
-                num=hist_values,
+                num=hist_values_width_corrected,
                 denom=compare_values,
                 uncertainty_type=rp_kwargs["uncertainty_type"],
             )
@@ -672,7 +690,7 @@ def _plot_ratiolike(
 
         elif view == "pull":
             pulls: np.typing.NDArray[Any] = (
-                hist_values - compare_values
+                hist_values_width_corrected - compare_values
             ) / hist_values_uncert
 
             pulls[np.isnan(pulls) | np.isinf(pulls)] = 0

diff --git a/tests/baseline/test_image_plot_ratio_callable_bayesian_blocks_bins.png b/tests/baseline/test_image_plot_ratio_callable_bayesian_blocks_bins.png
diff --git a/tests/test_plot.py b/tests/test_plot.py
@@ -692,31 +692,92 @@ def test_image_plot_ratio_hist():
     return fig
 
 
+def _make_plot_ratio_callable_test_figure(hist_axis, n_numbers):
+    """
+    Helper function to create a plot_ratio figure with a gaussian model.
+
+    The purpose is to create tests for histograms with different hist_axes, e.g.
+    regular and various variable binnings.
+    """
+    np.random.seed(42)
+    hist_1 = Hist(hist_axis).fill(np.random.normal(size=n_numbers))
+
+    def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
+        return a * np.exp(-((x - x0) ** 2) / (2 * sigma**2)) + offset
+
+    fig = plt.figure()
+    assert hist_1.plot_ratio(
+        model, eb_color="black", fp_color="blue", ub_color="lightblue"
+    )
+    return fig
+
+
 @pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50})
 def test_image_plot_ratio_callable():
     """
     Test plot_pull by comparing against a reference image generated via
     `pytest --mpl-generate-path=tests/baseline`
     """
 
-    np.random.seed(42)
+    hist_axis = axis.Regular(
+        50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
+    )
+    return _make_plot_ratio_callable_test_figure(hist_axis, 1000)
 
-    hist_1 = Hist(
-        axis.Regular(
-            50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
-        )
-    ).fill(np.random.normal(size=1000))
 
-    def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
-        return a * np.exp(-((x - x0) ** 2) / (2 * sigma**2)) + offset
+@pytest.mark.mpl_image_compare(
+    baseline_dir="baseline",
+    filename="test_image_plot_ratio_callable.png",
+    savefig_kwargs={"dpi": 50},
+)
+def test_image_plot_ratio_variable_axis_with_regular_bins():
+    """
+    Test plot_pull with an ``axis.Variable`` that uses regular edges.
 
-    fig = plt.figure()
+    The resulting image should be the same as that from
+    ``test_image_plot_ratio_callable`` which uses an ``axis.Regular``.
+    """
 
-    assert hist_1.plot_ratio(
-        model, eb_color="black", fp_color="blue", ub_color="lightblue"
+    hist_axis = axis.Variable(
+        np.linspace(-5, 5, 51),
+        name="X",
+        label="x [units]",
+        underflow=False,
+        overflow=False,
     )
+    return _make_plot_ratio_callable_test_figure(hist_axis, 1000)
 
-    return fig
+
+@pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50})
+def test_image_plot_ratio_callable_bayesian_blocks_bins():
+    """
+    Test plot_pull with variable bin widths generated using bayesian blocks.
+
+
+    Mostly like ``test_image_plot_ratio_callable``, just using a variable
+    binning instead of a regular one.
+    """
+    # bin edges generated via hepstats.modeling.bayesian_blocks on 5k random-distributed numbers
+    bayesian_bin_edges = np.array(
+        [
+            -3.24126734,
+            -3.01357225,
+            -2.22544451,
+            -1.61651199,
+            -1.13075189,
+            -0.65398704,
+            0.72260413,
+            1.23926518,
+            1.67818774,
+            2.09883056,
+            2.63836271,
+            3.92623771,
+        ]
+    )
+    hist_axis = axis.Variable(
+        bayesian_bin_edges, name="X", label="x [units]", underflow=False, overflow=False
+    )
+    return _make_plot_ratio_callable_test_figure(hist_axis, 5000)
 
 
 @pytest.mark.mpl_image_compare(baseline_dir="baseline", savefig_kwargs={"dpi": 50})