Skip to content

Commit

Permalink
Calculate hist properties with new stats info
Browse files Browse the repository at this point in the history
Comparisons are made to ROOT to ensure that they're done properly
  • Loading branch information
raymondEhlers committed Aug 18, 2019
1 parent aa4cb32 commit c5b23a7
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
61 changes: 61 additions & 0 deletions pachyderm/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,67 @@ def x(self) -> np.ndarray:

return self._x

@property
def mean(self) -> float:
    """ Mean of the values filled into the histogram.

    Computed as the stored sum of ``weight * x`` divided by the summed bin contents,
    following the approach used by ROOT and physt.

    Args:
        None.
    Returns:
        Mean of the histogram, or NaN if the summed bin contents are not positive.
    Raises:
        ValueError: If ``total_sum_wx`` is not stored in the histogram metadata.
    """
    stats = self.metadata
    # The mean cannot be reconstructed from the bin contents alone; the stored sum is required.
    if "total_sum_wx" not in stats:
        raise ValueError("Sum of weights * x is not available, so we cannot calculate the mean.")

    sum_of_weights = np.sum(self.y)
    # Without a positive normalization the division is not meaningful, so report NaN instead.
    if not sum_of_weights > 0:
        return np.nan  # type: ignore
    return cast(float, stats["total_sum_wx"] / sum_of_weights)

@property
def std_dev(self) -> float:
    """ Standard deviation of the values filled into the histogram.

    Calculated as the square root of the magnitude of ``variance``, in the same way as ROOT.

    Args:
        None.
    Returns:
        Standard deviation of the histogram. NaN if the variance itself is NaN.
    Raises:
        ValueError: Propagated from ``variance`` if the required sums are not available.
    """
    # Floating point round-off (for example, when every entry shares a single x value) can leave
    # the variance marginally below zero, which would turn the square root into NaN. ROOT's
    # TH1::GetStdDev takes the absolute value before the square root, so we do the same here.
    return cast(float, np.sqrt(np.abs(self.variance)))

@property
def variance(self) -> float:
    """ Variance of the values filled into the histogram.

    Computed from the stored sums of ``weight * x`` and ``weight * x * x`` together with the
    summed bin contents, following the approach used by ROOT and physt.

    Args:
        None.
    Returns:
        Variance of the histogram, or NaN if the summed bin contents are not positive.
    Raises:
        ValueError: If ``total_sum_wx`` or ``total_sum_wx2`` is not stored in the metadata.
    """
    stats = self.metadata
    # Both stored sums are required. They are checked in this order so that the reported
    # message names the first one which is missing.
    requirements = (
        ("total_sum_wx", "Sum of weights * x is not available, so we cannot calculate the variance."),
        ("total_sum_wx2", "Sum of weights * x * x is not available, so we cannot calculate the variance."),
    )
    for key, message in requirements:
        if key not in stats:
            raise ValueError(message)

    sum_of_weights = np.sum(self.y)
    # Without a positive normalization the division is not meaningful, so report NaN instead.
    if not sum_of_weights > 0:
        return np.nan  # type: ignore

    sum_wx = stats["total_sum_wx"]
    sum_wx2 = stats["total_sum_wx2"]
    return cast(float, (sum_wx2 - (sum_wx ** 2 / sum_of_weights)) / sum_of_weights)

def find_bin(self, value: float) -> int:
""" Find the bin corresponding to the specified value.
Expand Down
13 changes: 13 additions & 0 deletions tests/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,19 @@ def test_uproot_hist_to_histogram(setup_histogram_conversion: Any) -> None:
# Cleanup
del uproot_file

@pytest.mark.ROOT  # type: ignore
def test_derived_properties(logging_mixin: Any, test_root_hists: Any) -> None:
    """ Check the derived histogram properties (mean, std. dev, variance) against ROOT. """
    # The ROOT histogram provides the reference values for the converted histogram.
    root_hist = test_root_hists.hist1D
    converted = histogram.Histogram1D.from_existing_hist(root_hist)

    # Mean should agree with ROOT.
    assert np.isclose(converted.mean, root_hist.GetMean())
    # Standard deviation should agree with ROOT.
    assert np.isclose(converted.std_dev, root_hist.GetStdDev())
    # ROOT is only asked for the standard deviation, so square it to compare the variance.
    assert np.isclose(converted.variance, root_hist.GetStdDev() ** 2)

@pytest.mark.parametrize("bin_edges, y, errors_squared", [ # type: ignore
(np.array([1, 2, 3]), np.array([1, 2, 3]), np.array([1, 2, 3])),
(np.array([1, 2, 3]), np.array([1, 2, 3]), np.array([1, 2])),
Expand Down

0 comments on commit c5b23a7

Please sign in to comment.