feat: yield uncertainty caching (#266)
* added caching to yield uncertainty calculation in model_utils.yield_stdev
alexander-held committed Aug 24, 2021
1 parent 2261b60 commit 726f92c
Showing 2 changed files with 45 additions and 2 deletions.
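For context: the change memoizes yield_stdev results in the module-level dict _YIELD_STDEV_CACHE, keyed on the function arguments. Since numpy arrays are not hashable, the parameter and uncertainty arrays enter the key as tuples and the correlation matrix as its raw bytes, while the pyhf model object hashes by identity. Below is a minimal, self-contained sketch of this memoization pattern; the _slow_stdev helper is hypothetical and only stands in for the real calculation in the diff.

from typing import Any, Dict, Tuple

import numpy as np

_CACHE: Dict[Any, Tuple[Any, Any]] = {}  # module-level cache, like _YIELD_STDEV_CACHE


def _slow_stdev(parameters: np.ndarray, uncertainty: np.ndarray,
                corr_mat: np.ndarray) -> Tuple[Any, Any]:
    # hypothetical placeholder for the expensive uncertainty calculation
    per_bin = [list(np.abs(parameters) * uncertainty)]
    per_channel = [float(np.sqrt(uncertainty @ corr_mat @ uncertainty))]
    return per_bin, per_channel


def cached_stdev(model: Any, parameters: np.ndarray, uncertainty: np.ndarray,
                 corr_mat: np.ndarray) -> Tuple[Any, Any]:
    # arrays are not hashable, so the key uses tuples and the matrix's raw bytes
    key = (model, tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes())
    cached = _CACHE.get(key, None)
    if cached is not None:
        return cached  # cache hit: skip the expensive calculation
    result = _slow_stdev(parameters, uncertainty, corr_mat)
    _CACHE[key] = result  # store for subsequent calls with the same arguments
    return result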
32 changes: 30 additions & 2 deletions src/cabinetry/model_utils.py
@@ -9,6 +9,10 @@
log = logging.getLogger(__name__)


# cache holding results from yield uncertainty calculations
_YIELD_STDEV_CACHE: Dict[Any, Tuple[List[List[float]], List[float]]] = {}


def model_and_data(
spec: Dict[str, Any], asimov: bool = False, with_aux: bool = True
) -> Tuple[pyhf.pdf.Model, List[float]]:
@@ -179,7 +183,8 @@ def yield_stdev(
Returns both the uncertainties per bin (in a list of channels), and the uncertainty
of the total yield per channel (again, for a list of channels). To calculate the
uncertainties for the total yield, the function internally treats the sum of yields
per channel like another channel with one bin.
per channel like another channel with one bin. The results of this function are
cached to speed up subsequent calls with the same arguments.

Args:
model (pyhf.pdf.Model): the model for which to calculate the standard deviations
@@ -193,6 +198,14 @@
- list of channels, each channel is a list of standard deviations per bin
- list of standard deviations per channel
"""
# check whether results are already stored in cache
cached_results = _YIELD_STDEV_CACHE.get(
(model, tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes()), None
)
if cached_results is not None:
# return results from cache
return cached_results

# indices where to split to separate all bins into regions
region_split_indices = _channel_boundary_indices(model)

@@ -277,7 +290,22 @@
total_stdev_per_channel = ak.flatten(np.sqrt(total_variance[n_channels:]))
log.debug(f"total stdev is {total_stdev_per_bin}")
log.debug(f"total stdev per channel is {total_stdev_per_channel}")
return ak.to_list(total_stdev_per_bin), ak.to_list(total_stdev_per_channel)

# convert to lists
total_stdev_per_bin = ak.to_list(total_stdev_per_bin)
total_stdev_per_channel = ak.to_list(total_stdev_per_channel)

# save to cache
_YIELD_STDEV_CACHE.update(
{
(model, tuple(parameters), tuple(uncertainty), corr_mat.data.tobytes()): (
total_stdev_per_bin,
total_stdev_per_channel,
)
}
)

return total_stdev_per_bin, total_stdev_per_channel


def unconstrained_parameter_count(model: pyhf.pdf.Model) -> int:
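Usage-wise, here is a hedged sketch (not part of the commit): a second yield_stdev call with identical arguments is now served from the cache instead of being recomputed. The toy model uses pyhf.simplemodels.uncorrelated_background (assuming a recent pyhf release), and the parameter uncertainties and correlation matrix are made-up placeholders. Since _YIELD_STDEV_CACHE is a plain dict with no eviction, it can also be inspected or cleared manually.

import numpy as np
import pyhf

from cabinetry import model_utils

model = pyhf.simplemodels.uncorrelated_background(
    signal=[5.0], bkg=[50.0], bkg_uncertainty=[7.0]
)
parameters = np.asarray(model.config.suggested_init())  # e.g. [1.0, 1.0]
uncertainty = np.asarray([0.1, 0.1])  # placeholder parameter uncertainties
corr_mat = np.identity(len(parameters))  # placeholder correlation matrix

# first call runs the full calculation and populates the cache
stdev_bin, stdev_chan = model_utils.yield_stdev(model, parameters, uncertainty, corr_mat)

# second call with identical arguments returns the cached result
assert model_utils.yield_stdev(model, parameters, uncertainty, corr_mat) == (
    stdev_bin,
    stdev_chan,
)

# the cache is a plain module-level dict: inspect or clear it as needed
print(len(model_utils._YIELD_STDEV_CACHE))  # 1
model_utils._YIELD_STDEV_CACHE.clear()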
15 changes: 15 additions & 0 deletions tests/test_model_utils.py
@@ -180,6 +180,21 @@ def test_yield_stdev(example_spec, example_spec_multibin):
assert np.allclose(total_stdev_bin[i_reg], expected_stdev_bin[i_reg])
assert np.allclose(total_stdev_chan[i_reg], expected_stdev_chan[i_reg])

# test caching by calling again with same arguments
total_stdev_bin, total_stdev_chan = model_utils.yield_stdev(
model, parameters, uncertainty, corr_mat
)
for i_reg in range(2):
assert np.allclose(total_stdev_bin[i_reg], expected_stdev_bin[i_reg])
assert np.allclose(total_stdev_chan[i_reg], expected_stdev_chan[i_reg])
# also look up cache directly
from_cache = model_utils._YIELD_STDEV_CACHE[
model, tuple(parameters), tuple(uncertainty), corr_mat.tobytes()
]
for i_reg in range(2):
assert np.allclose(from_cache[0][i_reg], expected_stdev_bin[i_reg])
assert np.allclose(from_cache[1][i_reg], expected_stdev_chan[i_reg])


def test_unconstrained_parameter_count(example_spec, example_spec_shapefactor):
model = pyhf.Workspace(example_spec).model()
