diff --git a/README.md b/README.md index 0faf8f5f..7f60f12c 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,8 @@ model, data = cabinetry.model_utils.model_and_data(ws) fit_results = cabinetry.fit.fit(model, data) # visualize the post-fit model prediction and data -cabinetry.visualize.data_mc(model, data, config=config, fit_results=fit_results) +model_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results) +cabinetry.visualize.data_mc(model_postfit, data, config=config) ``` The above is an abbreviated version of an example included in `example.py`, which shows how to use `cabinetry`. diff --git a/docs/api.rst b/docs/api.rst index 9f97b1bd..1cc8e437 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -54,6 +54,12 @@ cabinetry.fit .. automodule:: cabinetry.fit :members: +cabinetry.fit.results_containers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: cabinetry.fit.results_containers + :members: + cabinetry.visualize ------------------- diff --git a/example.py b/example.py index bb16245c..0bb3aa74 100644 --- a/example.py +++ b/example.py @@ -39,6 +39,13 @@ cabinetry.visualize.pulls(fit_results) cabinetry.visualize.correlation_matrix(fit_results) + # obtain pre- and post-fit model predictions + model_prefit = cabinetry.model_utils.prediction(model) + model_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results) + + # show post-fit yield table + cabinetry.tabulate.yields(model_postfit, data) + # visualize pre- and post-fit distributions - cabinetry.visualize.data_mc(model, data, config=config) - cabinetry.visualize.data_mc(model, data, config=config, fit_results=fit_results) + cabinetry.visualize.data_mc(model_prefit, data, config=config) + cabinetry.visualize.data_mc(model_postfit, data, config=config) diff --git a/src/cabinetry/cli/__init__.py b/src/cabinetry/cli/__init__.py index aae86569..02c74387 100644 --- a/src/cabinetry/cli/__init__.py +++ b/src/cabinetry/cli/__init__.py @@ -309,12 +309,13 @@ def data_mc( # optionally perform maximum likelihood fit to obtain post-fit model fit_results = cabinetry_fit.fit(model, data) if postfit else None + model_prediction = cabinetry_model_utils.prediction(model, fit_results=fit_results) cabinetry_visualize.data_mc( - model, + model_prediction, data, config=cabinetry_config, figure_folder=figfolder, - fit_results=fit_results, + close_figure=True, ) diff --git a/src/cabinetry/fit.py b/src/cabinetry/fit/__init__.py similarity index 90% rename from src/cabinetry/fit.py rename to src/cabinetry/fit/__init__.py index 478cc91a..d3d678a5 100644 --- a/src/cabinetry/fit.py +++ b/src/cabinetry/fit/__init__.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import iminuit import numpy as np @@ -8,108 +8,18 @@ import scipy.stats from cabinetry import model_utils +from cabinetry.fit.results_containers import ( + FitResults, + LimitResults, + RankingResults, + ScanResults, + SignificanceResults, +) log = logging.getLogger(__name__) -class FitResults(NamedTuple): - """Collects fit results in one object. 
- - Args: - bestfit (numpy.ndarray): best-fit results of parameters - uncertainty (numpy.ndarray): uncertainties of best-fit parameter results - labels (List[str]): parameter labels - corr_mat (np.ndarray): parameter correlation matrix - best_twice_nll (float): -2 log(likelihood) at best-fit point - goodess_of_fit (float, optional): goodness-of-fit p-value, defaults to -1 - """ - - bestfit: np.ndarray - uncertainty: np.ndarray - labels: List[str] - corr_mat: np.ndarray - best_twice_nll: float - goodness_of_fit: float = -1 - - -class RankingResults(NamedTuple): - """Collects nuisance parameter ranking results in one object. - - The best-fit results per parameter, the uncertainties, and the labels should not - include the parameter of interest, since no impact for it is calculated. - - Args: - bestfit (numpy.ndarray): best-fit results of parameters - uncertainty (numpy.ndarray): uncertainties of best-fit parameter results - labels (List[str]): parameter labels - prefit_up (numpy.ndarray): pre-fit impact in "up" direction - prefit_down (numpy.ndarray): pre-fit impact in "down" direction - postfit_up (numpy.ndarray): post-fit impact in "up" direction - postfit_down (numpy.ndarray): post-fit impact in "down" direction - """ - - bestfit: np.ndarray - uncertainty: np.ndarray - labels: List[str] - prefit_up: np.ndarray - prefit_down: np.ndarray - postfit_up: np.ndarray - postfit_down: np.ndarray - - -class ScanResults(NamedTuple): - """Collects likelihood scan results in one object. - - Args: - name (str): name of parameter in scan - bestfit (float): best-fit parameter value from unconstrained fit - uncertainty (float): uncertainty of parameter in unconstrained fit - parameter_values (np.ndarray): parameter values used in scan - delta_nlls (np.ndarray): -2 log(L) difference at each scanned point - """ - - name: str - bestfit: float - uncertainty: float - parameter_values: np.ndarray - delta_nlls: np.ndarray - - -class LimitResults(NamedTuple): - """Collects parameter upper limit results in one object. - - Args: - observed_limit (np.ndarray): observed limit - expected_limit (np.ndarray): expected limit, including 1 and 2 sigma bands - observed_CLs (np.ndarray): observed CLs values - expected_CLs (np.ndarray): expected CLs values, including 1 and 2 sigma bands - poi_values (np.ndarray): POI values used in scan - """ - - observed_limit: float - expected_limit: np.ndarray - observed_CLs: np.ndarray - expected_CLs: np.ndarray - poi_values: np.ndarray - - -class SignificanceResults(NamedTuple): - """Collects results from a discovery significance calculation in one object. 
- - Args: - observed_p_value (float): observed p-value - observed_significance (float): observed significance - expected_p_value (float): expected/median p-value - expected_significance (float): expected/median significance - """ - - observed_p_value: float - observed_significance: float - expected_p_value: float - expected_significance: float - - def print_results( fit_results: FitResults, ) -> None: @@ -840,6 +750,9 @@ def significance(model: pyhf.pdf.Model, data: List[float]) -> SignificanceResult Args: model (pyhf.pdf.Model): model to use in fits data (List[float]): data (including auxdata) the model is fit to + + Returns: + SignificanceResults: observed and expected p-values and significances """ pyhf.set_backend(pyhf.tensorlib, pyhf.optimize.minuit_optimizer(verbose=1)) diff --git a/src/cabinetry/fit/results_containers.py b/src/cabinetry/fit/results_containers.py new file mode 100644 index 00000000..661187fc --- /dev/null +++ b/src/cabinetry/fit/results_containers.py @@ -0,0 +1,100 @@ +from typing import List, NamedTuple + +import numpy as np + + +class FitResults(NamedTuple): + """Collects fit results in one object. + + Args: + bestfit (numpy.ndarray): best-fit results of parameters + uncertainty (numpy.ndarray): uncertainties of best-fit parameter results + labels (List[str]): parameter labels + corr_mat (np.ndarray): parameter correlation matrix + best_twice_nll (float): -2 log(likelihood) at best-fit point + goodess_of_fit (float, optional): goodness-of-fit p-value, defaults to -1 + """ + + bestfit: np.ndarray + uncertainty: np.ndarray + labels: List[str] + corr_mat: np.ndarray + best_twice_nll: float + goodness_of_fit: float = -1 + + +class RankingResults(NamedTuple): + """Collects nuisance parameter ranking results in one object. + + The best-fit results per parameter, the uncertainties, and the labels should not + include the parameter of interest, since no impact for it is calculated. + + Args: + bestfit (numpy.ndarray): best-fit results of parameters + uncertainty (numpy.ndarray): uncertainties of best-fit parameter results + labels (List[str]): parameter labels + prefit_up (numpy.ndarray): pre-fit impact in "up" direction + prefit_down (numpy.ndarray): pre-fit impact in "down" direction + postfit_up (numpy.ndarray): post-fit impact in "up" direction + postfit_down (numpy.ndarray): post-fit impact in "down" direction + """ + + bestfit: np.ndarray + uncertainty: np.ndarray + labels: List[str] + prefit_up: np.ndarray + prefit_down: np.ndarray + postfit_up: np.ndarray + postfit_down: np.ndarray + + +class ScanResults(NamedTuple): + """Collects likelihood scan results in one object. + + Args: + name (str): name of parameter in scan + bestfit (float): best-fit parameter value from unconstrained fit + uncertainty (float): uncertainty of parameter in unconstrained fit + parameter_values (np.ndarray): parameter values used in scan + delta_nlls (np.ndarray): -2 log(L) difference at each scanned point + """ + + name: str + bestfit: float + uncertainty: float + parameter_values: np.ndarray + delta_nlls: np.ndarray + + +class LimitResults(NamedTuple): + """Collects parameter upper limit results in one object. 
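(Illustrative example, not part of the patch.) The containers collected in this new `results_containers` module are plain `NamedTuple`s and stay importable through `cabinetry.fit`; a minimal sketch of constructing one directly, with values mirroring the new `tests/fit/test_results_containers.py` below:

```python
import numpy as np

from cabinetry import fit

# FitResults is re-exported via cabinetry.fit, so existing imports keep working
fit_results = fit.FitResults(
    bestfit=np.asarray([1.0]),
    uncertainty=np.asarray([0.1]),
    labels=["par_a"],
    corr_mat=np.asarray([[1.0]]),
    best_twice_nll=2.0,
)
print(fit_results.goodness_of_fit)  # not set explicitly, defaults to -1
```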
+ + Args: + observed_limit (np.ndarray): observed limit + expected_limit (np.ndarray): expected limit, including 1 and 2 sigma bands + observed_CLs (np.ndarray): observed CLs values + expected_CLs (np.ndarray): expected CLs values, including 1 and 2 sigma bands + poi_values (np.ndarray): POI values used in scan + """ + + observed_limit: float + expected_limit: np.ndarray + observed_CLs: np.ndarray + expected_CLs: np.ndarray + poi_values: np.ndarray + + +class SignificanceResults(NamedTuple): + """Collects results from a discovery significance calculation in one object. + + Args: + observed_p_value (float): observed p-value + observed_significance (float): observed significance + expected_p_value (float): expected/median p-value + expected_significance (float): expected/median significance + """ + + observed_p_value: float + observed_significance: float + expected_p_value: float + expected_significance: float diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py index 34a3d3c2..32cbf9ef 100644 --- a/src/cabinetry/model_utils.py +++ b/src/cabinetry/model_utils.py @@ -1,10 +1,13 @@ import logging -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union import awkward as ak import numpy as np import pyhf +from cabinetry import configuration +from cabinetry.fit.results_containers import FitResults + log = logging.getLogger(__name__) @@ -13,6 +16,27 @@ _YIELD_STDEV_CACHE: Dict[Any, Tuple[List[List[float]], List[float]]] = {} +class ModelPrediction(NamedTuple): + """Model prediction with yields and total uncertainties per bin and channel. + + Args: + model (pyhf.pdf.Model): model to which prediction corresponds to + model_yields (List[List[List[float]]]): yields per sample, channel and bin, + indices: channel, sample, bin + total_stdev_model_bins (List[List[float]]): total yield uncertainty per channel + and per bin, indices: channel, bin + total_stdev_model_channels (List[float]): total yield uncertainty per channel, + index: channel + label (str): label for the prediction, e.g. "pre-fit" or "post-fit" + """ + + model: pyhf.pdf.Model + model_yields: List[List[List[float]]] + total_stdev_model_bins: List[List[float]] + total_stdev_model_channels: List[float] + label: str + + def model_and_data( spec: Dict[str, Any], asimov: bool = False, with_aux: bool = True ) -> Tuple[pyhf.pdf.Model, List[float]]: @@ -308,6 +332,68 @@ def yield_stdev( return total_stdev_per_bin, total_stdev_per_channel +def prediction( + model: pyhf.pdf.Model, + fit_results: Optional[FitResults] = None, + label: Optional[str] = None, +) -> ModelPrediction: + """Returns model prediction, including model yields and uncertainties. + + If the optional fit result is not provided, the pre-fit Asimov yields and + uncertainties are calculated. If the fit result is provided, the best-fit parameter + values, uncertainties, and the parameter correlations are used to obtain the post- + fit model and its associated uncertainties. 
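(Illustrative example, not part of the patch.) A minimal sketch of the intended `prediction` workflow, mirroring the README and `example.py` changes above; the small standalone model built via `pyhf.simplemodels` and its placeholder yields are assumptions made only to keep the snippet runnable:

```python
import cabinetry
import pyhf

# stand-in model and data (placeholder yields), instead of model_and_data(ws)
model = pyhf.simplemodels.uncorrelated_background(
    signal=[5.0, 10.0], bkg=[50.0, 60.0], bkg_uncertainty=[5.0, 6.0]
)
data = [55.0, 70.0] + model.config.auxdata

# pre-fit: Asimov parameters, pre-fit uncertainties, diagonal correlation matrix
model_prefit = cabinetry.model_utils.prediction(model)
print(model_prefit.label)  # "pre-fit"

# post-fit: best-fit values, uncertainties and correlations are propagated
fit_results = cabinetry.fit.fit(model, data)
model_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results)

# yields indexed by channel, sample, bin; uncertainties per bin and per channel
print(model_postfit.model_yields)
print(model_postfit.total_stdev_model_bins)
print(model_postfit.total_stdev_model_channels)
```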
+ + Args: + model (pyhf.pdf.Model): model to evaluate yield prediction for + fit_results (Optional[FitResults], optional): parameter configuration to use, + includes best-fit settings and uncertainties, as well as correlation matrix, + defaults to None (then the pre-fit configuration is used) + label (Optional[str], optional): label to include in model prediction, defaults + to None (then will use "pre-fit" if fit results are not included, and "post- + fit" otherwise) + + Returns: + ModelPrediction: model, yields and uncertainties per bin and channel + """ + if fit_results is not None: + # fit results specified, so they are used + param_values = fit_results.bestfit + param_uncertainty = fit_results.uncertainty + corr_mat = fit_results.corr_mat + label = "post-fit" if label is None else label + + else: + # no fit results specified, generate pre-fit parameter values, uncertainties, + # and diagonal correlation matrix + param_values = asimov_parameters(model) + param_uncertainty = prefit_uncertainties(model) + corr_mat = np.zeros(shape=(len(param_values), len(param_values))) + np.fill_diagonal(corr_mat, 1.0) + label = "pre-fit" if label is None else label + + yields_combined = pyhf.tensorlib.to_numpy( + model.main_model.expected_data(param_values, return_by_sample=True) + ) # all channels concatenated + + # slice the yields into list of lists (of lists) where first index is channel, + # second index is sample (and third index is bin) + region_split_indices = _channel_boundary_indices(model) + model_yields = [ + m.tolist() for m in np.split(yields_combined, region_split_indices, axis=1) + ] + + # calculate the total standard deviation of the model prediction + # indices: channel (and bin) for per-bin uncertainties, channel for per-channel + total_stdev_model_bins, total_stdev_model_channels = yield_stdev( + model, param_values, param_uncertainty, corr_mat + ) + + return ModelPrediction( + model, model_yields, total_stdev_model_bins, total_stdev_model_channels, label + ) + + def unconstrained_parameter_count(model: pyhf.pdf.Model) -> int: """Returns the number of unconstrained parameters in a model. @@ -350,3 +436,72 @@ def _parameter_index(par_name: str, labels: Union[List[str], Tuple[str, ...]]) - if par_index == -1: log.error(f"parameter {par_name} not found in model") return par_index + + +def _strip_auxdata(model: pyhf.pdf.Model, data: List[float]) -> List[float]: + """Always returns observed yields, no matter whether data includes auxdata. + + Args: + model (pyhf.pdf.Model): model to which data corresponds to + data (List[float]): data, either including auxdata which is then stripped off or + only observed yields + + Returns: + List[float]: observed data yields + """ + + n_bins_total = sum(model.config.channel_nbins.values()) + if len(data) != n_bins_total: + # strip auxdata, only observed yields are needed + data = data[:n_bins_total] + return data + + +def _data_per_channel(model: pyhf.pdf.Model, data: List[float]) -> List[List[float]]: + """Returns data split per channel, and strips off auxiliary data if included. 
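(Illustrative example, not part of the patch.) The two private helpers added here behave as exercised in the new `test__strip_auxdata` / `test__data_per_channel` tests further below; a small sketch with a stand-in model:

```python
import pyhf

from cabinetry import model_utils

# stand-in single-channel model with two bins (placeholder yields)
model = pyhf.simplemodels.uncorrelated_background(
    signal=[5.0, 10.0], bkg=[50.0, 60.0], bkg_uncertainty=[5.0, 6.0]
)
data_with_aux = [55.0, 70.0] + model.config.auxdata

print(model_utils._strip_auxdata(model, data_with_aux))     # [55.0, 70.0]
print(model_utils._data_per_channel(model, data_with_aux))  # [[55.0, 70.0]]
```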
+ + Args: + model (pyhf.pdf.Model): model to which data corresponds to + data (List[float]): data (not split by channel), can either include auxdata + which is then stripped off, or only observed yields + + Returns: + List[List[float]]: data per channel and per bin + """ + # strip off auxiliary data + data_combined = _strip_auxdata(model, data) + channel_split_indices = _channel_boundary_indices(model) + # data is indexed by channel (and bin) + data_yields = [d.tolist() for d in np.split(data_combined, channel_split_indices)] + return data_yields + + +def _filter_channels( + model: pyhf.pdf.Model, channels: Optional[Union[str, List[str]]] +) -> List[str]: + """Returns a list of channels in a model after applying filtering. + + Args: + model (pyhf.pdf.Model): model from which to extract channels + channels (Optional[Union[str, List[str]]]): name of channel or list of channels + to filter, only including those channels provided via this argument in the + return of the function + + Returns: + List[str]: list of channels after filtering + """ + # channels included in model + filtered_channels = model.config.channels + # if one or more custom channels are provided, only include those + if channels is not None: + channels = configuration._setting_to_list(channels) + # only include if channel exists in model + filtered_channels = [ch for ch in channels if ch in model.config.channels] + + if filtered_channels == []: + log.warning( + f"channel(s) {channels} not found in model, available channel(s): " + f"{model.config.channels}" + ) + + return filtered_channels diff --git a/src/cabinetry/tabulate.py b/src/cabinetry/tabulate.py index bcc2e3ae..c1f26492 100644 --- a/src/cabinetry/tabulate.py +++ b/src/cabinetry/tabulate.py @@ -1,10 +1,13 @@ import logging -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional, Union +import awkward as ak import numpy as np import pyhf import tabulate +from cabinetry import model_utils + log = logging.getLogger(__name__) @@ -13,7 +16,7 @@ def _header_name(channel_name: str, i_bin: int, unique: bool = True) -> str: """Constructs the header name for a column in a yield table. There are two modes: by default the names are unique (to be used as keys). With - ``unique=False``, the region names are skipped for bins beyond the first bin (for + ``unique=False``, the channel names are skipped for bins beyond the first bin (for less redundant output). Args: @@ -36,6 +39,8 @@ def _yields_per_bin( model_yields: List[List[List[float]]], total_stdev_model: List[List[float]], data: List[List[float]], + channels: List[str], + label: str, ) -> List[Dict[str, Any]]: """Outputs and returns a yield table with predicted and observed yields per bin. 
@@ -45,6 +50,8 @@ def _yields_per_bin( total_stdev_model (List[List[float]]): total model standard deviation per channel and bin data (List[List[float]]): data yield per channel and bin + channels (List[str]): names of channels to use + label (str): label for model prediction to include in log Returns: List[Dict[str, Any]]: yield table for use with the ``tabulate`` package @@ -52,10 +59,13 @@ def _yields_per_bin( table = [] # table containing all yields headers = {} # headers with nicer formatting for output + # indices of included channels + channel_indices = [model.config.channels.index(ch) for ch in channels] + # rows for each individual sample for i_sam, sample_name in enumerate(model.config.samples): sample_dict = {"sample": sample_name} # one dict per sample - for i_chan, channel_name in enumerate(model.config.channels): + for i_chan, channel_name in zip(channel_indices, channels): for i_bin in range(model.config.channel_nbins[channel_name]): sample_dict.update( { @@ -69,7 +79,7 @@ def _yields_per_bin( # dicts for total model prediction and data total_dict = {"sample": "total"} data_dict = {"sample": "data"} - for i_chan, channel_name in enumerate(model.config.channels): + for i_chan, channel_name in zip(channel_indices, channels): total_model = np.sum(model_yields[i_chan], axis=0) # sum over samples for i_bin in range(model.config.channel_nbins[channel_name]): header_name = _header_name(channel_name, i_bin) @@ -86,7 +96,7 @@ def _yields_per_bin( table += [total_dict, data_dict] log.info( - "yields per bin:\n" + f"yields per bin for {label} model prediction:\n" + tabulate.tabulate( table, headers=headers, @@ -101,6 +111,8 @@ def _yields_per_channel( model_yields: List[List[float]], total_stdev_model: List[float], data: List[float], + channels: List[str], + label: str, ) -> List[Dict[str, Any]]: """Outputs and returns a yield table with predicted and observed yields per channel. 
@@ -109,23 +121,28 @@ def _yields_per_channel( model_yields (List[List[float]]): yields per channel and sample total_stdev_model (List[float]): total model standard deviation per channel data (List[float]): data yield per channel + channels (List[str]): names of channels to use + label (str): label for model prediction to include in log Returns: List[Dict[str, Any]]: yield table for use with the ``tabulate`` package """ table = [] # table containing all yields + # indices of included channels + channel_indices = [model.config.channels.index(ch) for ch in channels] + # rows for each individual sample for i_sam, sample_name in enumerate(model.config.samples): sample_dict = {"sample": sample_name} # one dict per sample - for i_chan, channel_name in enumerate(model.config.channels): + for i_chan, channel_name in zip(channel_indices, channels): sample_dict.update({channel_name: f"{model_yields[i_chan][i_sam]:.2f}"}) table.append(sample_dict) # dicts for total model prediction and data total_dict = {"sample": "total"} data_dict = {"sample": "data"} - for i_chan, channel_name in enumerate(model.config.channels): + for i_chan, channel_name in zip(channel_indices, channels): total_model = np.sum(model_yields[i_chan], axis=0) # sum over samples total_dict.update( { @@ -137,7 +154,7 @@ def _yields_per_channel( table += [total_dict, data_dict] log.info( - "yields per channel:\n" + f"yields per channel for {label} model prediction:\n" + tabulate.tabulate( table, headers="keys", @@ -145,3 +162,70 @@ def _yields_per_channel( ) ) return table + + +def yields( + model_prediction: model_utils.ModelPrediction, + data: List[float], + channels: Optional[Union[str, List[str]]] = None, + per_bin: bool = True, + per_channel: bool = False, +) -> None: + """Generates yield tables, showing model prediction and data. + + Channels can be filtered via the optional ``channels`` argument. Either yields per + bin, or yields per channel, or both can be shown. 
+ + Args: + model_prediction (model_utils.ModelPrediction): model prediction to show in + table + data (List[float]): data to include in table, can either include auxdata (the + auxdata is then stripped internally) or only observed yields + channels (Optional[Union[str, List[str]]], optional): name of channel to show, + or list of names to include, defaults to None (uses all channels) + per_bin (bool, optional): whether to show a table with yields per bin, defaults + to True + per_channel (bool, optional): whether to show a table with yields per channel, + defaults to False + """ + if not (per_bin or per_channel): + log.warning( + "requested neither yields per bin nor per channel, no table produced" + ) + return + + # strip off auxdata (if needed) and obtain data indexed by channel (and bin) + data_yields = model_utils._data_per_channel(model_prediction.model, data) + + # channels to include in table, with optional filtering applied + filtered_channels = model_utils._filter_channels(model_prediction.model, channels) + + if filtered_channels == []: + # nothing to include in tables, warning already raised via _filter_channels + return None + + if per_bin: + # yield table with yields per bin + _yields_per_bin( + model_prediction.model, + model_prediction.model_yields, + model_prediction.total_stdev_model_bins, + data_yields, + filtered_channels, + model_prediction.label, + ) + + if per_channel: + # yields per channel + model_yields_per_channel = np.sum( + ak.from_iter(model_prediction.model_yields), axis=-1 + ).tolist() + data_per_channel = [sum(d) for d in data_yields] + _yields_per_channel( + model_prediction.model, + model_yields_per_channel, + model_prediction.total_stdev_model_channels, + data_per_channel, + filtered_channels, + model_prediction.label, + ) diff --git a/src/cabinetry/visualize/__init__.py b/src/cabinetry/visualize/__init__.py index 2c034129..2dec56bd 100644 --- a/src/cabinetry/visualize/__init__.py +++ b/src/cabinetry/visualize/__init__.py @@ -5,16 +5,13 @@ import pathlib from typing import Any, Dict, List, Optional, Tuple, Union -import awkward as ak import matplotlib as mpl import numpy as np -import pyhf from cabinetry import configuration from cabinetry import fit from cabinetry import histo from cabinetry import model_utils -from cabinetry import tabulate from cabinetry import template_builder from cabinetry.visualize import plot_model from cabinetry.visualize import plot_result @@ -23,22 +20,19 @@ log = logging.getLogger(__name__) -def _figure_name(region_name: str, is_prefit: bool) -> str: +def _figure_name(region_name: str, label: str) -> str: """Constructs a file name for a figure. Args: region_name (str): name of the region shown in the figure - is_prefit (bool): whether the figure shows the pre- or post-fit model + label (str): additional label, e.g. 
from model prediction Returns: str: name of the file the figure should be saved to """ - figure_name = region_name.replace(" ", "-") - if is_prefit: - figure_name += "_prefit" - else: - figure_name += "_postfit" - figure_name += ".pdf" + if label in ["pre-fit", "post-fit"]: + label = label.replace("-", "") # replace pre-fit by prefit, post-fit by postfit + figure_name = f"{region_name.replace(' ', '-')}_{label}.pdf" return figure_name @@ -108,7 +102,7 @@ def data_mc_from_histograms( if not is_data: model_stdevs.append(histogram.stdev) - figure_name = _figure_name(region["Name"], True) + figure_name = _figure_name(region["Name"], "pre-fit") total_model_unc = _total_yield_uncertainty(model_stdevs) bin_edges = histogram.bins label = f"{region['Name']}\npre-fit" @@ -130,17 +124,16 @@ def data_mc_from_histograms( def data_mc( - model: pyhf.pdf.Model, + model_prediction: model_utils.ModelPrediction, data: List[float], config: Optional[Dict[str, Any]] = None, figure_folder: Union[str, pathlib.Path] = "figures", - fit_results: Optional[fit.FitResults] = None, log_scale: Optional[bool] = None, log_scale_x: bool = False, - include_table: bool = True, + channels: Optional[Union[str, List[str]]] = None, close_figure: bool = False, save_figure: bool = True, -) -> List[Dict[str, Any]]: +) -> Optional[List[Dict[str, Any]]]: """Draws pre- and post-fit data/MC histograms for a ``pyhf`` model and data. The ``config`` argument is optional, but required to determine correct axis labels @@ -149,7 +142,7 @@ def data_mc( models that were not created with ``cabinetry``, and for which no config exists. Args: - model (pyhf.pdf.Model): model to visualize + model_prediction (model_utils.ModelPrediction): model prediction to show data (List[float]): data to include in visualization, can either include auxdata (the auxdata is then stripped internally) or only observed yields config (Optional[Dict[str, Any]], optional): cabinetry configuration needed for @@ -157,89 +150,40 @@ def data_mc( then) figure_folder (Union[str, pathlib.Path], optional): path to the folder to save figures in, defaults to "figures" - fit_results (Optional[fit.FitResults]): parameter configuration to use for plot, - includes best-fit settings and uncertainties, as well as correlation matrix, - defaults to None (then the pre-fit configuration is drawn) log_scale (Optional[bool], optional): whether to use logarithmic vertical axis, defaults to None (automatically determine whether to use linear/log scale) log_scale_x (bool, optional): whether to use logarithmic horizontal axis, defaults to False - include_table (bool, optional): whether to also output a yield table, defaults - to True + channels (Optional[Union[str, List[str]]], optional): name of channel to show, + or list of names to include, defaults to None (uses all channels) close_figure (bool, optional): whether to close each figure immediately after saving it, defaults to False (enable when producing many figures to avoid memory issues, prevents rendering in notebooks) save_figure (bool, optional): whether to save figures, defaults to True Returns: - List[Dict[str, Any]]: list of dictionaries, where each dictionary contains a - figure and the associated region name + Optional[List[Dict[str, Any]]]: list of dictionaries, where each dictionary + contains a figure and the associated region name, or None if no figure was + produced """ - n_bins_total = sum(model.config.channel_nbins.values()) - if len(data) != n_bins_total: - # strip auxdata, only observed yields are needed - data_combined = 
data[:n_bins_total] - else: - data_combined = data + # strip off auxdata (if needed) and obtain data indexed by channel (and bin) + data_yields = model_utils._data_per_channel(model_prediction.model, data) - if fit_results is not None: - # fit results specified, draw a post-fit plot with them applied - prefit = False - param_values = fit_results.bestfit - param_uncertainty = fit_results.uncertainty - corr_mat = fit_results.corr_mat + # channels to include in table, with optional filtering applied + filtered_channels = model_utils._filter_channels(model_prediction.model, channels) - else: - # no fit results specified, draw a pre-fit plot - prefit = True - # use pre-fit parameter values, uncertainties, and diagonal correlation matrix - param_values = model_utils.asimov_parameters(model) - param_uncertainty = model_utils.prefit_uncertainties(model) - corr_mat = np.zeros(shape=(len(param_values), len(param_values))) - np.fill_diagonal(corr_mat, 1.0) - - yields_combined = pyhf.tensorlib.to_numpy( - model.main_model.expected_data(param_values, return_by_sample=True) - ) # all channels concatenated - - # slice the yields into list of lists (of lists) where first index is channel, - # second index is sample (and third index is bin) - region_split_indices = model_utils._channel_boundary_indices(model) - model_yields = [ - m.tolist() for m in np.split(yields_combined, region_split_indices, axis=1) - ] - # data is only indexed by channel (and bin) - data_yields = [d.tolist() for d in np.split(data_combined, region_split_indices)] + if filtered_channels == []: + # nothing to include in tables, warning already raised via _filter_channels + return None - # calculate the total standard deviation of the model prediction - # indices: channel (and bin) for per-bin uncertainties, channel for per-channel - total_stdev_model_bins, total_stdev_model_channels = model_utils.yield_stdev( - model, param_values, param_uncertainty, corr_mat - ) - - if include_table: - # show yield table - if prefit: - log.info("generating pre-fit yield table") - else: - log.info("generating post-fit yield table") - tabulate._yields_per_bin( - model, model_yields, total_stdev_model_bins, data_yields - ) - - # yields per channel - model_yields_per_channel = np.sum(ak.from_iter(model_yields), axis=-1).tolist() - data_per_channel = [sum(d) for d in data_yields] - tabulate._yields_per_channel( - model, - model_yields_per_channel, - total_stdev_model_channels, - data_per_channel, - ) + # indices of included channels + channel_indices = [ + model_prediction.model.config.channels.index(ch) for ch in filtered_channels + ] # process channel by channel figure_dict_list = [] - for i_chan, channel_name in enumerate(model.config.channels): + for i_chan, channel_name in zip(channel_indices, filtered_channels): histogram_dict_list = [] # one dict per region/channel if config is not None: @@ -252,12 +196,12 @@ def data_mc( bin_edges = np.arange(len(data_yields[i_chan]) + 1) variable = "bin" - for i_sam, sample_name in enumerate(model.config.samples): + for i_sam, sample_name in enumerate(model_prediction.model.config.samples): histogram_dict_list.append( { "label": sample_name, "isData": False, - "yields": model_yields[i_chan][i_sam], + "yields": model_prediction.model_yields[i_chan][i_sam], "variable": variable, } ) @@ -272,25 +216,18 @@ def data_mc( } ) - if prefit: - # path is None if figure should not be saved - figure_path = ( - pathlib.Path(figure_folder) / _figure_name(channel_name, True) - if save_figure - else None - ) - label = 
f"{channel_name}\npre-fit" - else: - figure_path = ( - pathlib.Path(figure_folder) / _figure_name(channel_name, False) - if save_figure - else None - ) - label = f"{channel_name}\npost-fit" + # path is None if figure should not be saved + figure_path = ( + pathlib.Path(figure_folder) + / _figure_name(channel_name, model_prediction.label) + if save_figure + else None + ) + label = f"{channel_name}\n{model_prediction.label}" fig = plot_model.data_mc( histogram_dict_list, - np.asarray(total_stdev_model_bins[i_chan]), + np.asarray(model_prediction.total_stdev_model_bins[i_chan]), bin_edges, figure_path, log_scale=log_scale, diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 9374b07c..e571fecd 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -50,7 +50,7 @@ def test_templates(mock_validate, mock_create_histograms, cli_helpers, tmp_path) # different method result = runner.invoke(cli.templates, ["--method", "unknown", config_path]) assert result.exit_code == 0 - assert mock_create_histograms.call_args_list[-1] == ( + assert mock_create_histograms.call_args == ( (config,), {"method": "unknown"}, ) @@ -134,8 +134,8 @@ def test_fit(mock_util, mock_fit, mock_pulls, mock_corrmat, tmp_path): # Asimov result = runner.invoke(cli.fit, ["--asimov", workspace_path]) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": True}) - assert mock_fit.call_args_list[-1] == ( + assert mock_util.call_args == ((workspace,), {"asimov": True}) + assert mock_fit.call_args == ( ("model", "data"), {"minos": None, "goodness_of_fit": False}, ) @@ -143,8 +143,8 @@ def test_fit(mock_util, mock_fit, mock_pulls, mock_corrmat, tmp_path): # MINOS for one parameter result = runner.invoke(cli.fit, ["--minos", "par", workspace_path]) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": False}) - assert mock_fit.call_args_list[-1] == ( + assert mock_util.call_args == ((workspace,), {"asimov": False}) + assert mock_fit.call_args == ( ("model", "data"), {"minos": ["par"], "goodness_of_fit": False}, ) @@ -154,8 +154,8 @@ def test_fit(mock_util, mock_fit, mock_pulls, mock_corrmat, tmp_path): cli.fit, ["--minos", "par_a", "--minos", "par_b", workspace_path] ) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": False}) - assert mock_fit.call_args_list[-1] == ( + assert mock_util.call_args == ((workspace,), {"asimov": False}) + assert mock_fit.call_args == ( ("model", "data"), {"minos": ["par_a", "par_b"], "goodness_of_fit": False}, ) @@ -163,8 +163,8 @@ def test_fit(mock_util, mock_fit, mock_pulls, mock_corrmat, tmp_path): # goodness-of-fit result = runner.invoke(cli.fit, ["--goodness_of_fit", workspace_path]) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": False}) - assert mock_fit.call_args_list[-1] == ( + assert mock_util.call_args == ((workspace,), {"asimov": False}) + assert mock_fit.call_args == ( ("model", "data"), {"minos": None, "goodness_of_fit": True}, ) @@ -186,11 +186,11 @@ def test_fit(mock_util, mock_fit, mock_pulls, mock_corrmat, tmp_path): cli.fit, ["--figfolder", "folder", "--pulls", "--corrmat", workspace_path] ) assert result.exit_code == 0 - assert mock_corrmat.call_args_list[-1] == ( + assert mock_corrmat.call_args == ( (fit_results,), {"figure_folder": "folder"}, ) - assert mock_pulls.call_args_list[-1] == ( + assert mock_pulls.call_args == ( (fit_results,), {"figure_folder": "folder"}, ) @@ -259,9 +259,9 @@ def 
test_ranking(mock_util, mock_fit, mock_rank, mock_vis, tmp_path): ["--asimov", "--max_pars", 3, "--figfolder", "folder", workspace_path], ) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": True}) - assert mock_fit.call_args_list[-1] == (("model", "data"), {}) - assert mock_rank.call_args_list[-1] == ( + assert mock_util.call_args == ((workspace,), {"asimov": True}) + assert mock_fit.call_args == (("model", "data"), {}) + assert mock_rank.call_args == ( ("model", "data"), {"fit_results": fit_results}, ) @@ -349,8 +349,8 @@ def test_scan(mock_util, mock_scan, mock_vis, tmp_path): ], ) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": True}) - assert mock_scan.call_args_list[-1] == ( + assert mock_util.call_args == ((workspace,), {"asimov": True}) + assert mock_scan.call_args == ( ("model", "data", par_name), {"par_range": (0.0, 2.0), "n_steps": 21}, ) @@ -419,8 +419,8 @@ def test_limit(mock_util, mock_limit, mock_vis, tmp_path): ["--asimov", "--tolerance", "0.1", "--figfolder", "folder", workspace_path], ) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": True}) - assert mock_limit.call_args_list[-1] == (("model", "data"), {"tolerance": 0.1}) + assert mock_util.call_args == ((workspace,), {"asimov": True}) + assert mock_limit.call_args == (("model", "data"), {"tolerance": 0.1}) assert mock_vis.call_args_list[-1][1] == {"figure_folder": "folder"} @@ -449,11 +449,14 @@ def test_significance(mock_util, mock_sig, tmp_path): # Asimov result = runner.invoke(cli.significance, ["--asimov", workspace_path]) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {"asimov": True}) - assert mock_sig.call_args_list[-1] == (("model", "data"), {}) + assert mock_util.call_args == ((workspace,), {"asimov": True}) + assert mock_sig.call_args == (("model", "data"), {}) @mock.patch("cabinetry.visualize.data_mc", autospec=True) +@mock.patch( + "cabinetry.model_utils.prediction", return_value=("mock_model_pred"), autospec=True +) @mock.patch( "cabinetry.fit.fit", return_value=fit.FitResults( @@ -467,7 +470,9 @@ def test_significance(mock_util, mock_sig, tmp_path): return_value=("model", "data"), autospec=True, ) -def test_data_mc(mock_util, mock_validate, mock_fit, mock_vis, cli_helpers, tmp_path): +def test_data_mc( + mock_util, mock_validate, mock_fit, mock_pred, mock_vis, cli_helpers, tmp_path +): workspace = {"workspace": "mock"} workspace_path = str(tmp_path / "workspace.json") @@ -483,10 +488,11 @@ def test_data_mc(mock_util, mock_validate, mock_fit, mock_vis, cli_helpers, tmp_ assert mock_util.call_args_list == [((workspace,), {})] assert mock_validate.call_count == 0 assert mock_fit.call_count == 0 + assert mock_pred.call_args_list == [(("model",), {"fit_results": None})] assert mock_vis.call_args_list == [ ( - ("model", "data"), - {"config": None, "figure_folder": "figures", "fit_results": None}, + ("mock_model_pred", "data"), + {"config": None, "figure_folder": "figures", "close_figure": True}, ) ] @@ -503,10 +509,11 @@ def test_data_mc(mock_util, mock_validate, mock_fit, mock_vis, cli_helpers, tmp_ [workspace_path, "--config", config_path, "--postfit", "--figfolder", "folder"], ) assert result.exit_code == 0 - assert mock_util.call_args_list[-1] == ((workspace,), {}) + assert mock_util.call_args == ((workspace,), {}) assert mock_validate.call_args_list == [((config,), {})] assert mock_fit.call_args_list == [(("model", "data"), {})] - 
assert mock_vis.call_args_list[-1] == ( - ("model", "data"), - {"config": config, "figure_folder": "folder", "fit_results": fit_results}, + assert mock_pred.call_args == (("model",), {"fit_results": fit_results}) + assert mock_vis.call_args == ( + ("mock_model_pred", "data"), + {"config": config, "figure_folder": "folder", "close_figure": True}, ) diff --git a/tests/test_fit.py b/tests/fit/test_fit.py similarity index 88% rename from tests/test_fit.py rename to tests/fit/test_fit.py index cca9b9b0..7895263a 100644 --- a/tests/test_fit.py +++ b/tests/fit/test_fit.py @@ -10,87 +10,6 @@ from cabinetry import model_utils -def test_FitResults(): - bestfit = np.asarray([1.0]) - uncertainty = np.asarray([0.1]) - labels = ["par_a"] - corr_mat = np.asarray([[1.0]]) - best_twice_nll = 2.0 - fit_results = fit.FitResults(bestfit, uncertainty, labels, corr_mat, best_twice_nll) - assert np.allclose(fit_results.bestfit, bestfit) - assert np.allclose(fit_results.uncertainty, uncertainty) - assert fit_results.labels == labels - assert np.allclose(fit_results.corr_mat, corr_mat) - assert fit_results.best_twice_nll == best_twice_nll - assert fit_results.goodness_of_fit == -1 - - -def test_RankingResults(): - bestfit = np.asarray([1.0]) - uncertainty = np.asarray([0.1]) - labels = ["par_a"] - prefit_up = np.asarray([0.3]) - prefit_down = np.asarray([-0.3]) - postfit_up = np.asarray([0.2]) - postfit_down = np.asarray([-0.2]) - ranking_results = fit.RankingResults( - bestfit, uncertainty, labels, prefit_up, prefit_down, postfit_up, postfit_down - ) - assert np.allclose(ranking_results.bestfit, bestfit) - assert np.allclose(ranking_results.uncertainty, uncertainty) - assert ranking_results.labels == labels - assert np.allclose(ranking_results.prefit_up, prefit_up) - assert np.allclose(ranking_results.prefit_down, prefit_down) - assert np.allclose(ranking_results.postfit_up, postfit_up) - assert np.allclose(ranking_results.postfit_down, postfit_down) - - -def test_ScanResults(): - name = "par_a" - bestfit = 1.2 - uncertainty = 0.3 - parameter_values = np.asarray([0.9, 1.2, 1.5]) - delta_nlls = np.asarray([1.0, 0.0, 1.0]) - scan_results = fit.ScanResults( - name, bestfit, uncertainty, parameter_values, delta_nlls - ) - assert scan_results.name == name - assert scan_results.bestfit == bestfit - assert scan_results.uncertainty == uncertainty - assert np.allclose(scan_results.parameter_values, parameter_values) - assert np.allclose(scan_results.delta_nlls, delta_nlls) - - -def test_LimitResults(): - observed_limit = 3.0 - expected_limit = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0]) - observed_CLs = np.asarray([0.05]) - expected_CLs = np.asarray([0.01, 0.02, 0.05, 0.07, 0.10]) - poi_values = np.asarray([3.0]) - limit_results = fit.LimitResults( - observed_limit, expected_limit, observed_CLs, expected_CLs, poi_values - ) - assert limit_results.observed_limit == observed_limit - assert np.allclose(limit_results.expected_limit, expected_limit) - assert np.allclose(limit_results.observed_CLs, observed_CLs) - assert np.allclose(limit_results.expected_CLs, expected_CLs) - assert np.allclose(limit_results.poi_values, poi_values) - - -def test_SignificanceResults(): - obs_p_val = 0.02 - obs_significance = 2 - exp_p_val = 0.16 - exp_significance = 1 - significance_results = fit.SignificanceResults( - obs_p_val, obs_significance, exp_p_val, exp_significance - ) - assert significance_results.observed_p_value == obs_p_val - assert significance_results.observed_significance == obs_significance - assert 
significance_results.expected_p_value == exp_p_val - assert significance_results.expected_significance == exp_significance - - def test_print_results(caplog): caplog.set_level(logging.DEBUG) diff --git a/tests/fit/test_results_containers.py b/tests/fit/test_results_containers.py new file mode 100644 index 00000000..9a2f3b7d --- /dev/null +++ b/tests/fit/test_results_containers.py @@ -0,0 +1,100 @@ +import logging + +import numpy as np + +from cabinetry import fit + + +def test_FitResults(): + bestfit = np.asarray([1.0]) + uncertainty = np.asarray([0.1]) + labels = ["par_a"] + corr_mat = np.asarray([[1.0]]) + best_twice_nll = 2.0 + fit_results = fit.FitResults(bestfit, uncertainty, labels, corr_mat, best_twice_nll) + assert np.allclose(fit_results.bestfit, bestfit) + assert np.allclose(fit_results.uncertainty, uncertainty) + assert fit_results.labels == labels + assert np.allclose(fit_results.corr_mat, corr_mat) + assert fit_results.best_twice_nll == best_twice_nll + assert fit_results.goodness_of_fit == -1 + + +def test_RankingResults(): + bestfit = np.asarray([1.0]) + uncertainty = np.asarray([0.1]) + labels = ["par_a"] + prefit_up = np.asarray([0.3]) + prefit_down = np.asarray([-0.3]) + postfit_up = np.asarray([0.2]) + postfit_down = np.asarray([-0.2]) + ranking_results = fit.RankingResults( + bestfit, uncertainty, labels, prefit_up, prefit_down, postfit_up, postfit_down + ) + assert np.allclose(ranking_results.bestfit, bestfit) + assert np.allclose(ranking_results.uncertainty, uncertainty) + assert ranking_results.labels == labels + assert np.allclose(ranking_results.prefit_up, prefit_up) + assert np.allclose(ranking_results.prefit_down, prefit_down) + assert np.allclose(ranking_results.postfit_up, postfit_up) + assert np.allclose(ranking_results.postfit_down, postfit_down) + + +def test_ScanResults(): + name = "par_a" + bestfit = 1.2 + uncertainty = 0.3 + parameter_values = np.asarray([0.9, 1.2, 1.5]) + delta_nlls = np.asarray([1.0, 0.0, 1.0]) + scan_results = fit.ScanResults( + name, bestfit, uncertainty, parameter_values, delta_nlls + ) + assert scan_results.name == name + assert scan_results.bestfit == bestfit + assert scan_results.uncertainty == uncertainty + assert np.allclose(scan_results.parameter_values, parameter_values) + assert np.allclose(scan_results.delta_nlls, delta_nlls) + + +def test_LimitResults(): + observed_limit = 3.0 + expected_limit = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0]) + observed_CLs = np.asarray([0.05]) + expected_CLs = np.asarray([0.01, 0.02, 0.05, 0.07, 0.10]) + poi_values = np.asarray([3.0]) + limit_results = fit.LimitResults( + observed_limit, expected_limit, observed_CLs, expected_CLs, poi_values + ) + assert limit_results.observed_limit == observed_limit + assert np.allclose(limit_results.expected_limit, expected_limit) + assert np.allclose(limit_results.observed_CLs, observed_CLs) + assert np.allclose(limit_results.expected_CLs, expected_CLs) + assert np.allclose(limit_results.poi_values, poi_values) + + +def test_SignificanceResults(): + obs_p_val = 0.02 + obs_significance = 2 + exp_p_val = 0.16 + exp_significance = 1 + significance_results = fit.SignificanceResults( + obs_p_val, obs_significance, exp_p_val, exp_significance + ) + assert significance_results.observed_p_value == obs_p_val + assert significance_results.observed_significance == obs_significance + assert significance_results.expected_p_value == exp_p_val + assert significance_results.expected_significance == exp_significance + + +def test_print_results(caplog): + 
caplog.set_level(logging.DEBUG) + + bestfit = np.asarray([1.0, 2.0]) + uncertainty = np.asarray([0.1, 0.3]) + labels = ["param_A", "param_B"] + fit_results = fit.FitResults(bestfit, uncertainty, labels, np.empty(0), 0.0) + + fit.print_results(fit_results) + assert "param_A = 1.0000 +/- 0.1000" in [rec.message for rec in caplog.records] + assert "param_B = 2.0000 +/- 0.3000" in [rec.message for rec in caplog.records] + caplog.clear() diff --git a/tests/test_integration.py b/tests/test_integration.py index c51ac99e..84e97861 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -154,21 +154,21 @@ def test_integration(tmp_path, ntuple_creator, caplog): caplog.clear() # pre- and post-fit yield uncertainties - cabinetry.visualize.data_mc(model, data, close_figure=True) - assert "total stdev is [[69, 58.3, 38.2, 45.3]]" in [ - rec.message for rec in caplog.records - ] - assert "total stdev per channel is [137]" in [rec.message for rec in caplog.records] - caplog.clear() + model_prefit = cabinetry.model_utils.prediction(model) + assert np.allclose( + model_prefit.total_stdev_model_bins, + [[69.040789, 58.343328, 38.219599, 45.296964]], + ) + assert np.allclose(model_prefit.total_stdev_model_channels, [136.791978]) + _ = cabinetry.visualize.data_mc(model_prefit, data, close_figure=True) - cabinetry.visualize.data_mc(model, data, fit_results=fit_results, close_figure=True) - assert "total stdev is [[11.9, 7.28, 7.41, 7.69]]" in [ - rec.message for rec in caplog.records - ] - assert "total stdev per channel is [20.3]" in [ - rec.message for rec in caplog.records - ] - caplog.clear() + model_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results) + assert np.allclose( + model_postfit.total_stdev_model_bins, + [[11.898333, 7.283185, 7.414715, 7.687922]], + ) + assert np.allclose(model_postfit.total_stdev_model_channels, [20.329523]) + _ = cabinetry.visualize.data_mc(model_postfit, data, close_figure=True) # nuisance parameter ranking ranking_results = cabinetry.fit.ranking( diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py index b94f082c..fa9a0362 100644 --- a/tests/test_model_utils.py +++ b/tests/test_model_utils.py @@ -1,10 +1,28 @@ import copy import logging +from unittest import mock import numpy as np import pyhf from cabinetry import model_utils +from cabinetry.fit.results_containers import FitResults + + +def test_ModelPrediction(example_spec): + model = pyhf.Workspace(example_spec).model() + model_yields = [[[10.0]]] + total_stdev_model_bins = [[2.0]] + total_stdev_model_channels = [2.0] + label = "abc" + model_prediction = model_utils.ModelPrediction( + model, model_yields, total_stdev_model_bins, total_stdev_model_channels, label + ) + assert model_prediction.model == model + assert model_prediction.model_yields == model_yields + assert model_prediction.total_stdev_model_bins == total_stdev_model_bins + assert model_prediction.total_stdev_model_channels == total_stdev_model_channels + assert model_prediction.label == label def test_model_and_data(example_spec): @@ -196,6 +214,73 @@ def test_yield_stdev(example_spec, example_spec_multibin): assert np.allclose(from_cache[1][i_reg], expected_stdev_chan[i_reg]) +@mock.patch("cabinetry.model_utils.yield_stdev", return_value=([[0.3]], [0.3])) +@mock.patch( + "cabinetry.model_utils.prefit_uncertainties", + return_value=([0.04956657, 0.0]), +) +@mock.patch( + "cabinetry.model_utils.asimov_parameters", + return_value=([1.0, 1.0]), +) +def test_prediction(mock_asimov, mock_unc, mock_stdev, 
example_spec): + model = pyhf.Workspace(example_spec).model() + + # pre-fit prediction + model_pred = model_utils.prediction(model) + assert model_pred.model == model + assert model_pred.model_yields == [[[51.8]]] # from pyhf expected_data call + assert model_pred.total_stdev_model_bins == [[0.3]] # from mock + assert model_pred.total_stdev_model_channels == [0.3] # from mock + assert model_pred.label == "pre-fit" + + # Asimov parameter calculation and pre-fit uncertainties + assert mock_asimov.call_args_list == [((model,), {})] + assert mock_unc.call_args_list == [((model,), {})] + + # call to stdev calculation + assert mock_stdev.call_count == 1 + assert mock_stdev.call_args_list[0][0][0] == model + assert np.allclose(mock_stdev.call_args_list[0][0][1], [1.0, 1.0]) + assert np.allclose(mock_stdev.call_args_list[0][0][2], [0.04956657, 0.0]) + assert np.allclose( + mock_stdev.call_args_list[0][0][3], np.asarray([[1.0, 0.0], [0.0, 1.0]]) + ) + assert mock_stdev.call_args_list[0][1] == {} + + # post-fit prediction + fit_results = FitResults( + np.asarray([1.01, 1.1]), + np.asarray([0.03, 0.1]), + [], + np.asarray([[1.0, 0.2], [0.2, 1.0]]), + 0.0, + ) + model_pred = model_utils.prediction(model, fit_results=fit_results) + assert model_pred.model == model + assert np.allclose(model_pred.model_yields, [[[57.54980000]]]) # new par value + assert model_pred.total_stdev_model_bins == [[0.3]] # from mock + assert model_pred.total_stdev_model_channels == [0.3] # from mock + assert model_pred.label == "post-fit" + + assert mock_asimov.call_count == 1 # no new call + assert mock_unc.call_count == 1 # no new call + + # call to stdev calculation with fit_results propagated + assert mock_stdev.call_count == 2 + assert mock_stdev.call_args_list[1][0][0] == model + assert np.allclose(mock_stdev.call_args_list[1][0][1], [1.01, 1.1]) + assert np.allclose(mock_stdev.call_args_list[1][0][2], [0.03, 0.1]) + assert np.allclose( + mock_stdev.call_args_list[1][0][3], np.asarray([[1.0, 0.2], [0.2, 1.0]]) + ) + assert mock_stdev.call_args_list[1][1] == {} + + # custom label + model_pred = model_utils.prediction(model, label="abc") + assert model_pred.label == "abc" + + def test_unconstrained_parameter_count(example_spec, example_spec_shapefactor): model = pyhf.Workspace(example_spec).model() assert model_utils.unconstrained_parameter_count(model) == 1 @@ -222,3 +307,48 @@ def test__parameter_index(caplog): assert model_utils._parameter_index("x", labels) == -1 assert "parameter x not found in model" in [rec.message for rec in caplog.records] caplog.clear() + + +def test__strip_auxdata(example_spec): + model = pyhf.Workspace(example_spec).model() + data_with_aux = list(model.expected_data([1.0, 1.0], include_auxdata=True)) + data_without_aux = list(model.expected_data([1.0, 1.0], include_auxdata=False)) + + assert model_utils._strip_auxdata(model, data_with_aux) == [51.8] + assert model_utils._strip_auxdata(model, data_without_aux) == [51.8] + + +@mock.patch("cabinetry.model_utils._channel_boundary_indices", return_value=[2]) +@mock.patch("cabinetry.model_utils._strip_auxdata", return_value=[25.0, 5.0, 8.0]) +def test__data_per_channel(mock_aux, mock_bin, example_spec_multibin): + model = pyhf.Workspace(example_spec_multibin).model() + data = [25.0, 5.0, 8.0, 1.0, 1.0, 1.0] + + data_per_ch = model_utils._data_per_channel(model, [25.0, 5.0, 8.0, 1.0, 1.0, 1.0]) + assert data_per_ch == [[25.0, 5.0], [8.0]] # auxdata stripped and split by channel + + # auxdata and channel index call + assert mock_aux.call_args_list == 
[[(model, data), {}]]
+    assert mock_bin.call_args_list == [[(model,), {}]]
+
+
+def test__filter_channels(example_spec_multibin, caplog):
+    caplog.set_level(logging.DEBUG)
+    model = pyhf.Workspace(example_spec_multibin).model()
+
+    assert model_utils._filter_channels(model, None) == ["region_1", "region_2"]
+    assert model_utils._filter_channels(model, "region_1") == ["region_1"]
+    assert model_utils._filter_channels(model, ["region_1", "region_2"]) == [
+        "region_1",
+        "region_2",
+    ]
+    assert model_utils._filter_channels(model, ["region_1", "abc"]) == ["region_1"]
+    caplog.clear()
+
+    # no matching channels
+    assert model_utils._filter_channels(model, "abc") == []
+    assert (
+        "channel(s) ['abc'] not found in model, available channel(s): "
+        "['region_1', 'region_2']" in [rec.message for rec in caplog.records]
+    )
+    caplog.clear()
diff --git a/tests/test_tabulate.py b/tests/test_tabulate.py
index 68d703ae..a4060a8d 100644
--- a/tests/test_tabulate.py
+++ b/tests/test_tabulate.py
@@ -1,6 +1,10 @@
+import logging
+from unittest import mock
+
 import pyhf
 import pytest
 
+from cabinetry import model_utils
 from cabinetry import tabulate
 
 
@@ -16,14 +20,21 @@ def test__header_name(test_input, expected):
     assert tabulate._header_name("abc", 2, unique=False) == "\nbin 3"
 
 
-def test__yields_per_bin(example_spec_multibin, example_spec_with_background):
+def test__yields_per_bin(example_spec_multibin, example_spec_with_background, caplog):
+    caplog.set_level(logging.DEBUG)
+    # multiple channels
     model = pyhf.Workspace(example_spec_multibin).model()
     yields = [[[25.0, 5.0]], [[8.0]]]
     total_stdev = [[5.0, 2.0], [1.0]]
     data = [[35, 8], [10]]
+    channels = model.config.channels
+    label = "pre-fit"
+    caplog.clear()
 
-    yield_table = tabulate._yields_per_bin(model, yields, total_stdev, data)
+    yield_table = tabulate._yields_per_bin(
+        model, yields, total_stdev, data, channels, label
+    )
     assert yield_table == [
         {
             "sample": "Signal",
@@ -44,14 +55,19 @@ def test__yields_per_bin(example_spec_multibin, example_spec_with_background):
             "region_2\nbin 1": "10.00",
         },
     ]
+    assert "yields per bin for pre-fit model prediction:" in caplog.records[0].message
+    caplog.clear()
 
     # multiple samples
     model = pyhf.Workspace(example_spec_with_background).model()
     yields = [[[150.0], [50.0]]]
     total_stdev = [[8.60]]
     data = [[160]]
+    channels = model.config.channels
 
-    yield_table = tabulate._yields_per_bin(model, yields, total_stdev, data)
+    yield_table = tabulate._yields_per_bin(
+        model, yields, total_stdev, data, channels, label
+    )
     assert yield_table == [
         {"sample": "Background", "Signal Region\nbin 1": "150.00"},
         {"sample": "Signal", "Signal Region\nbin 1": "50.00"},
@@ -60,14 +76,23 @@
     ]
 
 
-def test__yields_per_channel(example_spec_multibin, example_spec_with_background):
+def test__yields_per_channel(
+    example_spec_multibin, example_spec_with_background, caplog
+):
+    caplog.set_level(logging.DEBUG)
+    # multiple channels
     model = pyhf.Workspace(example_spec_multibin).model()
     yields = [[30], [8.0]]
     total_stdev = [5.39, 1.0]
     data = [43, 10]
+    channels = model.config.channels
+    label = "pre-fit"
+    caplog.clear()
 
-    yield_table = tabulate._yields_per_channel(model, yields, total_stdev, data)
+    yield_table = tabulate._yields_per_channel(
+        model, yields, total_stdev, data, channels, label
+    )
     assert yield_table == [
         {
             "sample": "Signal",
@@ -85,17 +110,63 @@ def test__yields_per_channel(example_spec_multibin, example_spec_with_background
             "region_2": "10.00",
         },
     ]
+    assert (
+        "yields per channel for pre-fit model prediction:" in caplog.records[0].message
+    )
+    caplog.clear()
 
     # multiple samples
     model = pyhf.Workspace(example_spec_with_background).model()
     yields = [[150.0, 50]]
     total_stdev = [8.60]
     data = [160]
+    channels = model.config.channels
 
-    yield_table = tabulate._yields_per_channel(model, yields, total_stdev, data)
+    yield_table = tabulate._yields_per_channel(
+        model, yields, total_stdev, data, channels, label
+    )
     assert yield_table == [
         {"sample": "Background", "Signal Region": "150.00"},
         {"sample": "Signal", "Signal Region": "50.00"},
         {"sample": "total", "Signal Region": "200.00 \u00B1 8.60"},
         {"sample": "data", "Signal Region": "160.00"},
     ]
+
+
+@mock.patch("cabinetry.tabulate._yields_per_channel")
+@mock.patch("cabinetry.tabulate._yields_per_bin")
+@mock.patch("cabinetry.model_utils._filter_channels", side_effect=[["SR"], [], ["SR"]])
+@mock.patch("cabinetry.model_utils._data_per_channel", return_value=[[12.0]])
+def test_yields(mock_data, mock_filter, mock_bin, mock_channel, example_spec, caplog):
+    caplog.set_level(logging.DEBUG)
+    model = pyhf.Workspace(example_spec).model()
+    model_pred = model_utils.ModelPrediction(model, [[[10.0]]], [[0.3]], [0.3], "pred")
+    data = [12.0, 1.0]  # with auxdata to strip via mock
+
+    tabulate.yields(model_pred, data)
+    assert mock_data.call_args_list == [[(model, data), {}]]
+    assert mock_filter.call_args_list == [[(model, None), {}]]
+    assert mock_bin.call_args_list == [
+        [(model, [[[10.0]]], [[0.3]], [[12.0]], ["SR"], "pred"), {}]
+    ]
+    assert mock_channel.call_count == 0
+
+    # no table to produce (does not call _filter_channels)
+    tabulate.yields(model_pred, data, per_bin=False, per_channel=False)
+    assert "requested neither yields per bin nor per channel, no table produced" in [
+        rec.message for rec in caplog.records
+    ]
+    caplog.clear()
+
+    # no channels to include
+    tabulate.yields(model_pred, data, channels="abc")
+    assert mock_filter.call_args == [(model, "abc"), {}]
+    assert mock_bin.call_count == 1  # one call from before, no new call
+    assert mock_channel.call_count == 0
+
+    # yields per channel, not per bin
+    tabulate.yields(model_pred, data, per_bin=False, per_channel=True)
+    assert mock_bin.call_count == 1  # one call from before
+    assert mock_channel.call_args_list == [
+        [(model, [[10.0]], [0.3], [12.0], ["SR"], "pred"), {}]
+    ]
diff --git a/tests/test_workspace.py b/tests/test_workspace.py
index 39944b58..ea12be2d 100644
--- a/tests/test_workspace.py
+++ b/tests/test_workspace.py
@@ -307,7 +307,7 @@ def test_WorkspaceBuilder_channels(mock_contains, mock_histogram):
     expected_channels = [{"name": "region_1", "samples": []}]
     assert channels == expected_channels
     assert mock_contains.call_count == 2
-    assert mock_contains.call_args_list[-1] == (
+    assert mock_contains.call_args == (
         (example_config["Regions"][0], example_config["Samples"][0]),
         {},
     )
diff --git a/tests/visualize/test_visualize.py b/tests/visualize/test_visualize.py
index aa30331a..c6dcada4 100644
--- a/tests/visualize/test_visualize.py
+++ b/tests/visualize/test_visualize.py
@@ -4,6 +4,7 @@
 import matplotlib.figure
 import numpy as np
+import pyhf
 import pytest
 
 from cabinetry import fit
@@ -17,10 +18,10 @@
 @pytest.mark.parametrize(
     "test_input, expected",
     [
-        (("SR", True), "SR_prefit.pdf"),
-        (("SR", False), "SR_postfit.pdf"),
-        (("SR 1", True), "SR-1_prefit.pdf"),
-        (("SR 1", False), "SR-1_postfit.pdf"),
+        (("SR", "pre-fit"), "SR_prefit.pdf"),
+        (("SR", "post-fit"), "SR_postfit.pdf"),
+        (("SR 1", "prefit"), "SR-1_prefit.pdf"),
+        (("SR 1", "postfit"), "SR-1_postfit.pdf"),
     ],
 )
 def test__figure_name(test_input, expected):
@@ -138,23 +139,14 @@ def test_data_mc_from_histograms(mock_load, mock_draw, mock_stdev):
     "cabinetry.configuration.region_dict",
     return_value={"Name": "region", "Variable": "x"},
 )
-@mock.patch("cabinetry.tabulate._yields_per_channel")
-@mock.patch("cabinetry.tabulate._yields_per_bin")
-@mock.patch("cabinetry.model_utils.yield_stdev", return_value=([[0.3]], [0.3]))
 @mock.patch(
-    "cabinetry.model_utils.prefit_uncertainties",
-    return_value=([0.04956657, 0.0]),
-)
-@mock.patch(
-    "cabinetry.model_utils.asimov_parameters",
-    return_value=([1.0, 1.0]),
+    "cabinetry.model_utils._filter_channels",
+    side_effect=[["Signal Region"], ["Signal Region"], ["Signal Region"], []],
 )
+@mock.patch("cabinetry.model_utils._data_per_channel", return_value=[[12.0]])
 def test_data_mc(
-    mock_asimov,
-    mock_unc,
-    mock_stdev,
-    mock_table_bin,
-    mock_table_channel,
+    mock_data,
+    mock_filter,
     mock_dict,
     mock_bins,
     mock_draw,
@@ -162,48 +154,22 @@
 ):
     config = {"config": "abc"}
     figure_folder = "tmp"
-    model, data = model_utils.model_and_data(example_spec)
+    model = pyhf.Workspace(example_spec).model()
+    model_pred = model_utils.ModelPrediction(
+        model, [[[10.0]]], [[0.3]], [0.3], "pre-fit"
+    )
+    data = [12.0, 1.0]
 
     # pre-fit plot
     fig_dict_list = visualize.data_mc(
-        model, data, config=config, figure_folder=figure_folder
+        model_pred, data, config=config, figure_folder=figure_folder
     )
     assert len(fig_dict_list) == 1
     assert isinstance(fig_dict_list[0]["figure"], matplotlib.figure.Figure)
     assert fig_dict_list[0]["region"] == "Signal Region"
 
-    # Asimov parameter calculation and pre-fit uncertainties
-    assert mock_stdev.call_count == 1
-    assert mock_asimov.call_args_list[0][0][0] == model
-    assert mock_unc.call_count == 1
-    assert mock_unc.call_args_list[0][0][0] == model
-
-    # call to stdev calculation
-    assert mock_stdev.call_count == 1
-    assert mock_stdev.call_args_list[0][0][0] == model
-    assert np.allclose(mock_stdev.call_args_list[0][0][1], [1.0, 1.0])
-    assert np.allclose(mock_stdev.call_args_list[0][0][2], [0.04956657, 0.0])
-    assert np.allclose(
-        mock_stdev.call_args_list[0][0][3], np.asarray([[1.0, 0.0], [0.0, 1.0]])
-    )
-    assert mock_stdev.call_args_list[0][1] == {}
-
-    # yield table per bin
-    assert mock_table_bin.call_count == 1
-    assert mock_table_bin.call_args_list[0][0][0] == model
-    assert mock_table_bin.call_args_list[0][0][1] == [[[51.8]]]
-    assert mock_table_bin.call_args_list[0][0][2] == [[0.3]]
-    assert mock_table_bin.call_args_list[0][0][3] == [[data[0]]]
-    assert mock_table_bin.call_args_list[0][1] == {}
-
-    # yield table per channel
-    assert mock_table_channel.call_count == 1
-    assert mock_table_channel.call_args_list[0][0][0] == model
-    assert mock_table_channel.call_args_list[0][0][1] == [[51.8]]
-    assert mock_table_channel.call_args_list[0][0][2] == [0.3]
-    assert mock_table_channel.call_args_list[0][0][3] == [data[0]]
-    assert mock_table_channel.call_args_list[0][1] == {}
-
+    assert mock_data.call_args_list == [[(model, data), {}]]
+    assert mock_filter.call_args_list == [[(model, None), {}]]
     assert mock_dict.call_args_list == [[(config, "Signal Region"), {}]]
     assert mock_bins.call_args_list == [[({"Name": "region", "Variable": "x"},), {}]]
 
@@ -211,13 +177,13 @@
         {
             "label": "Signal",
             "isData": False,
-            "yields": np.asarray([51.8]),
+            "yields": np.asarray([10.0]),
             "variable": "x",
         },
         {
             "label": "Data",
             "isData": True,
-            "yields": np.asarray(data[:1]),
+            "yields": np.asarray([12.0]),
             "variable": "x",
         },
     ]
@@ -235,44 +201,27 @@
         "close_figure": False,
     }
 
-    # post-fit plot, custom scale, close figure
-    fit_results = fit.FitResults(
-        np.asarray([1.01, 1.1]),
-        np.asarray([0.03, 0.1]),
-        [],
-        np.asarray([[1.0, 0.2], [0.2, 1.0]]),
-        0.0,
+    # post-fit plot (different label in model prediction), custom scale, close figure,
+    # do not save figure
+    model_pred = model_utils.ModelPrediction(
+        model, [[[11.0]]], [[0.2]], [0.2], "post-fit"
     )
     _ = visualize.data_mc(
-        model,
+        model_pred,
         data,
         config=config,
         figure_folder=figure_folder,
-        fit_results=fit_results,
        log_scale=False,
         close_figure=True,
+        save_figure=False,
     )
 
-    assert mock_asimov.call_count == 1  # no new call
-
-    # call to stdev calculation
-    assert mock_stdev.call_count == 2
-    assert mock_stdev.call_args_list[1][0][0] == model
-    assert np.allclose(mock_stdev.call_args_list[1][0][1], [1.01, 1.1])
-    assert np.allclose(mock_stdev.call_args_list[1][0][2], [0.03, 0.1])
-    assert np.allclose(
-        mock_stdev.call_args_list[1][0][3], np.asarray([[1.0, 0.2], [0.2, 1.0]])
-    )
-    assert mock_stdev.call_args_list[1][1] == {}
-
     assert mock_draw.call_count == 2
     # yield at best-fit point is different from pre-fit
-    assert np.allclose(mock_draw.call_args_list[1][0][0][0]["yields"], 57.54980000)
-    assert np.allclose(mock_draw.call_args_list[1][0][1], np.asarray([0.3]))
+    assert np.allclose(mock_draw.call_args_list[1][0][0][0]["yields"], 11.0)
+    assert np.allclose(mock_draw.call_args_list[1][0][1], np.asarray([0.2]))
     np.testing.assert_equal(mock_draw.call_args_list[1][0][2], np.asarray([1, 2]))
-    assert mock_draw.call_args_list[1][0][3] == pathlib.Path(
-        "tmp/Signal-Region_postfit.pdf"
-    )
+    assert mock_draw.call_args_list[1][0][3] is None  # figure not saved
     assert mock_draw.call_args_list[1][1] == {
         "log_scale": False,
         "log_scale_x": False,
@@ -280,21 +229,18 @@
         "close_figure": True,
     }
 
-    # no yield table, do not save figure
-    _ = visualize.data_mc(
-        model, data, config=config, include_table=False, save_figure=False
-    )
-    assert mock_table_bin.call_count == 2  # 2 calls from before
-    assert mock_table_channel.call_count == 2
-    assert mock_draw.call_args[0][3] is None
-
-    # no config specified, default variable name and bin edges, data without auxdata
-    _ = visualize.data_mc(model, data[:1])
+    # no config specified, default variable name and bin edges
+    _ = visualize.data_mc(model_pred, data)
     assert mock_draw.call_args[0][0][0]["variable"] == "bin"
     assert mock_draw.call_args[0][0][1]["variable"] == "bin"
     assert mock_draw.call_args[0][0][1]["yields"] == np.asarray(data[:1])
     np.testing.assert_equal(mock_draw.call_args[0][2], np.asarray([0, 1]))
 
+    # no matching channels (via side_effect)
+    assert visualize.data_mc(model_pred, data, channels="abc") is None
+    assert mock_filter.call_args == [(model, "abc"), {}]
+    assert mock_draw.call_count == 3  # no new call
+
 
 @mock.patch(
     "cabinetry.visualize.plot_result.correlation_matrix",