Skip to content

Commit

Permalink
feat: model prediction API and yield table changes (#267)
Browse files Browse the repository at this point in the history
* breaking change: visualize.data_mc now requires model prediction argument
* new function model_utils.prediction to generate model predictions
* yield tables removed from visualize.data_mc, now available via new function tabulate.yields
* visualize.data_mc and tabulate.yields support a new optional channels argument
* refactored fit module, result containers split out into submodule
  • Loading branch information
alexander-held committed Aug 25, 2021
1 parent 726f92c commit 5220056
Show file tree
Hide file tree
Showing 17 changed files with 809 additions and 432 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ model, data = cabinetry.model_utils.model_and_data(ws)
fit_results = cabinetry.fit.fit(model, data)

# visualize the post-fit model prediction and data
cabinetry.visualize.data_mc(model, data, config=config, fit_results=fit_results)
model_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results)
cabinetry.visualize.data_mc(model_postfit, data, config=config)
```

The above is an abbreviated version of an example included in `example.py`, which shows how to use `cabinetry`.
Expand Down
6 changes: 6 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ cabinetry.fit
.. automodule:: cabinetry.fit
:members:

cabinetry.fit.results_containers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. automodule:: cabinetry.fit.results_containers
:members:


cabinetry.visualize
-------------------
Expand Down
11 changes: 9 additions & 2 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@
cabinetry.visualize.pulls(fit_results)
cabinetry.visualize.correlation_matrix(fit_results)

# obtain pre- and post-fit model predictions
model_prefit = cabinetry.model_utils.prediction(model)
model_postfit = cabinetry.model_utils.prediction(model, fit_results=fit_results)

# show post-fit yield table
cabinetry.tabulate.yields(model_postfit, data)

# visualize pre- and post-fit distributions
cabinetry.visualize.data_mc(model, data, config=config)
cabinetry.visualize.data_mc(model, data, config=config, fit_results=fit_results)
cabinetry.visualize.data_mc(model_prefit, data, config=config)
cabinetry.visualize.data_mc(model_postfit, data, config=config)
5 changes: 3 additions & 2 deletions src/cabinetry/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,12 +309,13 @@ def data_mc(
# optionally perform maximum likelihood fit to obtain post-fit model
fit_results = cabinetry_fit.fit(model, data) if postfit else None

model_prediction = cabinetry_model_utils.prediction(model, fit_results=fit_results)
cabinetry_visualize.data_mc(
model,
model_prediction,
data,
config=cabinetry_config,
figure_folder=figfolder,
fit_results=fit_results,
close_figure=True,
)


Expand Down
109 changes: 11 additions & 98 deletions src/cabinetry/fit.py → src/cabinetry/fit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import iminuit
import numpy as np
Expand All @@ -8,108 +8,18 @@
import scipy.stats

from cabinetry import model_utils
from cabinetry.fit.results_containers import (
FitResults,
LimitResults,
RankingResults,
ScanResults,
SignificanceResults,
)


log = logging.getLogger(__name__)


class FitResults(NamedTuple):
    """Collects fit results in one object.

    Args:
        bestfit (numpy.ndarray): best-fit results of parameters
        uncertainty (numpy.ndarray): uncertainties of best-fit parameter results
        labels (List[str]): parameter labels
        corr_mat (np.ndarray): parameter correlation matrix
        best_twice_nll (float): -2 log(likelihood) at best-fit point
        goodness_of_fit (float, optional): goodness-of-fit p-value, defaults to -1
            (sentinel meaning "not calculated")
    """

    bestfit: np.ndarray
    uncertainty: np.ndarray
    labels: List[str]
    corr_mat: np.ndarray
    best_twice_nll: float
    goodness_of_fit: float = -1


class RankingResults(NamedTuple):
    """Collects nuisance parameter ranking results in one object.

    The best-fit results per parameter, the uncertainties, and the labels should not
    include the parameter of interest, since no impact for it is calculated.

    Args:
        bestfit (numpy.ndarray): best-fit results of parameters
        uncertainty (numpy.ndarray): uncertainties of best-fit parameter results
        labels (List[str]): parameter labels
        prefit_up (numpy.ndarray): pre-fit impact in "up" direction
        prefit_down (numpy.ndarray): pre-fit impact in "down" direction
        postfit_up (numpy.ndarray): post-fit impact in "up" direction
        postfit_down (numpy.ndarray): post-fit impact in "down" direction
    """

    bestfit: np.ndarray
    uncertainty: np.ndarray
    labels: List[str]
    prefit_up: np.ndarray
    prefit_down: np.ndarray
    postfit_up: np.ndarray
    postfit_down: np.ndarray


class ScanResults(NamedTuple):
    """Collects likelihood scan results in one object.

    Args:
        name (str): name of parameter in scan
        bestfit (float): best-fit parameter value from unconstrained fit
        uncertainty (float): uncertainty of parameter in unconstrained fit
        parameter_values (np.ndarray): parameter values used in scan
        delta_nlls (np.ndarray): -2 log(L) difference at each scanned point
    """

    name: str
    bestfit: float
    uncertainty: float
    parameter_values: np.ndarray
    delta_nlls: np.ndarray


class LimitResults(NamedTuple):
    """Collects parameter upper limit results in one object.

    Args:
        observed_limit (float): observed limit
        expected_limit (np.ndarray): expected limit, including 1 and 2 sigma bands
        observed_CLs (np.ndarray): observed CLs values
        expected_CLs (np.ndarray): expected CLs values, including 1 and 2 sigma bands
        poi_values (np.ndarray): POI values used in scan
    """

    observed_limit: float
    expected_limit: np.ndarray
    observed_CLs: np.ndarray
    expected_CLs: np.ndarray
    poi_values: np.ndarray


class SignificanceResults(NamedTuple):
    """Collects results from a discovery significance calculation in one object.

    Args:
        observed_p_value (float): observed p-value
        observed_significance (float): observed significance
        expected_p_value (float): expected/median p-value
        expected_significance (float): expected/median significance
    """

    observed_p_value: float
    observed_significance: float
    expected_p_value: float
    expected_significance: float


def print_results(
fit_results: FitResults,
) -> None:
Expand Down Expand Up @@ -840,6 +750,9 @@ def significance(model: pyhf.pdf.Model, data: List[float]) -> SignificanceResult
Args:
model (pyhf.pdf.Model): model to use in fits
data (List[float]): data (including auxdata) the model is fit to
Returns:
SignificanceResults: observed and expected p-values and significances
"""
pyhf.set_backend(pyhf.tensorlib, pyhf.optimize.minuit_optimizer(verbose=1))

Expand Down
100 changes: 100 additions & 0 deletions src/cabinetry/fit/results_containers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from typing import List, NamedTuple

import numpy as np


class FitResults(NamedTuple):
    """Collects fit results in one object.

    Args:
        bestfit (numpy.ndarray): best-fit results of parameters
        uncertainty (numpy.ndarray): uncertainties of best-fit parameter results
        labels (List[str]): parameter labels
        corr_mat (np.ndarray): parameter correlation matrix
        best_twice_nll (float): -2 log(likelihood) at best-fit point
        goodness_of_fit (float, optional): goodness-of-fit p-value, defaults to -1
            (sentinel meaning "not calculated")
    """

    bestfit: np.ndarray
    uncertainty: np.ndarray
    labels: List[str]
    corr_mat: np.ndarray
    best_twice_nll: float
    goodness_of_fit: float = -1


class RankingResults(NamedTuple):
    """Collects nuisance parameter ranking results in one object.

    The best-fit results per parameter, the uncertainties, and the labels should not
    include the parameter of interest, since no impact for it is calculated.

    Args:
        bestfit (numpy.ndarray): best-fit results of parameters
        uncertainty (numpy.ndarray): uncertainties of best-fit parameter results
        labels (List[str]): parameter labels
        prefit_up (numpy.ndarray): pre-fit impact in "up" direction
        prefit_down (numpy.ndarray): pre-fit impact in "down" direction
        postfit_up (numpy.ndarray): post-fit impact in "up" direction
        postfit_down (numpy.ndarray): post-fit impact in "down" direction
    """

    bestfit: np.ndarray
    uncertainty: np.ndarray
    labels: List[str]
    prefit_up: np.ndarray
    prefit_down: np.ndarray
    postfit_up: np.ndarray
    postfit_down: np.ndarray


class ScanResults(NamedTuple):
    """Collects likelihood scan results in one object.

    Args:
        name (str): name of parameter in scan
        bestfit (float): best-fit parameter value from unconstrained fit
        uncertainty (float): uncertainty of parameter in unconstrained fit
        parameter_values (np.ndarray): parameter values used in scan
        delta_nlls (np.ndarray): -2 log(L) difference at each scanned point
    """

    name: str
    bestfit: float
    uncertainty: float
    parameter_values: np.ndarray
    delta_nlls: np.ndarray


class LimitResults(NamedTuple):
    """Collects parameter upper limit results in one object.

    Args:
        observed_limit (float): observed limit
        expected_limit (np.ndarray): expected limit, including 1 and 2 sigma bands
        observed_CLs (np.ndarray): observed CLs values
        expected_CLs (np.ndarray): expected CLs values, including 1 and 2 sigma bands
        poi_values (np.ndarray): POI values used in scan
    """

    observed_limit: float
    expected_limit: np.ndarray
    observed_CLs: np.ndarray
    expected_CLs: np.ndarray
    poi_values: np.ndarray


class SignificanceResults(NamedTuple):
    """Collects results from a discovery significance calculation in one object.

    Args:
        observed_p_value (float): observed p-value
        observed_significance (float): observed significance
        expected_p_value (float): expected/median p-value
        expected_significance (float): expected/median significance
    """

    observed_p_value: float
    observed_significance: float
    expected_p_value: float
    expected_significance: float

0 comments on commit 5220056

Please sign in to comment.