Skip to content

Commit

Permalink
feat: make POI optional in config and allow POI customization for inf…
Browse files Browse the repository at this point in the history
…erence (#348)

* POI is now an optional configuration argument
* added ability to set POI via poi_name keyword arguments in fit.ranking and fit.limit
* fixed pytest deprecation warning in a test
  • Loading branch information
alexander-held committed Jul 30, 2022
1 parent a237589 commit 0c75aad
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 45 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -56,7 +56,7 @@ filterwarnings =
ignore:no code associated:UserWarning:typeguard:

[flake8]
max-complexity = 16
max-complexity = 18
max-line-length = 88
exclude = docs/conf.py
count = True
Expand Down
76 changes: 59 additions & 17 deletions src/cabinetry/fit/__init__.py
Expand Up @@ -425,6 +425,7 @@ def ranking(
data: List[float],
*,
fit_results: Optional[FitResults] = None,
poi_name: Optional[str] = None,
init_pars: Optional[List[float]] = None,
fix_pars: Optional[List[bool]] = None,
par_bounds: Optional[List[Tuple[float, float]]] = None,
Expand All @@ -442,6 +443,8 @@ def ranking(
data (List[float]): data (including auxdata) the model is fit to
fit_results (Optional[FitResults], optional): nominal fit results to use for
ranking, if not specified will repeat nominal fit, defaults to None
poi_name (Optional[str], optional): impact is calculated with respect to this
parameter, defaults to None (use POI specified in workspace)
init_pars (Optional[List[float]], optional): list of initial parameter settings,
defaults to None (use ``pyhf`` suggested inits)
fix_pars (Optional[List[bool]], optional): list of booleans specifying which
Expand All @@ -451,6 +454,9 @@ def ranking(
custom_fit (bool, optional): whether to use the ``pyhf.infer`` API or
``iminuit``, defaults to False (using ``pyhf.infer``)
Raises:
ValueError: if no POI is found
Returns:
RankingResults: fit results for parameters, and pre- and post-fit impacts
"""
Expand All @@ -466,7 +472,13 @@ def ranking(

labels = model.config.par_names()
prefit_unc = model_utils.prefit_uncertainties(model)
nominal_poi = fit_results.bestfit[model.config.poi_index]

# use POI given by kwarg, fall back to POI specified in model
poi_index = model_utils._poi_index(model, poi_name=poi_name)
if poi_index is None:
raise ValueError("no POI specified, cannot calculate ranking")

nominal_poi = fit_results.bestfit[poi_index]

# need to get values for parameter settings, as they will be partially changed
# during the ranking (init/fix changes)
Expand All @@ -476,9 +488,9 @@ def ranking(

all_impacts = []
for i_par, label in enumerate(labels):
if label == model.config.poi_name:
if i_par == poi_index:
continue # do not calculate impact of POI on itself
log.info(f"calculating impact of {label} on {labels[model.config.poi_index]}")
log.info(f"calculating impact of {label} on {labels[poi_index]}")

# hold current parameter constant
fix_pars_ranking = fix_pars.copy()
Expand Down Expand Up @@ -509,7 +521,7 @@ def ranking(
par_bounds=par_bounds,
custom_fit=custom_fit,
)
poi_val = fit_results_ranking.bestfit[model.config.poi_index]
poi_val = fit_results_ranking.bestfit[poi_index]
parameter_impact = poi_val - nominal_poi
log.debug(
f"POI is {poi_val:.6f}, difference to nominal is "
Expand All @@ -526,9 +538,9 @@ def ranking(

# remove parameter of interest from bestfit / uncertainty / labels
# such that their entries match the entries of the impacts
bestfit = np.delete(fit_results.bestfit, model.config.poi_index)
uncertainty = np.delete(fit_results.uncertainty, model.config.poi_index)
labels = np.delete(fit_results.labels, model.config.poi_index).tolist()
bestfit = np.delete(fit_results.bestfit, poi_index)
uncertainty = np.delete(fit_results.uncertainty, poi_index)
labels = np.delete(fit_results.labels, poi_index).tolist()

ranking_results = RankingResults(
bestfit, uncertainty, labels, prefit_up, prefit_down, postfit_up, postfit_down
Expand Down Expand Up @@ -582,7 +594,7 @@ def scan(

# get index of parameter with name par_name
par_index = model_utils._parameter_index(par_name, labels)
if par_index == -1:
if par_index is None:
raise ValueError(f"parameter {par_name} not found in model")

# run a fit with the parameter not held constant, to find the best-fit point
Expand Down Expand Up @@ -644,6 +656,7 @@ def limit(
tolerance: float = 0.01,
maxiter: int = 100,
confidence_level: float = 0.95,
poi_name: Optional[str] = None,
init_pars: Optional[List[float]] = None,
fix_pars: Optional[List[bool]] = None,
par_bounds: Optional[List[Tuple[float, float]]] = None,
Expand All @@ -652,7 +665,9 @@ def limit(
Limits are calculated for the parameter of interest (POI) defined in the model.
Brent's algorithm is used to automatically determine POI values to be tested. The
desired confidence level can be configured, and defaults to 95%.
desired confidence level can be configured, and defaults to 95%. In order to support
setting the POI directly without model recompilation, this temporarily changes the
POI index and name in the model configuration.
Args:
model (pyhf.pdf.Model): model to use in fits
Expand All @@ -668,6 +683,8 @@ def limit(
100
confidence_level (float, optional): confidence level for calculation, defaults
to 0.95 (95%)
poi_name (Optional[str], optional): limit is calculated for this parameter,
defaults to None (use POI specified in workspace)
init_pars (Optional[List[float]], optional): list of initial parameter settings,
defaults to None (use ``pyhf`` suggested inits)
fix_pars (Optional[List[bool]], optional): list of booleans specifying which
Expand All @@ -676,13 +693,27 @@ def limit(
parameter bounds for fit, defaults to None (use ``pyhf`` suggested bounds)
Raises:
ValueError: if no POI is found
ValueError: if lower and upper bracket value are the same
ValueError: if starting brackets do not enclose the limit
Returns:
LimitResults: observed and expected limits, CLs values, and scanned points
"""
pyhf.set_backend(pyhf.tensorlib, pyhf.optimize.minuit_optimizer(verbose=1))

# use POI given by kwarg, fall back to POI specified in model
poi_index = model_utils._poi_index(model, poi_name=poi_name)
if poi_index is None:
raise ValueError("no POI specified, cannot calculate limit")

# set POI index / name in model config to desired value, hypotest will pick this up
# save original values to reset model later
original_model_poi_index = model.config.poi_index
original_model_poi_name = model.config.poi_name
model.config.poi_index = poi_index
model.config.poi_name = model.config.par_names()[poi_index]

# show two decimals only if confidence level in percent is not an integer
cl_label = (
f"{confidence_level:.{0 if (confidence_level * 100).is_integer() else 2}%}"
Expand All @@ -708,12 +739,15 @@ def limit(
if bracket is None:
bracket = (bracket_left_default, bracket_right_default)
elif bracket[0] == bracket[1]:
# set POI index / name in model back to their original values
model.config.poi_index = original_model_poi_index
model.config.poi_name = original_model_poi_name
raise ValueError(f"the two bracket values must not be the same: {bracket}")

cache_CLs: Dict[float, tuple] = {} # cache storing all relevant results

def _cls_minus_threshold(
poi: float,
poi_val: float,
model: pyhf.pdf.Model,
data: List[float],
cls_target: float,
Expand All @@ -726,7 +760,7 @@ def _cls_minus_threshold(
cache to avoid re-fitting known POI values and to store all relevant values.
Args:
poi (float): value for parameter of interest
poi_val (float): value for parameter of interest
model (pyhf.pdf.Model): model to use in fits
data (List[float]): data (including auxdata) the model is fit to
cls_target (float): target CLs value to find by varying POI
Expand All @@ -738,19 +772,20 @@ def _cls_minus_threshold(
Returns:
float: absolute value of difference to CLs=``cls_target``
"""
if poi <= 0:
if poi_val <= 0:
# no fit needed for negative POI value, return a default value
log.debug(
f"skipping fit for {model.config.poi_name} = {poi:.4f}, setting CLs = 1"
f"skipping fit for {model.config.poi_name} = {poi_val:.4f}, setting "
"CLs = 1"
)
return 1 - cls_target # distance of CLs = 1 to target CLs
cache = cache_CLs.get(poi)
cache = cache_CLs.get(poi_val)
if cache:
observed, expected = cache # use result from cache
else:
# calculate CLs
results = pyhf.infer.hypotest(
poi,
poi_val,
data,
model,
init_pars=init_pars,
Expand All @@ -761,10 +796,10 @@ def _cls_minus_threshold(
)
observed = float(results[0]) # 1 value per scan point
expected = np.asarray(results[1]) # 5 per point (with 1 and 2 sigma bands)
cache_CLs.update({poi: (observed, expected)})
cache_CLs.update({poi_val: (observed, expected)})
current_CLs = np.hstack((observed, expected))[which_limit]
log.debug(
f"{model.config.poi_name} = {poi:.4f}, {limit_label} CLs = "
f"{model.config.poi_name} = {poi_val:.4f}, {limit_label} CLs = "
f"{current_CLs:.4f}{' (cached)' if cache else ''}"
)
return current_CLs - cls_target
Expand Down Expand Up @@ -801,6 +836,9 @@ def _cls_minus_threshold(
f"CLs values at {bracket[0]:.4f} and {bracket[1]:.4f} do not bracket "
f"CLs={cls_target:.4f}, try a different starting bracket"
)
# set POI index / name in model back to their original values
model.config.poi_index = original_model_poi_index
model.config.poi_name = original_model_poi_name
raise

if not res.converged:
Expand Down Expand Up @@ -842,6 +880,10 @@ def _cls_minus_threshold(

bracket = (bracket_left, bracket_right)

# set POI index / name in model back to their original values
model.config.poi_index = original_model_poi_index
model.config.poi_name = original_model_poi_name

# report all results
log.info(f"total of {steps_total} steps to calculate all limits")
if not all_converged:
Expand Down
46 changes: 41 additions & 5 deletions src/cabinetry/model_utils.py
Expand Up @@ -424,27 +424,63 @@ def unconstrained_parameter_count(model: pyhf.pdf.Model) -> int:
return n_pars


def _parameter_index(par_name: str, labels: Union[List[str], Tuple[str, ...]]) -> int:
def _parameter_index(
par_name: str, labels: Union[List[str], Tuple[str, ...]]
) -> Optional[int]:
"""Returns the position of a parameter with a given name in the list of parameters.
Useful together with ``pyhf.pdf._ModelConfig.par_names`` to find the position of a
parameter when the name is known. If the parameter is not found, logs an error and
returns a default value of -1.
returns a default value of None.
Args:
par_name (str): name of parameter to find in list
labels (Union[List[str], Tuple[str, ...]]): list or tuple with all parameter
names in the model
Returns:
int: index of parameter
Optional[int]: index of parameter, or None if parameter was not found
"""
par_index = next((i for i, label in enumerate(labels) if label == par_name), -1)
if par_index == -1:
par_index = next((i for i, label in enumerate(labels) if label == par_name), None)
if par_index is None:
log.error(f"parameter {par_name} not found in model")
return par_index


def _poi_index(
model: pyhf.pdf.Model, *, poi_name: Optional[str] = None
) -> Optional[int]:
"""Returns the index of the POI specified in the argument or the model default.
If a string is given as argument, this takes priority. Otherwise the POI from the
model is used. If no POI is specified there either, logs an error and returns a
default value of None.
Args:
model (pyhf.pdf.Model): model for which to find the POI index
poi_name (Optional[str], optional): name of the POI, defaults to None
Raises:
ValueError: if the specified POI name cannot be found in the model
Returns:
Optional[int]: POI index, or None if no POI could be found
"""
if poi_name is not None:
# use POI given by kwarg if specified
poi_index = _parameter_index(poi_name, model.config.par_names())
if poi_index is None:
raise ValueError(f"parameter {poi_name} not found in model")
elif model.config.poi_index is not None:
# use POI specified in model
poi_index = model.config.poi_index
else:
log.error("could not find POI for model")
poi_index = None

return poi_index


def _strip_auxdata(model: pyhf.pdf.Model, data: List[float]) -> List[float]:
"""Always returns observed yields, no matter whether data includes auxdata.
Expand Down
4 changes: 2 additions & 2 deletions src/cabinetry/schemas/config.json
Expand Up @@ -62,14 +62,14 @@
"$$target": "#/definitions/general",
"description": "general settings",
"type": "object",
"required": ["Measurement", "POI", "InputPath", "HistogramFolder"],
"required": ["Measurement", "InputPath", "HistogramFolder"],
"properties": {
"Measurement": {
"description": "name of measurement",
"type": "string"
},
"POI": {
"description": "name of parameter of interest",
"description": "name of parameter of interest, defaults to empty string",
"type": "string"
},
"InputPath": {
Expand Down
3 changes: 2 additions & 1 deletion src/cabinetry/workspace.py
Expand Up @@ -358,7 +358,8 @@ def measurements(self) -> List[Dict[str, Any]]:

parameters = {"parameters": parameters_list}
config_dict.update(parameters)
config_dict.update({"poi": self.config["General"]["POI"]})
# POI defaults to "" (interpreted as "no POI" by pyhf) if not specified
config_dict.update({"poi": self.config["General"].get("POI", "")})
measurement.update({"config": config_dict})
measurements.append(measurement)
return measurements
Expand Down

0 comments on commit 0c75aad

Please sign in to comment.