# Discovery test example

In [None]:
from __future__ import annotations

import matplotlib.pyplot as plt
import numpy as np
import zfit
from utils import plotfitresult, pltdist
from zfit.loss import ExtendedUnbinnedNLL
from zfit.minimize import Minuit

from hepstats.hypotests import Discovery
from hepstats.hypotests.calculators import FrequentistCalculator
from hepstats.hypotests.parameters import POI

In [None]:
# Default figure size and font size applied to the plots below
plt.rcParams.update(
    {
        "figure.figsize": (8, 6),
        "font.size": 16,
    }
)

### Fit of a Gaussian signal over an exponential background:

In [None]:
# Fit range: only events strictly inside this window are kept
bounds = (0.1, 3.0)
lower_edge, upper_edge = bounds

# Toy sample: 300 exponential draws plus 25 draws from a narrow Gaussian
# centred at 1.2. The seed is fixed so the sample is reproducible.
np.random.seed(0)
tau = -2.0
beta = -1 / tau  # scale passed to the exponential generator

# The exponential draw must come before the Gaussian one so that the random
# stream is consumed in the same order on every run.
background_events = np.random.exponential(scale=beta, size=300)
peak = np.random.normal(loc=1.2, scale=0.1, size=25)

data = np.concatenate((background_events, peak))
in_range = (data > lower_edge) & (data < upper_edge)
data = data[in_range]

In [None]:
# Plot the selected sample over the fit range with 80 bins.
# pltdist is a helper imported from the local `utils` module
# (presumably it draws a histogram of the data — confirm in utils).
pltdist(data, bins=80, bounds=bounds)

In [None]:
# Observable space named "x", limited to the fit range `bounds`;
# it is shared by the PDFs and the dataset built below.
obs = zfit.Space("x", limits=bounds)

In [None]:
# Number of events that survived the range selection; it sets the limits
# (and, for the background, the starting value) of the yields.
n_events = len(data)

# Fit parameters, given as (name, initial value, lower limit, upper limit)
lambda_ = zfit.Parameter("lambda", -2.0, -4.0, -1.0)
Nsig = zfit.Parameter("Nsig", 20.0, -20.0, n_events)
Nbkg = zfit.Parameter("Nbkg", n_events, 0.0, n_events * 1.1)

In [None]:
# Signal: Gaussian with fixed mean and width, extended with the yield Nsig
gauss_shape = zfit.pdf.Gauss(obs=obs, mu=1.2, sigma=0.1)
signal = gauss_shape.create_extended(Nsig)

# Background: exponential with parameter lambda_, extended with the yield Nbkg
expo_shape = zfit.pdf.Exponential(obs=obs, lambda_=lambda_)
background = expo_shape.create_extended(Nbkg)

# Total model: sum of the two extended components
tot_model = zfit.pdf.SumPDF([signal, background])

In [None]:
# Wrap the numpy sample as zfit data on `obs`, then build the extended
# unbinned negative log-likelihood of the total model on that data
data_ = zfit.data.Data.from_numpy(array=data, obs=obs)
nll = ExtendedUnbinnedNLL(data=data_, model=tot_model)

In [None]:
# Instantiate a Minuit minimizer (zfit.minimize.Minuit) with no arguments,
# i.e. with its default settings. The same instance is reused by the
# calculator further down.
minimizer = Minuit()

In [None]:
# Minimisation of the loss function. The result is kept in `minimum`
# because it is later assigned to the calculator as its best fit.
minimum = minimizer.minimize(loss=nll)
# NOTE(review): hesse() presumably computes the parameter uncertainties
# of the fit result — confirm against the zfit documentation.
minimum.hesse()

In [None]:
# Same binning for the data and for the fitted model drawn on top of it
nbins = 80

# Data first, then the fitted total model on the same axes
pltdist(data, bins=nbins, bounds=bounds)
plotfitresult(tot_model, bounds, nbins)

# Axis labels
plt.xlabel("m [GeV/c$^2$]")
plt.ylabel("number of events")

### Discovery test

In a discovery test, the null hypothesis is the absence of signal, i.e. Nsig = 0.

In [None]:
# Instantiation of the calculator, loaded from the stored toys file.
# Alternative construction that does not read a toys file:
# calculator = FrequentistCalculator(nll, minimizer, ntoysnull=5000)
calculator = FrequentistCalculator.from_yaml(
    "toys/discovery_freq_zfit_toys.yml",
    nll,
    minimizer,
    ntoysnull=5000,
)
calculator.bestfit = minimum  # optional

In [None]:
# Parameter of interest for the null hypothesis: the signal yield Nsig
# set to 0, i.e. no signal.
poinull = POI(Nsig, 0)

In [None]:
# Instantiation of the discovery test from the calculator and the
# null-hypothesis parameter of interest
discovery_test = Discovery(calculator, poinull)

In [None]:
# Run the test. result() yields two values, unpacked here as the p-value of
# the null hypothesis and the corresponding significance
# (meaning taken from the variable names — confirm against hepstats docs).
pnull, significance = discovery_test.result()

In [None]:
# Test-statistic values under the null hypothesis, looked up by POI
qnull_values = calculator.qnull(
    poinull, None, onesided=True, onesideddiscovery=True
)[poinull]
plt.hist(qnull_values, bins=20, label="qnull distribution", log=True)

# Observed value of the test statistic, drawn as a vertical red line
qobs_value = calculator.qobs(poinull, onesided=True, onesideddiscovery=True)
plt.axvline(qobs_value, color="red", label="qobs")

plt.legend(loc="best")
plt.xlabel("q")
In [None]:
# Write the calculator (presumably its generated toys) to the same YAML path
# that from_yaml reads when the calculator is created, so a later run can
# load it again.
calculator.to_yaml("toys/discovery_freq_zfit_toys.yml")