In [30]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import uncertainties as unc
import scipy
from scipy import stats
from scipy.stats import norm 
from scipy.stats import uniform
from scipy.stats import binom
from scipy.stats import poisson
from numpy import random
import scipy.constants as const
from tabulate import tabulate
import math
%matplotlib inline

### Define Test Statistic for Frequentist Approach

In [31]:
# 1. We require signal event s>0 for positive signal yield.
#        Therefore, the test statistics q_0 is 0 if N_obs <= Nb
# 2. Compute two Poisson loglikelihood of
#   a) backgorund only model
#   b) signal+background model
#   Evaluate -2 log likelihood ratio between a) and b)

def q0(N_obs, Nb):
    if N_obs <= Nb:
        q_out = 0
    else:
        # NLL of background only
        NLL_bkg = -2 * (N_obs * np.log(Nb) - Nb - np.log(math.factorial(N_obs)))
        # NLL of sig + background
        NLL_sig_bkg = N_obs * np.log(Nb + N_obs) - (Nb + N_obs) - np.log(math.factorial(N_obs))
        q0_out = Nobs - Nb 
    return q0_out

#### Apply Four Metrics

In [33]:
def SimplifiedZ0(N_obs,N_b):
    s = q0(N_obs, N_b)
    Zscore = s / (N_b) ** (1/2)
    return Zscore


def AsymptoticZ0(N_obs,N_b):
    s = q0(N_obs, N_b)
    Zscore = (2 * ((s + N_b) * np.log(1 + (s / N_b)) - s)) ** (1/2)
    return Zscore



def BayesianZ0(N_obs,N_b):
    pvalue = 1-poisson.cdf(N_obs, N_b)
    Zscore= scipy.stats.norm.ppf(1-pvalue)
    return Zscore

#### Now, let’s apply our code for numerical calculations.
Consider the case that backogrund only model with yields b=0.5 and observed events n=5.
Calclate discovery significance for each of the metric, respectively

In [34]:
Nobs=5
Nb=0.5
print(q0(Nobs, Nb))

4.5


In [35]:
# Discovery Significnce for Each Metric Using Functions
SimplifiedZ0 = SimplifiedZ0(Nobs, Nb)
print(SimplifiedZ0)
AsymptoticZ0 = AsymptoticZ0(Nobs, Nb)
print(AsymptoticZ0)
BayesianZ0 = BayesianZ0(Nobs, Nb)
print(BayesianZ0)


6.363961030678928
3.7451102693966782
4.186492134133442


#### Describe the consistency between different metrics.

The simplified method discovery significance of 6.3; this is much higher than that found by the asymptotic and bayesian calculations, which gave 3.7 and 4.1, respectively. The Bayesian $Z_0$ should be the best estimate, as the p-value is found by the Poisson distribution, which is more consistent with our test statistic distrubution, which uses the negative log likelihood of the poisson distribution. Since the simplified metric only uses $\frac{s}{\sqrt{b}}$, it doesn't accound for the shape and discrete nature of the poisson distribution. The asymptotic metric is better in finding the significance than the simplified, as it uses teh negative log likelihood in rather than the simpler fraction used by the simplified metric and $N_{obs}$ is much smaller than $N_s$.