# In [1]:
import numpy as np
import glob, re
from scipy.optimize import minimize
from scipy.optimize import curve_fit
from scipy.stats import poisson, norm, kstest
import numdifftools

# In [2]:
def Zscorefunc(obs,pred,sigma):
    """Return the asymptotic Z-score sqrt(2*LLR) for a single counting bin.

    The model is a Poisson count with mean ``pred + S + theta`` where the
    nuisance ``theta`` has a Gaussian constraint of width ``sigma``.  LLR is
    the log of the profile-likelihood ratio between the best-fit signal
    (``Shat = obs - pred``, ``thetahat = 0``) and the background-only
    hypothesis (``S = 0`` with ``theta`` profiled analytically).

    The value is algebraically identical to the original closed form, but is
    evaluated as ``obs*log(obs/mu) - obs + mu + theta**2/(2*sigma**2)`` so
    that nothing is divided by ``sigma**2``.  This keeps the result finite
    and accurate for ``sigma == 0`` (pure Poisson limit), for very small
    ``sigma`` and for ``obs == 0``.

    Parameters
    ----------
    obs : float or array-like
        Observed count.
    pred : float or array-like
        Predicted background count.
    sigma : float or array-like
        Gaussian uncertainty on ``pred``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``sqrt(2*LLR)``, always >= 0.  The sign of the fluctuation is not
        encoded; callers that only want excesses must check ``obs >= pred``
        themselves.
    """
    obs = np.asarray(obs, dtype='float64')
    pred = np.asarray(pred, dtype='float64')
    var = np.asarray(sigma, dtype='float64')**2

    pred2 = pred - var
    # Background-only conditional MLE of the Poisson mean, mu = pred + thetahat:
    # the positive root of mu**2 - pred2*mu - obs*var = 0.
    mu = 0.5*(pred2 + np.sqrt(pred2**2 + 4*obs*var))

    with np.errstate(divide='ignore', invalid='ignore'):
        # obs*log(obs/mu) -> 0 as obs -> 0; avoid evaluating 0*log(0).
        log_term = np.where(obs > 0, obs*np.log(obs/mu), 0.0)
        # Gaussian penalty thetahat**2/(2*var).  The stationarity condition
        # thetahat/var = obs/mu - 1 lets us write it without dividing by var,
        # and it vanishes identically when var == 0.
        penalty = np.where(var > 0, 0.5*(mu - pred)*(obs/mu - 1), 0.0)

    LLR = log_term - obs + mu + penalty
    # Rounding can leave LLR a hair below zero when obs ~= pred; clamp so the
    # square root does not turn that into NaN.
    return np.sqrt(2*np.maximum(LLR, 0.0))
    
    
    
def _sideband_fit_significance(ydata,yerr,xdata,fit_func,mask_sb,mask_sr,maxfev):
    """Shared implementation behind perform_fit and perform_fit2.

    Fits ``fit_func`` to the bins selected by ``mask_sb``, extrapolates the
    fit into the bins selected by ``mask_sr`` and converts the observed total
    there into a one-sided p-value.

    Statistical model (single counting bin, theta is the nuisance parameter
    describing the uncertainty of the sideband fit):

        P(S,theta) = 1/Nobs! (Bpred+S+theta)^Nobs e^{-Bpred-S-theta}
                     * 1/(sqrt{2pi} sigma) e^{-theta^2/(2 sigma^2)}

    The likelihood ratio is P(Shat,thetahat(Shat)) / P(S=0,thetahat(S=0)),
    where thetahat(S) maximizes the likelihood for a given S and Shat
    maximizes the profiled likelihood.  For a single bin Shat = Nobs-Bpred
    and thetahat(Shat) = 0, while thetahat(S=0) is known analytically.  With
    LLR the log of that ratio, the asymptotic Z-score is sqrt(2*LLR), which
    is what Zscorefunc returns.

    Parameters
    ----------
    ydata, yerr, xdata : numpy.ndarray
        Bin contents, their uncertainties and the bin positions.  All three
        are indexed with ``mask_sb``; ``ydata`` and ``xdata`` also with
        ``mask_sr``.
    fit_func : callable
        Model ``fit_func(x, *params)``.  It is handed to ``curve_fit`` with
        the sideband x array and is also called one x value at a time.
    mask_sb : array usable as a NumPy index
        Selects the sideband bins used in the fit.  NOTE(review): the
        original comment says the signal region and the region around
        pmdec=0 are excluded here -- that is up to the caller's mask.
    mask_sr : array usable as a NumPy index
        Selects the signal-region bins that are summed.
    maxfev : int
        Forwarded to ``curve_fit``.

    Returns
    -------
    tuple
        ``(popt, pcov, ydata_fit, pvalue)``: the fit parameters and their
        covariance from ``curve_fit``, the fitted curve at every ``xdata``
        point, and the p-value (0.5 when the observation is below the
        prediction).
    """
    # Least-squares fit restricted to the sideband bins.
    popt, pcov = curve_fit(fit_func, xdata[mask_sb], ydata[mask_sb],
                           sigma=yerr[mask_sb], maxfev=maxfev)
    print('fit params: ', popt)

    # Fitted curve evaluated point by point over the full x range.
    ydata_fit = np.array([fit_func(x,*popt) for x in xdata])

    xdata_sr = xdata[mask_sr]

    def signal_fit_func_array(parr):
        # Total predicted yield in the signal region for parameters parr,
        # wrapped in a length-1 array so the Jacobian is a 1 x n_params matrix.
        # See the ATLAS diboson resonance search: https://arxiv.org/pdf/1708.04445.pdf.
        return np.array([np.sum([fit_func(x,*parr) for x in xdata_sr])])

    # Propagate the parameter covariance to the summed prediction:
    # cov = J pcov J^T.  The numerical Jacobian is evaluated only once.
    jac_at_popt = numdifftools.core.Jacobian(signal_fit_func_array)(popt)
    x_signal_cov = np.dot(np.dot(jac_at_popt, pcov), jac_at_popt.T)
    print('x_signal_cov: ',x_signal_cov)

    obs = np.sum(ydata[mask_sr]).astype('float64')
    expected = np.sum(ydata_fit[mask_sr]).astype('float64')
    sigma = np.sqrt(x_signal_cov)[0][0]

    if obs < expected:
        # A deficit is not treated as evidence for signal.
        Zvaltryme = 0
        pvaltryme = 0.5
    else:
        Zvaltryme = Zscorefunc(obs, expected, sigma)
        # norm.sf keeps precision in the far tail, where 1-norm.cdf(Z)
        # rounds to exactly 0.
        pvaltryme = norm.sf(Zvaltryme)

    print("Z SCORE: ",Zvaltryme)
    print("P VALUE: ",pvaltryme)

    return popt,pcov,ydata_fit,pvaltryme


def perform_fit(ydata,yerr,xdata,fit_func,mask_sb,mask_sr):
    """Sideband fit and signal-region p-value, with ``maxfev=10000``.

    Fits ``fit_func`` to ``xdata[mask_sb]``, ``ydata[mask_sb]`` (weighted by
    ``yerr[mask_sb]``), compares the summed ``ydata[mask_sr]`` with the
    summed fit prediction and its propagated uncertainty, and prints the
    fit parameters, the prediction variance, the Z-score and the p-value.

    Returns ``(popt, pcov, ydata_fit, pvalue)``.
    """
    return _sideband_fit_significance(ydata,yerr,xdata,fit_func,mask_sb,mask_sr,
                                      maxfev=10000)


def perform_fit2(ydata,yerr,xdata,fit_func,mask_sb,mask_sr):
    """Same as perform_fit but lets ``curve_fit`` use ``maxfev=100000``.

    Returns ``(popt, pcov, ydata_fit, pvalue)``.
    """
    return _sideband_fit_significance(ydata,yerr,xdata,fit_func,mask_sb,mask_sr,
                                      maxfev=100000)
    