In [None]:
import matplotlib.pylab as plt
import numpy as np

In [None]:
def gen_events(n=1000,cat='sig',slope=3):
    """Draw toy (mass, discriminating-variable) pairs for one event category.

    Parameters
    ----------
    n : int
        Number of events to draw before the acceptance cut.
    cat : str
        'sig' selects the signal model; any other value selects the
        background model.
    slope : float
        Scale passed to ``np.random.exponential`` for the discriminating
        variable.

    Returns
    -------
    (mass, var) : tuple of ndarray
        Masses and discriminating-variable values of the accepted events.
        Only events with 0 < var < 7 are kept, so both arrays can be
        shorter than ``n``.

    Notes
    -----
    Signal: mass ~ Normal(90, 3), var = 7 - Exponential(slope).
    Background: mass uniform on [60, 130), var = Exponential(slope).
    """
    is_signal = (cat == 'sig')

    # The mass is always drawn before the discriminating variable so that a
    # seeded generator yields the same sequence in both categories.
    if is_signal:
        raw_mass = np.random.normal(90, 3, n)
        raw_var = 7 - np.random.exponential(slope, n)
    else:
        raw_mass = 70 * np.random.random(n) + 60
        raw_var = np.random.exponential(slope, n)

    # Acceptance window on the discriminating variable (both edges excluded).
    accepted = (raw_var > 0) & (raw_var < 7)

    return raw_mass[accepted], raw_var[accepted]


In [None]:
# Scale of the exponential used for the discriminating variable. It is defined
# here so that this cell runs on a fresh kernel (it was previously only
# assigned in a later cell); the value matches that later assignment.
slope = 2.0


def _save_mass_hist(masses, filename):
    """Histogram `masses` over 60-120 GeV/c^2 in 60 bins and save it to `filename`."""
    plt.figure(figsize=(8,8))
    plt.hist(masses,range=(60,120),bins=60)
    plt.xlabel(r'Mass (GeV/c$^{2}$)',fontsize=18)
    plt.savefig(filename)


# High-statistics ("optimal") case: combined spectrum, then each component.
sigmass,sigvar = gen_events(n=100000,cat='sig',slope=slope)
bkgmass,bkgvar = gen_events(n=1000000,cat='bkg',slope=slope)

data = np.concatenate([sigmass, bkgmass])

_save_mass_hist(data, 'optimal_case.png')
_save_mass_hist(sigmass, 'optimal_case_sig.png')
_save_mass_hist(bkgmass, 'optimal_case_bkg.png')


# Low-statistics ("suboptimal") case: same model, 10 signal / 100 background
# events requested.
sigmass,sigvar = gen_events(n=10,cat='sig',slope=slope)
bkgmass,bkgvar = gen_events(n=100,cat='bkg',slope=slope)

data = np.concatenate([sigmass, bkgmass])

_save_mass_hist(data, 'suboptimal_case.png')


# Ten independent background-only samples of the same small size, one figure
# per sample.
for i in range(0,10):
    bkgmass,bkgvar = gen_events(n=100,cat='bkg',slope=slope)
    _save_mass_hist(bkgmass, f'suboptimal_case_bkg_{i}.png')


In [None]:
# Sideband toy study: for each signal yield, draw a Gaussian peak on a flat
# background, count the background inside the signal window and inside two
# sidebands, and print three figures of merit.

#np.random.seed(0)   # uncomment for a reproducible sample


for n in (10, 100, 1000, 10000):
    # Signal: Normal(90, 3). Background: uniform on [60, 120), 30 events
    # generated per signal event.
    sig = np.random.normal(90,3,n)
    bkg = 60*np.random.random(30*n) + 60

    data = np.concatenate([sig, bkg])

    # Signal window includes its edges; the two sidebands exclude theirs.
    mask = (bkg>=82)&(bkg<=98)
    mask_lo = (bkg>66)&(bkg<82)
    mask_hi = (bkg>98)&(bkg<114)
    mask_sidebands = mask_lo | mask_hi

    nsig = n
    nbkg, nbkg_lo, nbkg_hi = (
        int(np.count_nonzero(m)) for m in (mask, mask_lo, mask_hi)
    )

    # Average of the two sideband counts, used as a background estimate.
    nbkg_side = (nbkg_lo+nbkg_hi)/2.0

    fom0 = nsig/np.sqrt(nbkg)
    fom1 = nsig/np.sqrt(nsig + nbkg)
    fom2 = nsig/np.sqrt(nsig + nbkg_side)

    print(nsig,nbkg,nbkg_lo,nbkg_hi,nbkg_side,fom0,fom1,fom2)

    # Same three overlaid samples at three binnings: matplotlib's default,
    # 60 bins and 120 bins.
    plt.figure(figsize=(12,3))
    for panel, hist_opts in enumerate(({}, {'bins': 60}, {'bins': 120}), start=1):
        plt.subplot(1,3,panel)
        for sample in (data, bkg[mask], bkg[mask_sidebands]):
            plt.hist(sample,range=(60,120),**hist_opts)
        plt.xlabel(r'Mass (GeV/c$^{2}$)',fontsize=14)

    plt.tight_layout()
    plt.savefig(f'mass_dist_nsig_{n}_nbkg_{n*30}.png')

    # Only the largest sample gets the step-by-step sideband illustration.
    if n != 10000:
        continue

    # Each successive figure overlays one more layer on top of the full data.
    layers = (data, bkg[mask_sidebands], bkg[mask])
    for step in range(len(layers)):
        plt.figure(figsize=(6,6))
        for sample in layers[:step+1]:
            plt.hist(sample,range=(60,120),bins=60)
        plt.xlabel(r'Mass (GeV/c$^{2}$)',fontsize=14)
        plt.savefig(f'sidebands_{step:02d}.png')


In [None]:
# Scale of the exponential distribution that gen_events uses for the
# discriminating variable.
slope = 2.0

# Signal and background samples requested with the same size. gen_events keeps
# only events whose discriminating variable lies strictly between 0 and 7, so
# the returned arrays can be shorter than the requested size.
sample_opts = dict(n=10000, slope=slope)

sigmass,sigvar = gen_events(cat='sig', **sample_opts)
bkgmass,bkgvar = gen_events(cat='bkg', **sample_opts)

def plot_sig_and_disc(sigmass,sigvar,bkgmass,bkgvar,cutpoint=-1,tag=None):
    """Plot mass and discriminating-variable histograms for signal and background.

    The left panel overlays the two mass spectra, the right panel the two
    discriminating-variable distributions. Background is drawn first and
    signal is drawn on top with 50% transparency.

    Parameters
    ----------
    sigmass, sigvar : array-like
        Signal masses and discriminating-variable values.
    bkgmass, bkgvar : array-like
        Background masses and discriminating-variable values.
    cutpoint : float
        When >= 0, a dashed vertical line is drawn at this value on the right
        panel, with a red arrow pointing towards larger values. Negative
        values (the default) draw no cut marker.
    tag : str or None
        When given, the figure is saved as
        ``mass_disc_cut<cutpoint>_<tag>.png`` and then closed. When None the
        figure is neither saved nor closed.
    """
    f = plt.figure(figsize=(12,4))

    # Mass spectra. Labels follow the data actually drawn (they were swapped
    # before, so the legend named background as signal and vice versa).
    plt.subplot(1,2,1)
    plt.hist(bkgmass,bins=120,range=(60,120),label='Background');
    plt.hist(sigmass,bins=120,range=(60,120),alpha=0.5,label='Signal');
    plt.xlabel(r'Mass (GeV/c$^{2}$)',fontsize=14)
    plt.legend()

    # Discriminating variable.
    plt.subplot(1,2,2)
    plt.hist(bkgvar,bins=100,range=(0,7),label='Background');
    plt.hist(sigvar,bins=100,range=(0,7),alpha=0.5,label='Signal');
    plt.xlabel(r'Discriminating variable',fontsize=14)
    if cutpoint>=0:
        # Vertical line up to the current top of the axis, plus an arrow at
        # half height. head_width is given in y-axis (count) units.
        plt.plot([cutpoint,cutpoint],[0,plt.gca().get_ylim()[1]],'k--')
        plt.arrow(cutpoint, plt.gca().get_ylim()[1]/2, 0.75,0,head_width=15, head_length=0.3, fc='r', ec='r')
    plt.legend()
    plt.tight_layout()

    if tag is not None:
        plt.savefig(f'mass_disc_cut{cutpoint:0.1f}_{tag}.png')
        plt.close(f)

    
# Reference figure with no cut marker (cutpoint < 0); saved under the
# 'demonstration' tag.
plot_sig_and_disc(
    sigmass, sigvar,
    bkgmass, bkgvar,
    cutpoint=-1,
    tag='demonstration',
)

# Sizes of the generated samples.
nbkg_tot, nsig_tot = (len(sample) for sample in (bkgmass, sigmass))

print(nbkg_tot,nsig_tot)


In [None]:
# Scan a lower cut on the discriminating variable from 0 to 7 in steps of 0.1
# and record the fraction of signal and background events that pass (>= cut).
x = list(np.arange(0,7.0,0.1))
yb = []
ys = []
for cut in x:

    print(cut)

    nsig = int(np.count_nonzero(sigvar>=cut))
    nbkg = int(np.count_nonzero(bkgvar>=cut))

    ys.append(nsig/nsig_tot)
    yb.append(nbkg/nbkg_tot)

    # To produce per-cut frames for an animation, call here:
    # plot_sig_and_disc(sigmass[sigvar>=cut],sigvar,bkgmass[bkgvar>=cut],bkgvar,cutpoint=cut,tag='for_anim')

fig, (ax_eff, ax_roc) = plt.subplots(1,2,figsize=(12,4))

# Left: surviving fractions as a function of the cut value.
ax_eff.plot(x,yb,'o',label='# bkg')
ax_eff.plot(x,ys,'^',label='# sig')
ax_eff.set_ylabel('Frac. of sig/bkg surviving',fontsize=14)
ax_eff.set_xlabel('Cut on $>$ disc. variable ',fontsize=14)
ax_eff.legend()

# Right: signal fraction against background fraction (ROC-style curve).
ax_roc.plot(yb,ys,'o')
ax_roc.set_xlabel('Frac. of bkg surviving',fontsize=14)
ax_roc.set_ylabel('Frac. of sig surviving',fontsize=14)

fig.tight_layout()
fig.savefig(f'ROC_curve_for_slope_{slope:0.2f}.png')

# Shell command for assembling frames into an animated GIF (note that it
# globs *.jpeg, whereas this notebook saves .png files):
# convert -delay 100 -loop 0 *.jpeg animatedGIF.gif

In [None]:
# For several sample sizes, find the cut on the discriminating variable that
# maximises S/sqrt(S+B), then show the mass spectrum with no cut, with the
# best cut, and with a very tight cut. The surviving fractions (x, yb, ys)
# come from the ROC scan cell.
for n in [10,100,1000,10000]:
    nsig_tot = n
    # NOTE(review): nbkg_tot is not read anywhere in this cell, and 7x does not
    # match the 30x background generated below. It is kept only so the
    # notebook state after this cell is unchanged -- confirm and remove.
    nbkg_tot = 7*nsig_tot

    sigmass,sigvar = gen_events(n=nsig_tot,cat='sig',slope=slope)
    bkgmass,bkgvar = gen_events(n=30*nsig_tot,cat='bkg',slope=slope)

    # Background events under the peak (81 < mass < 99) before any cut. This
    # does not depend on the cut, so it is counted once per sample.
    nbkg_window = int(np.count_nonzero((bkgmass>81)&(bkgmass<99)))

    foms = []
    maxfom = 0
    maxcut = 0
    for xcut,nbcut,nscut in zip(x,yb,ys):

        # Expected yields after the cut: totals scaled by surviving fractions.
        nsig = nsig_tot*nscut
        nbkg = nbkg_window*nbcut

        # With nothing surviving, the significance is taken as 0 rather than 0/0.
        total = nsig + nbkg
        fom = nsig/np.sqrt(total) if total > 0 else 0.0

        foms.append(fom)
        if maxfom<fom:
            maxfom = fom
            maxcut = xcut

    plt.figure(figsize=(15,4))
    plt.subplot(1,4,1)
    plt.plot(x,foms,'o')
    plt.xlabel('Cut on $>$ disc. variable ',fontsize=14)
    plt.ylabel('Significance',fontsize=14)

    data = np.concatenate([sigmass, bkgmass])
    plt.subplot(1,4,2)
    plt.hist(data,range=(60,120),bins=60,label="No cut",color='darkviolet')
    plt.xlabel(r'Mass (GeV/c$^{2}$)',fontsize=14)
    plt.legend(fontsize=14)

    # Spectrum at the best cut; the legend reports the cut actually applied
    # (it was previously hard-coded to 4.5).
    data = np.concatenate([sigmass[sigvar>maxcut], bkgmass[bkgvar>maxcut]])
    plt.subplot(1,4,3)
    plt.hist(data,range=(60,120),bins=60,label=f"cut > {maxcut:0.1f}",color='darkviolet')
    plt.xlabel(r'Mass (GeV/c$^{2}$)',fontsize=14)
    plt.legend(fontsize=14)

    # Spectrum at a fixed, very tight cut for comparison.
    tight_cut = 6.5
    data = np.concatenate([sigmass[sigvar>tight_cut], bkgmass[bkgvar>tight_cut]])
    plt.subplot(1,4,4)
    plt.hist(data,range=(60,120),bins=60,label=f"cut > {tight_cut}",color='darkviolet')
    plt.xlabel(r'Mass (GeV/c$^{2}$)',fontsize=14)
    plt.legend(fontsize=14)

    plt.tight_layout()

    plt.savefig(f'ROC_curve_for_slope_{slope:0.2f}_nsig_{nsig_tot}_nbkg_{nsig_tot*30}.png')


    print(maxfom,maxcut)

    
        
        