In [None]:
import os
import time
from copy import copy

import dask
import dask_histogram
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
from dask import dataframe as dd
from iminuit import Minuit
from matplotlib.colors import LogNorm

In [None]:
# directory holding the prediction CSV files; the trailing "/" matters because
# file names are appended to this string by plain concatenation further down
filepath = "/DATA_MASTER_PATH/23_hike_pinunu-background/2309_zoptical-zanalyze_final_vars_SHORTBL/pred2_norm_conv1plus_withgencls_FINAL/"

def filesel(s):
    """Extra, user-editable file-name filter applied to every candidate file.

    Currently accepts every name: the argument `s` (a file name) is ignored
    and True is always returned.
    """
    return True

def _files_tagged(tag):
    """Names of the regular files in `filepath` that pass `filesel` and contain `tag`."""
    return [
        name
        for name in os.listdir(filepath)
        if os.path.isfile(os.path.join(filepath, name)) and filesel(name) and (tag in name)
    ]

files_2pi = _files_tagged("2p0_mal")
files_sig = _files_tagged("sig_mal")
files_lam = _files_tagged("lambda")

In [None]:
def _decays_from_filename(name):
    """Number of generated decays encoded in a file name.

    Recognised tokens are "_<k>M_" with k in 1..9, 10..90 (step 10) or
    100..900 (step 100), and "_<k>G_" with k in 1..9 or 10..90 (step 10).
    Every token found adds its value; a name without any token yields 0.
    """
    total = 0
    for i in range(1, 10):
        tokens = (
            ("_%dM_" % (i), i*1e6),
            ("_%dM_" % (10*i), i*10e6),
            ("_%dM_" % (100*i), i*100e6),
            ("_%dG_" % (i), i*1e9),
            ("_%dG_" % (10*i), i*10e9),
        )
        for token, value in tokens:
            if token in name:
                total += value
    return total


def histsum(files):
    """Lazily load a set of prediction CSV files and count their generated decays.

    Parameters
    ----------
    files : iterable of str
        File names relative to the module-level `filepath` directory.

    Returns
    -------
    preds : dask dataframe
        Row-wise concatenation of all the files, in the given order (lazy).
    n_decays : number
        Sum of the decay counts parsed from the file names
        (see `_decays_from_filename`).

    Raises
    ------
    ValueError
        If `files` is empty (there would be nothing to concatenate).
    """
    files = list(files)
    if not files:
        raise ValueError("histsum: no input files were given")

    n_decays = 0
    frames = []

    for ifile, file in enumerate(files):

        n_new = _decays_from_filename(file)
        n_decays += n_new
        print("opened %d/%d, with %d new decays" % (ifile+1, len(files), n_new))

        # read_csv is lazy: nothing is loaded until a result is computed
        frames.append(dd.read_csv(os.path.join(filepath, file)))

    # a single concatenation through the public API, instead of growing the
    # dataframe pairwise inside the loop
    preds = dd.concat(frames)

    return preds, n_decays
    
preds_sig, n_decays_sig = histsum(files_sig)
preds_2pi, n_decays_2pi = histsum(files_2pi)
preds_lam, n_decays_lam = histsum(files_lam)

# working with this dataframe from now on
# each sample gets its true-class id in the "cls" column (1: signal, 0: 2pi, 5: lambda);
# the three samples are stacked with one call to the public dd.concat
preds_tot = dd.concat([
    preds_sig.assign(cls=1),
    preds_2pi.assign(cls=0),
    preds_lam.assign(cls=5),
])

# generated decays per true-class id, used further down for the normalisation factors
n_decays_tot = {
    1 : n_decays_sig,
    0 : n_decays_2pi,
    5 : n_decays_lam,
}

In [None]:
# summary statistics of the full dataframe
# NOTE(review): preds_tot is a dask dataframe, so this presumably only displays a
# lazy structure; a .compute() is likely needed to see the numbers - confirm
preds_tot.describe()

In [None]:
# generated decays per true-class id, as parsed from the file names
n_decays_tot

In [None]:
# all general settings here

# when True, every plotted histogram is divided by its own integral
bnorm = False

# true-class ids, in the order used throughout: signal, 2pi, lambda
clslabels = dict(zip((1, 0, 5), ("k-pinunu", "k-2pi", "lambda-pin")))  # legend labels
clscolors = dict(zip((1, 0, 5), ("Blues", "Oranges", "Greens")))  # matplotlib colormaps

# per-class scale factor: (reference number of decays) / (generated decays in the files)
# alternative reference numbers kept from the original: 15483 (1), 446e9 (0), 8.22e13 (5)
normfact = {
    cls : n_ref/n_decays_tot[cls]
    for cls, n_ref in ((1, 37800), (0, 1089e9), (5, 20e13))
}

# offset subtracted from every z edge (alternative value: 150)
shift_fv = 0
# fiducial box: (z_min, z_max, pt_min, pt_max); alternative z range: 280-350
box_edges = (130-shift_fv, 190-shift_fv, 0.14, 10)

# starting thresholds on the two classifier outputs (pred0, pred1)
sig_sel = (0.5065, 0.5100)

In [None]:
# add classifier predictions to main dataframe (pred0/1_bool)
# an event passes a classifier when its output is strictly above the threshold
preds_tot = preds_tot.assign(
    pred0_bool=preds_tot["pred0"] > sig_sel[0],
    pred1_bool=preds_tot["pred1"] > sig_sel[1],
)

In [None]:
# add squared weights to main dataframe (Wsq)
# the sum of Wsq over a selection is used later for the error on the weighted counts
preds_tot = preds_tot.assign(Wsq=preds_tot["W"].mul(preds_tot["W"]))

In [None]:
# create sub-dataframe with events in fiducial box only
# box_edges = (z_min, z_max, pt_min, pt_max); all four bounds are exclusive
preds_tot_fv = preds_tot[
    (preds_tot["Vertex_xRec_Z"] > box_edges[0])
    & (preds_tot["Vertex_xRec_Z"] < box_edges[1])
    & (preds_tot["Vertex_pRecPi_T"] > box_edges[2])
    & (preds_tot["Vertex_pRecPi_T"] < box_edges[3])
]

---

In [None]:
# (pred0_min, pred0_max, pred1_min, pred1_max): range of the two classifier outputs
# covered by the 2D histograms and shown in the plots
edges = (0.490, 0.510, 0.485, 0.545)  # BDT-BDT manually selected plot edges

In [None]:
# start of the wall-clock timing of the histogram cell that follows
time0 = time.time()  # BDT-BDT hist.

In [None]:
# BDT-BDT hist.
# no real-life statistics weight here

# per true class: [counts (2D), pred0 bin edges, pred1 bin edges]
# kept as a plain list: the three arrays have different shapes, so packing them
# with np.array(...) builds a ragged array (an error on recent NumPy versions)
hists = {}

hists_fv = {}

# NOTE: linspace with 200 points gives 199 bins per axis
bdt_bins = (np.linspace(edges[0], edges[1], 200), np.linspace(edges[2], edges[3], 200))

for cls in (1, 0, 5):

    # --> whole z-pt space first, then fiducial box only
    for frame, store in ((preds_tot, hists), (preds_tot_fv, hists_fv)):
        preds = frame[frame["cls"]==cls]
        hist = dask_histogram.histogram2d(
            x=preds["pred0"], y=preds["pred1"], weights=preds["W"],
            bins=bdt_bins,
        )
        # the histogram is lazy: materialise each of its components
        store[cls] = [x.compute() for x in hist]

In [None]:
# wall-clock seconds elapsed since time0 was taken
time1 = time.time()  # BDT-BDT hist.
print("execution time: %f" % (time1-time0))

In [None]:
# another expression of the plot edges (obtained from the histograms):
# [x_min, x_max, y_min, y_max] taken from the bin edges of the signal histogram
_bdt_xedges, _bdt_yedges = hists[1][1], hists[1][2]
extent = [
    min(_bdt_xedges), max(_bdt_xedges),
    min(_bdt_yedges), max(_bdt_yedges),
]

In [None]:
# cut optimisation
# real-life statistics weight is applied here

sig_sel_old = sig_sel

# function to minimise
def scorer(cut0, cut1):
    """Negative significance -S/sqrt(S+B) of the selection pred0 > cut0 and pred1 > cut1.

    The yields are taken from the binned `hists` (whole phase space): a bin is
    selected when its centre is strictly above the cut on both axes, and each
    class is scaled by its `normfact`. S is class 1, B is classes 0 and 5.

    Returns 0.0 (the worst possible score, since the score is never positive
    for non-negative yields) when nothing is selected, instead of the NaN
    that 0/0 would give to the minimiser.
    """
    p = {}

    for cls in (1, 0, 5):
        h = hists[cls]
        z = h[0]
        # bin centres = lower edges + half a bin width (uniform binning)
        x, y = h[1][:-1]+0.5*(h[1][1]-h[1][0]), h[2][:-1]+0.5*(h[2][1]-h[2][0])
        bool_x, bool_y = x>cut0, y>cut1
        p.update({cls : np.sum(z[np.ix_(bool_x, bool_y)]) * normfact[cls]})

    total = sum(p.values())
    if not total > 0:
        return 0.0

    return (-p[1] / np.sqrt(total)) #* (p[1]/60)**0.5

#def scorer(cut1_0, cut1_1):
#    p = {}
#    
#    for cls in (1, 0, 5):
#        h = hists[cls]
#        z = h[0]
#        x, y = h[1][:-1]+0.5*(h[1][1]-h[1][0]), h[2][:-1]+0.5*(h[2][1]-h[2][0])
#        xx, yy = np.meshgrid(x, y)
#        bool_2d = \
#            ((xx>0.5025) & (xx<=0.5030) & (yy>cut1_0)) |\
#            ((xx>=0.5030) & (yy>cut1_1))
#        p.update({cls : np.sum(z[bool_2d]) * normfact[cls]})
#        
#    return -p[1] / np.sqrt(sum(p.values()))

#def scorer(cutdiag_m, cutdiag_q):
#    p = {}
#    
#    for cls in (1, 0, 5):
#        h = hists[cls]
#        z = h[0]
#        x, y = h[1][:-1]+0.5*(h[1][1]-h[1][0]), h[2][:-1]+0.5*(h[2][1]-h[2][0])
#        xx, yy = np.meshgrid(x, y)
#        bool_2d = (xx>cut0) & (yy>cut1) & (yy>(cutdiag_m*xx+cutdiag_q))
#        p.update({cls : np.sum(z[bool_2d]) * normfact[cls]})
#        
#    return -p[1] / np.sqrt(sum(p.values()))

# actual minimisation
# both cuts start from the current selection and are left free
m = Minuit(scorer, cut0=sig_sel_old[0], cut1=sig_sel_old[1])
for par in ("cut0", "cut1"):
    m.fixed[par] = False
m.migrad()
# fitted values, in the same (cut0, cut1) order as sig_sel
sig_sel_new = tuple(m.values[par] for par in ("cut0", "cut1"))

#m = Minuit(
#    scorer,
#    cut1_0=sig_sel_old[1]+1e-4, cut1_1=sig_sel_old[1]-1e-4,
#)
#m.fixed['cut1_0'] = False
#m.fixed['cut1_1'] = False
#m.migrad()
#sig_sel_new = (
#    m.values.to_dict()["cut1_0"],
#    m.values.to_dict()["cut1_1"],
#)

#m = Minuit(
#    scorer,
#    cut0=sig_sel_old[0]-0.002, cut1=sig_sel_old[1]-0.01,
#    cutdiag_m=-1/0.2, cutdiag_q=250*0.01
#)
#m.errors = [0.001, 0.01, 0.1, 0.1]
#m.fixed['cut0'] = True
#m.fixed['cut1'] = True
#m.fixed['cutdiag_m'] = False
#m.fixed['cutdiag_q'] = False
#m.limits = [None, None, (None, 0), (0.1, None)]
#m.migrad()
#sig_sel_new = (
#    m.values.to_dict()["cut0"], m.values.to_dict()["cut1"],
#    m.values.to_dict()["cutdiag_m"], m.values.to_dict()["cutdiag_q"],
#)

print("optimal cut values found:")
print(sig_sel_new)

# switch: set to False to keep the old cut values
if True:
    print("replacing the old values:")
    print(sig_sel_old)
    sig_sel = sig_sel_new
    # recompute the pass/fail flags with the new thresholds, in both dataframes
    preds_tot = preds_tot.assign(
        pred0_bool=preds_tot["pred0"] > sig_sel[0],
        pred1_bool=preds_tot["pred1"] > sig_sel[1],
    )
    preds_tot_fv = preds_tot_fv.assign(
        pred0_bool=preds_tot_fv["pred0"] > sig_sel[0],
        pred1_bool=preds_tot_fv["pred1"] > sig_sel[1],
    )

In [None]:
# BDT-BDT actual plot (whole z-pt plot) - after optimisation of the cuts
# no real-life statistics weight here

if True:
    fig, axs = plt.subplots(figsize=(8, 6), nrows=2, ncols=2, sharex=True, sharey=True)
    # colour scale of the single-class panels; the log option is switched off here
    log = LogNorm() if False else None

    # opacity of each class in the superimposed panel
    overlay_alpha = {5 : 1, 1 : 0.6, 0 : 0.7}

    # (axes, classes drawn in this order, superimposed panel?)
    # the single-class panels draw their image twice, as in the original layout
    panels = (
        (axs[0, 0], (5, 0, 1), True),   # all 3 classes superimposed on one another
        (axs[0, 1], (5, 5), False),     # class 5
        (axs[1, 0], (0, 0), False),     # class 0
        (axs[1, 1], (1, 1), False),     # class 1
    )

    for ax, classes, overlay in panels:
        ax.set_xlim((extent[0], extent[1]))
        ax.set_ylim((extent[2], extent[3]))
        for cls in classes:
            data = hists[cls][0].T
            # non-positive bins are masked out
            hist = np.ma.masked_where(data <= 0.0*np.max(data), data) / (np.sum(data) if bnorm else 1)
            ax.matshow(
                hist,
                cmap=clscolors[cls], alpha=overlay_alpha[cls] if overlay else 1,
                norm=None if overlay else log,
                extent=extent, origin="lower", aspect="auto",
            )
            if overlay:
                # marker at y=-1, outside the final axis limits: it only provides the legend entry
                ax.plot((-1, -1), color=clscolors[cls][:-1].lower(), lw=0, marker="o", label="true %s" % clslabels[cls])
        ax.grid(True)
        ax.set_xlim(edges[0:2])
        ax.set_ylim(edges[2:4])

    # lines in all plots
    for k, ax in enumerate(axs.flat):
        ax.axvline(0.5, color="k", lw=1)
        ax.axhline(0.5, color="k", lw=1)
        # corner of the signal selection; labelled once (first panel) for the figure legend
        ax.plot(
            (sig_sel[0], sig_sel[0], edges[1]),
            (edges[3], sig_sel[1], sig_sel[1]),
            color="red", lw=1, label="signal selection" if k == 0 else None
        )

    fig.legend(loc="lower left")
    fig.supxlabel("output of the pinunu-2pi classifier")
    fig.supylabel("output of the pinunu-lambda classifier")
    fig.tight_layout()

In [None]:
# BDT-BDT actual plot (fiducial box) - after optimisation of the cuts
# no real-life statistics weight here

if True:
    fig, axs = plt.subplots(figsize=(8, 6), nrows=2, ncols=2, sharex=True, sharey=True)
    # colour scale of the single-class panels; the log option is switched off here
    log = LogNorm() if False else None

    # opacity of each class in the superimposed panel
    overlay_alpha = {5 : 1, 1 : 0.6, 0 : 0.7}

    # (axes, classes drawn in this order, superimposed panel?)
    # the single-class panels draw their image twice, as in the original layout
    panels = (
        (axs[0, 0], (5, 0, 1), True),   # all 3 classes superimposed on one another
        (axs[0, 1], (5, 5), False),     # class 5
        (axs[1, 0], (0, 0), False),     # class 0
        (axs[1, 1], (1, 1), False),     # class 1
    )

    for ax, classes, overlay in panels:
        ax.set_xlim((extent[0], extent[1]))
        ax.set_ylim((extent[2], extent[3]))
        for cls in classes:
            data = hists_fv[cls][0].T
            # non-positive bins are masked out
            hist = np.ma.masked_where(data <= 0.0*np.max(data), data) / (np.sum(data) if bnorm else 1)
            ax.matshow(
                hist,
                cmap=clscolors[cls], alpha=overlay_alpha[cls] if overlay else 1,
                norm=None if overlay else log,
                extent=extent, origin="lower", aspect="auto",
            )
            if overlay:
                # marker at y=-1, outside the final axis limits: it only provides the legend entry
                ax.plot((-1, -1), color=clscolors[cls][:-1].lower(), lw=0, marker="o", label="true %s" % clslabels[cls])
        ax.grid(True)
        ax.set_xlim(edges[0:2])
        ax.set_ylim(edges[2:4])

    # lines in all plots
    for k, ax in enumerate(axs.flat):
        ax.axvline(0.5, color="k", lw=1)
        ax.axhline(0.5, color="k", lw=1)
        # corner of the signal selection; labelled once (first panel) for the figure legend
        ax.plot(
            (sig_sel[0], sig_sel[0], edges[1]),
            (edges[3], sig_sel[1], sig_sel[1]),
            color="red", lw=1, label="signal selection" if k == 0 else None
        )

    fig.legend(loc="lower left")
    fig.supxlabel("output of the pinunu-2pi classifier")
    fig.supylabel("output of the pinunu-lambda classifier")
    fig.tight_layout()

In [None]:
# display names keyed by the labels used for the z-pt histograms and plot titles:
# 1 = signal, 990 / 995 = the two background samples (true-class ids 0 and 5
# offset by 990), 0 = background in general
classnames = {
    995 : "lambda-pin",
    990 : "k-2pi",
    1 : "k-pinunu",
    0 : "background",
}

In [None]:
# BDT-BDT actual plot - comparison between classes
# real-life statistics weight is applied here

blog = True

# masked (empty) bins are drawn in white
cmap = plt.get_cmap("jet").copy()
cmap.set_bad('white')

for iclass in (1, 0, 5):

    fig, axs = plt.subplots(ncols=2, figsize=(12, 4))

    # left: whole phase space, right: fiducial box only
    for ax, title, source in zip(axs, ("all", "in fiducial box"), (hists, hists_fv)):

        ax.set_title(title)
        ax.set_xlim((extent[0], extent[1]))
        ax.set_ylim((extent[2], extent[3]))

        data = source[iclass][0].T
        data = data*normfact[iclass]
        hist = np.ma.masked_where(data < 1e-10, data) / (np.sum(data) if bnorm else 1)

        # a log scale needs at least one unmasked bin to define its limits;
        # an entirely empty histogram falls back to the default (linear) scale
        if blog and hist.count() > 0:
            norm = LogNorm(vmin=np.min(hist), vmax=np.max(hist), clip=True)
        else:
            norm = None

        cax = ax.matshow(hist, origin="lower", extent=extent, aspect="auto", norm=norm, cmap=cmap)
        # attach the colorbar to this panel explicitly (no reliance on pyplot's current axes)
        fig.colorbar(cax, ax=ax)

        # current signal selection
        ax.axhline(y=sig_sel[1], color="k", lw=2)
        ax.axvline(x=sig_sel[0], color="k", lw=2)

    # classnames uses 990/995 for the two background samples (true-class ids 0 and 5)
    fig.suptitle(classnames[iclass if iclass==1 else (990+iclass)])
    fig.tight_layout()

---

In [None]:
# start of the wall-clock timing of the counting cell that follows
time0 = time.time()  # z-pt pop. count

In [None]:
# z-pt pop. count
# real-life statistics weight is applied here

def _lazy_selected_sums(frame):
    """Lazy (sum of W, sum of Wsq) per true class, for events passing both classifier cuts."""
    sums = {}
    for cls in (1, 0, 5):
        selected = frame[(frame["cls"] == cls) & ((frame["pred0_bool"]) & (frame["pred1_bool"]))]
        sums[cls] = (selected["W"].sum(), selected["Wsq"].sum())
    return sums

# all twelve sums are evaluated in one go, so the input is not re-processed
# once per number
_sums, _sums_fv = dask.compute(_lazy_selected_sums(preds_tot), _lazy_selected_sums(preds_tot_fv))

# --> whole phase space
# tp: true signal selected; fp: background (2pi / lambda) selected as signal
# (counts of rejected events would use the inverted selection mask)
n_tp = _sums[1][0]*normfact[1]
n_fp_2pi = _sums[0][0]*normfact[0]
n_fp_lambda = _sums[5][0]*normfact[5]
n_fp = n_fp_2pi + n_fp_lambda

# errors: sqrt(sum of squared weights), scaled like the counts
e_n_tp = np.sqrt( _sums[1][1]*normfact[1]**2 )
e_n_fp_2pi = np.sqrt( _sums[0][1]*normfact[0]**2 )
e_n_fp_lambda = np.sqrt( _sums[5][1]*normfact[5]**2 )
e_n_fp = np.sqrt( e_n_fp_2pi**2 + e_n_fp_lambda**2 )

# --> fiducial box only
n_tp_fv = _sums_fv[1][0]*normfact[1]
n_fp_fv_2pi = _sums_fv[0][0]*normfact[0]
n_fp_fv_lambda = _sums_fv[5][0]*normfact[5]
n_fp_fv = n_fp_fv_2pi + n_fp_fv_lambda

e_n_tp_fv = np.sqrt( _sums_fv[1][1]*normfact[1]**2 )
e_n_fp_fv_2pi = np.sqrt( _sums_fv[0][1]*normfact[0]**2 )
e_n_fp_fv_lambda = np.sqrt( _sums_fv[5][1]*normfact[5]**2 )
e_n_fp_fv = np.sqrt( e_n_fp_fv_2pi**2 + e_n_fp_fv_lambda**2 )
#e_n_tn_fv = np.sqrt( n_tn_2pi**2 + n_tn_lambda**2 )

In [None]:
# wall-clock seconds elapsed since time0 was taken
time1 = time.time()  # z-pt pop. count
print("execution time: %f" % (time1-time0))

In [None]:
# one line per selected yield, printed as "value (error)"
row_formats = (
    "--> true signal: \t\t%.5f (%.5f)",
    "--> false signal (total): \t%.5f (%.5f)",
    "--> false signal (2pi): \t%.5f (%.5f)",
    "--> false signal (lambda): \t%.5f (%.5f)",
)

report = (
    ("in whole phase space:", (
        (n_tp, e_n_tp), (n_fp, e_n_fp), (n_fp_2pi, e_n_fp_2pi), (n_fp_lambda, e_n_fp_lambda),
    )),
    ("\nin fiducial box:", (
        (n_tp_fv, e_n_tp_fv), (n_fp_fv, e_n_fp_fv), (n_fp_fv_2pi, e_n_fp_fv_2pi), (n_fp_fv_lambda, e_n_fp_fv_lambda),
    )),
)

for heading, rows in report:
    print(heading)
    for fmt, value_and_error in zip(row_formats, rows):
        print(fmt % value_and_error)

In [None]:
def _yields_above_cuts(hist_by_cls, cut0, cut1):
    """Scaled yields (class 1, class 0, class 5) of the bins whose centre is above both cuts."""
    yields = {}

    for cls in (1, 0, 5):
        counts, xedges, yedges = hist_by_cls[cls][0], hist_by_cls[cls][1], hist_by_cls[cls][2]
        # bin centres = lower edges + half a bin width (uniform binning)
        xcentres = xedges[:-1]+0.5*(xedges[1]-xedges[0])
        ycentres = yedges[:-1]+0.5*(yedges[1]-yedges[0])
        keep = np.ix_(xcentres>cut0, ycentres>cut1)
        yields[cls] = np.sum(counts[keep]) * normfact[cls]

    return yields[1], yields[0], yields[5]

def s_and_b_nofv(cut0, cut1):
    """Signal, 2pi and lambda yields above (cut0, cut1) in the whole phase space (`hists`)."""
    return _yields_above_cuts(hists, cut0, cut1)

def s_and_b(cut0, cut1):
    """Signal, 2pi and lambda yields above (cut0, cut1) in the fiducial box (`hists_fv`)."""
    return _yields_above_cuts(hists_fv, cut0, cut1)

In [None]:
# count S/B and S/sqrt(S+B) for different cut combinations, without fiducial box
# real-life statistics weight is applied here

cuts0 = np.linspace(0.500, 0.508, 50)
cuts1 = np.linspace(0.500, 0.516, 50)
s_ref = 15  # 33 (15) with the short (long) case
b_2pi_ref = 70  # 80 (70) with the short (long) case

# every grid point (cut0 outer, cut1 inner), then one reference row at (-10, -10),
# i.e. with effectively no cut applied
scan_points = [(c0, c1) for c0 in cuts0 for c1 in cuts1] + [(-10, -10)]

# printed columns: cut0 cut1 S B_2pi B_lambda S/B S/B' S/sqrt(S+B) S/sqrt(S+B') flags
# where the primed quantities count only 10% of the lambda background
for cut0, cut1 in scan_points:
    s, b_2pi, b_lambda = s_and_b_nofv(cut0, cut1)
    arrow = "<" if (abs(s-s_ref)<5) else ""  # arrows indicate that S or B are close to the LoI ones
    arrow += "<" if (abs(b_2pi-b_2pi_ref)<5) else ""
    # the flags are computed from the cuts of the row being printed, including the reference row
    asterisk = "*" if (abs(cut0-sig_sel[0])<0.0002) else ""  # asterisks indicate that one of the cut is close to the Minuit-optimised one
    asterisk += "*" if (abs(cut1-sig_sel[1])<0.0002) else ""
    separator = ""  # always empty, kept so that the printed format is unchanged
    rat = s / (b_2pi+b_lambda)
    rat2 = s / (b_2pi+b_lambda*0.1)
    sens = s / np.sqrt(s+b_2pi+b_lambda)
    sens2 = s / np.sqrt(s+b_2pi+b_lambda*0.1)
    print(
        "%.5f %.5f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %s%s%s" % \
        (cut0, cut1, s, b_2pi, b_lambda, rat, rat2, sens, sens2, asterisk, separator, arrow)
    )

In [None]:
# count S/B and S/sqrt(S+B) for different cut combinations
# real-life statistics weight is applied here

cuts0 = np.linspace(0.500, 0.508, 50)
cuts1 = np.linspace(0.500, 0.516, 50)
s_ref = 15  # 33 (15) with the short (long) case
b_2pi_ref = 70  # 80 (70) with the short (long) case

# every grid point (cut0 outer, cut1 inner), then one reference row at (-10, -10),
# i.e. with effectively no cut applied
scan_points = [(c0, c1) for c0 in cuts0 for c1 in cuts1] + [(-10, -10)]

# printed columns: cut0 cut1 S B_2pi B_lambda S/B S/B' S/sqrt(S+B) S/sqrt(S+B') flags
# where the primed quantities count only 10% of the lambda background
for cut0, cut1 in scan_points:
    s, b_2pi, b_lambda = s_and_b(cut0, cut1)
    arrow = "<" if (abs(s-s_ref)<5) else ""  # arrows indicate that S or B are close to the LoI ones
    arrow += "<" if (abs(b_2pi-b_2pi_ref)<5) else ""
    # the flags are computed from the cuts of the row being printed, including the reference row
    asterisk = "*" if (abs(cut0-sig_sel[0])<0.0002) else ""  # asterisks indicate that one of the cut is close to the Minuit-optimised one
    asterisk += "*" if (abs(cut1-sig_sel[1])<0.0002) else ""
    separator = ""  # always empty, kept so that the printed format is unchanged
    rat = s / (b_2pi+b_lambda)
    rat2 = s / (b_2pi+b_lambda*0.1)
    sens = s / np.sqrt(s+b_2pi+b_lambda)
    sens2 = s / np.sqrt(s+b_2pi+b_lambda*0.1)
    print(
        "%.5f %.5f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %s%s%s" % \
        (cut0, cut1, s, b_2pi, b_lambda, rat, rat2, sens, sens2, asterisk, separator, arrow)
    )

---

In [None]:
# settings of the z-pt plots
# number of linspace points per axis (z, pt) used to build the bin edges,
# i.e. one more than the resulting number of bins
bins_zpt = (200, 100)
# ((z_min, z_max), (pt_min, pt_max)) range of the histograms
edges_zpt = ((0-shift_fv, 250-shift_fv), (0, 0.4)) 

In [None]:
# start of the wall-clock timing of the histogram cell that follows
time0 = time.time()  # z-pt hist.

In [None]:
# z-pt hist.
# no real-life statistics weight here

# hists_zpt[true label][predicted label] = [counts (2D), z bin edges, pt bin edges]
#   true labels: 1 = signal, 990 = 2pi background, 995 = lambda background
#   predicted labels: 1 = passes both classifier cuts, 0 = fails at least one
# each entry is a plain list: the three arrays have different shapes, so packing
# them with np.array(...) builds a ragged array (an error on recent NumPy versions)
hists_zpt = {}

zpt_bin_edges = (
    np.linspace(edges_zpt[0][0], edges_zpt[0][1], bins_zpt[0]),
    np.linspace(edges_zpt[1][0], edges_zpt[1][1], bins_zpt[1]),
)

for cls, true_label in ((1, 1), (0, 990), (5, 995)):

    preds = preds_tot[preds_tot["cls"]==cls]
    boolsig = (preds["pred0_bool"]) & (preds["pred1_bool"])
    hists_zpt[true_label] = {}

    # --> pred. label: signal, then pred. label: background (any)
    for pred_label, mask in ((1, boolsig), (0, ~boolsig)):
        selected = preds[mask]
        hist = dask_histogram.histogram2d(
            x=selected["Vertex_xRec_Z"], y=selected["Vertex_pRecPi_T"], weights=selected["W"],
            bins=zpt_bin_edges,
        )
        # the histogram is lazy: materialise each of its components
        hists_zpt[true_label][pred_label] = [x.compute() for x in hist]

In [None]:
# wall-clock seconds elapsed since time0 was taken
time1 = time.time()  # z-pt hist.
print("execution time: %f" % (time1-time0))

In [None]:
# z-pt hist. again
# real-life statistics weight is applied here, and stays below

# true label: background (any - label 0)
# scaled sum of the two background samples; the bin edges are taken from the 2pi one
hists_zpt[0] = {
    pred_label : [
        hists_zpt[990][pred_label][0]*normfact[0] + hists_zpt[995][pred_label][0]*normfact[5],
        hists_zpt[990][pred_label][1],
        hists_zpt[990][pred_label][2],
    ]
    for pred_label in (1, 0)
}

# also, rescale true signal
# WARNING: this scales the stored counts in place, so running this cell a second
# time without re-running the histogram cell applies the factor twice
# (the 990 and 995 entries themselves are left unscaled)
for pred_label in (1, 0):
    hists_zpt[1][pred_label][0] *= normfact[1]

In [None]:
# another expression of the plot edges (obtained from the histograms):
# [z_min, z_max, pt_min, pt_max] from the bin edges of the selected-signal histogram
_zpt_xedges, _zpt_yedges = hists_zpt[1][1][1], hists_zpt[1][1][2]
extent_zpt = [
    min(_zpt_xedges), max(_zpt_xedges),
    min(_zpt_yedges), max(_zpt_yedges),
]

# fiducial volume box to be superimposed on the plots
# (lower-left corner, width, height) from box_edges = (z_min, z_max, pt_min, pt_max)
box = patches.Rectangle(
    (box_edges[0], box_edges[2]),
    box_edges[1]-box_edges[0],
    box_edges[3]-box_edges[2],
    linewidth=1, edgecolor='0.5', facecolor='none', label="LoI fiducial box",
)

In [None]:
# z-pt plots - one per true label
# real-life statistics weight is applied here, after rewriting hists_zpt

fig, axs = plt.subplots(ncols=2, figsize=(10, 5))
blog = True

# layers drawn in this order on each panel:
# (predicted label, colormap, extra image options, legend colour, legend text)
layers = (
    (0, "Reds", {}, "r", "pred. as background"),
    (1, "Blues", {"alpha" : 0.6 if blog else 0.9}, "b", "pred. as signal"),
)

for ax, title, true_label in zip(axs, ("true background", "true signal"), (0, 1)):
    ax.set_title(title)
    ax.set_xlim((extent_zpt[0], extent_zpt[1]))
    ax.set_ylim((extent_zpt[2], extent_zpt[3]))
    ax.add_patch(copy(box))  # each axes needs its own copy of the patch

    for pred_label, cmap_name, extra, color, label in layers:
        data = hists_zpt[true_label][pred_label][0].T
        hist = np.ma.masked_where(data == 0, data) / (np.sum(data) if bnorm else 1)
        ax.matshow(hist, origin="lower", extent=extent_zpt, aspect="auto", norm=LogNorm() if blog else None, cmap=cmap_name, **extra)
        if true_label == 0:
            # legend entries are attached to the first panel only
            ax.plot((-1, -1), color=color, lw=0, marker="o", label=label)

axs[0].legend(loc="upper left")
fig.tight_layout()

In [None]:
# z-pt plots - one per predicted label
# real-life statistics weight is applied here, after rewriting hists_zpt

fig, axs = plt.subplots(ncols=2, figsize=(10, 5))
blog = True

# layers drawn in this order on each panel:
# (true label, colormap, extra image options, legend colour, legend text)
layers = (
    (0, "Reds", {}, "r", "true background"),
    (1, "Blues", {"alpha" : 0.6 if blog else 0.9}, "b", "true signal"),
)

for pred_class, title in ((0, "predicted as background"), (1, "predicted as signal")):
    ax = axs[pred_class]
    ax.set_title(title)
    ax.set_xlim((extent_zpt[0], extent_zpt[1]))
    ax.set_ylim((extent_zpt[2], extent_zpt[3]))
    ax.add_patch(copy(box))  # each axes needs its own copy of the patch

    for true_label, cmap_name, extra, color, label in layers:
        data = hists_zpt[true_label][pred_class][0].T
        hist = np.ma.masked_where(data == 0, data) / (np.sum(data) if bnorm else 1)
        ax.matshow(hist, origin="lower", extent=extent_zpt, aspect="auto", norm=LogNorm() if blog else None, cmap=cmap_name, **extra)
        if pred_class == 0:
            # legend entries are attached to the first panel only
            ax.plot((-1, -1), color=color, lw=0, marker="o", label=label)

axs[0].legend(loc="upper left")
fig.tight_layout()

In [None]:
# z-pt plots - individual classes separately, all true labels and only signal-selected
# real-life statistics weight is applied here, after rewriting hists_zpt

# masked (empty) bins are drawn in white
cmap = plt.get_cmap("jet").copy()
cmap.set_bad('white')

for iclass in (1, 990, 995):

    fig, axs = plt.subplots(ncols=2, figsize=(12, 4))
    blog = False

    # left: selected + rejected events of this class, right: selected events only
    panels = (
        ("all", hists_zpt[iclass][1][0].T + hists_zpt[iclass][0][0].T),
        ("predicted as %s" % classnames[1], hists_zpt[iclass][1][0].T),
    )

    for position, (title, data) in enumerate(panels, start=1):
        ax = axs[position-1]
        ax.set_title(title)
        ax.set_xlim((extent_zpt[0], extent_zpt[1]))
        ax.set_ylim((extent_zpt[2], extent_zpt[3]))
        ax.add_patch(copy(box))  # each axes needs its own copy of the patch

        hist = np.ma.masked_where(data == 0, data) / (np.sum(data) if bnorm else 1)
        cax = ax.matshow(hist, origin="lower", extent=extent_zpt, aspect="auto", norm=LogNorm() if blog else None, cmap=cmap)
        # make this panel the current axes so that the colorbar is attached to it
        plt.subplot(1, 2, position)
        plt.colorbar(cax)

    fig.suptitle(classnames[iclass])
    fig.tight_layout()